From 92086e20903feb35df9f18f2a59dc19e7b46e19c Mon Sep 17 00:00:00 2001 From: sakisv Date: Thu, 4 Nov 2021 11:24:44 +0200 Subject: [PATCH] Reduce concurrency on high volume workers We noticed that having high concurrency led to significant memory usage. The hypothesis is that because of long polling, there are many connections being held open which seems to impact the memory usage. Initially the high concurrency was put in place as a way to get around the lack of long polling: We were spawning multiple processes and each one was doing many requests to SQS to check for and receive new tasks. Now with long polling enabled and reduced concurrency, the workers are much more efficient at their job (the tasks are being picked up so fast that the queues are practically empty) and much lighter on resource requirements. (This last bit will allow us to reduce the memory requirement for heavy workers like the sender and reduce our costs.) The concurrency number was chosen semi-arbitrarily: Usually this is set to the number of CPUs available to the system. Because we're running on PaaS and that number is both abstracted and may be claimed by other processes, we went for a conservative one to also reduce the competition for CPU among the processes of the same worker instance. 
--- scripts/paas_app_wrapper.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/paas_app_wrapper.sh b/scripts/paas_app_wrapper.sh index 7bb89d759..b48627bf6 100755 --- a/scripts/paas_app_wrapper.sh +++ b/scripts/paas_app_wrapper.sh @@ -21,7 +21,7 @@ case $NOTIFY_APP_NAME in -Q research-mode-tasks 2> /dev/null ;; delivery-worker-sender) - exec scripts/run_multi_worker_app_paas.sh celery multi start 3 -c 10 -A run_celery.notify_celery --loglevel=INFO \ + exec scripts/run_multi_worker_app_paas.sh celery multi start 3 -c 4 -A run_celery.notify_celery --loglevel=INFO \ --logfile=/dev/null --pidfile=/tmp/celery%N.pid -Q send-sms-tasks,send-email-tasks ;; delivery-worker-periodic) @@ -33,7 +33,7 @@ case $NOTIFY_APP_NAME in -Q reporting-tasks 2> /dev/null ;; delivery-worker-priority) - exec scripts/run_app_paas.sh celery -A run_celery.notify_celery worker --loglevel=INFO --concurrency=5 \ + exec scripts/run_app_paas.sh celery -A run_celery.notify_celery worker --loglevel=INFO --concurrency=4 \ -Q priority-tasks 2> /dev/null ;; # Only consume the notify-internal-tasks queue on this app so that Notify messages are processed as a priority @@ -46,15 +46,15 @@ case $NOTIFY_APP_NAME in -Q broadcast-tasks 2> /dev/null ;; delivery-worker-receipts) - exec scripts/run_app_paas.sh celery -A run_celery.notify_celery worker --loglevel=INFO --concurrency=11 \ + exec scripts/run_app_paas.sh celery -A run_celery.notify_celery worker --loglevel=INFO --concurrency=4 \ -Q ses-callbacks,sms-callbacks 2> /dev/null ;; delivery-worker-service-callbacks) - exec scripts/run_app_paas.sh celery -A run_celery.notify_celery worker --loglevel=INFO --concurrency=11 \ + exec scripts/run_app_paas.sh celery -A run_celery.notify_celery worker --loglevel=INFO --concurrency=4 \ -Q service-callbacks,service-callbacks-retry 2> /dev/null ;; delivery-worker-save-api-notifications) - exec scripts/run_app_paas.sh celery -A run_celery.notify_celery worker --loglevel=INFO 
--concurrency=11 \ + exec scripts/run_app_paas.sh celery -A run_celery.notify_celery worker --loglevel=INFO --concurrency=4 \ -Q save-api-email-tasks,save-api-sms-tasks 2> /dev/null ;; delivery-celery-beat)