diff --git a/app/celery/provider_tasks.py b/app/celery/provider_tasks.py index 1e6c7e96f..4bc8b99e1 100644 --- a/app/celery/provider_tasks.py +++ b/app/celery/provider_tasks.py @@ -1,5 +1,6 @@ import json import os +import random from datetime import timedelta from botocore.exceptions import ClientError @@ -29,8 +30,7 @@ DELIVERY_RECEIPT_DELAY_IN_SECONDS = 30 @notify_celery.task( bind=True, name="check_sms_delivery_receipt", - max_retries=48, - default_retry_delay=300, + max_retries=72, ) def check_sms_delivery_receipt(self, message_id, notification_id, sent_at): """ @@ -62,7 +62,10 @@ def check_sms_delivery_receipt(self, message_id, notification_id, sent_at): carrier=carrier, provider_response=provider_response, ) - raise self.retry(exc=ntfe) + base_delay = 3600 # one hour + jitter = random.randint(-1200, +1200) # nosec B311 + retry_delay = base_delay + jitter + raise self.retry(countdown=retry_delay, exc=ntfe) except ClientError as err: # Probably a ThrottlingException but could be something else error_code = err.response["Error"]["Code"] @@ -77,7 +80,10 @@ def check_sms_delivery_receipt(self, message_id, notification_id, sent_at): carrier=carrier, provider_response=provider_response, ) - raise self.retry(exc=err) + base_delay = 3600 # one hour + jitter = random.randint(-1200, +1200) # nosec B311 + retry_delay = base_delay + jitter + raise self.retry(countdown=retry_delay, exc=err) if status == "success": status = NotificationStatus.DELIVERED diff --git a/app/clients/cloudwatch/aws_cloudwatch.py b/app/clients/cloudwatch/aws_cloudwatch.py index 36bcf5dca..8ef34abac 100644 --- a/app/clients/cloudwatch/aws_cloudwatch.py +++ b/app/clients/cloudwatch/aws_cloudwatch.py @@ -158,7 +158,7 @@ class AwsCloudwatchClient(Client): message["delivery"].get("phoneCarrier", "Unknown Carrier"), ) - if time_now > (created_at + timedelta(hours=3)): + if time_now > (created_at + timedelta(hours=73)): # see app/models.py Notification. This message corresponds to "permanent-failure", # but we are copy/pasting here to avoid circular imports. return "failure", "Unable to find carrier response." diff --git a/app/config.py b/app/config.py index 12159e289..1ec8abd59 100644 --- a/app/config.py +++ b/app/config.py @@ -167,7 +167,7 @@ class Config(object): current_minute = (datetime.now().minute + 1) % 60 CELERY = { - "worker_max_tasks_per_child": 500, + "worker_max_tasks_per_child": 2000, "broker_url": REDIS_URL, "broker_transport_options": { "visibility_timeout": 310,