From 1872854a4ec6fe31ffdfeeff2a7889bb974d14b1 Mon Sep 17 00:00:00 2001 From: Ben Thorner Date: Mon, 8 Nov 2021 10:38:53 +0000 Subject: [PATCH] Improve and clarify large task error handling Previously we were catching one type of exception if something went wrong adding a notification to the queue for high volume services. In reality there are two types of exception so this adds a second handler to cover both. For context, this is code we changed experimentally as part of the upgrade to Celery 5 [1]. At the time we didn't check how the new exception compared to the old one. It turns out they behaved the same and we were always vulnerable to the scenario now covered by the second exception, where the behaviour has changed in Celery 5 - testing with a large task invocation gives... Before (Celery 3, large-ish task): 'process_job.apply_async(["a" * 200000])'... boto.exception.SQSError: SQSError: 400 Bad Request SenderInvalidParameterValueOne or more parameters are invalid. Reason: Message must be shorter than 262144 bytes.96162552-cd96-5a14-b3a5-7f503300a662 Before (Celery 3, very large task): After (Celery 5, large-ish task): botocore.exceptions.ClientError: An error occurred (InvalidParameterValue) when calling the SendMessage operation: One or more parameters are invalid. Reason: Message must be shorter than 262144 bytes. After (Celery 5, very large task): botocore.parsers.ResponseParserError: Unable to parse response (syntax error: line 1, column 0), invalid XML received. Further retries may succeed: b'HTTP content length exceeded 1662976 bytes.' [1]: https://github.com/alphagov/notifications-api/pull/3355/commits/29c92a9e54fe406f9490f6c89bd047832f75ab5f --- app/v2/notifications/post_notifications.py | 7 ++++--- .../app/v2/notifications/test_post_notifications.py | 12 ++++++++++-- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/app/v2/notifications/post_notifications.py b/app/v2/notifications/post_notifications.py index 01cf21a56..c5ea8d2b3 100644 --- a/app/v2/notifications/post_notifications.py +++ b/app/v2/notifications/post_notifications.py @@ -232,9 +232,10 @@ def process_sms_or_email_notification( reply_to_text=reply_to_text ) return resp - except botocore.exceptions.ClientError: - # if SQS cannot put the task on the queue, it's probably because the notification body was too long and it - # went over SQS's 256kb message limit. If so, we + except (botocore.exceptions.ClientError, botocore.parsers.ResponseParserError): + # If SQS cannot put the task on the queue, it's probably because the notification body was too long and it + # went over SQS's 256kb message limit. If the body is very large, it may exceed the HTTP max content length; + # the exception we get here isn't handled correctly by botocore - we get a ResponseParserError instead. current_app.logger.info( f'Notification {notification_id} failed to save to high volume queue. Using normal flow instead' ) diff --git a/tests/app/v2/notifications/test_post_notifications.py b/tests/app/v2/notifications/test_post_notifications.py index 7d500d84a..1164eb34f 100644 --- a/tests/app/v2/notifications/test_post_notifications.py +++ b/tests/app/v2/notifications/test_post_notifications.py @@ -1068,13 +1068,21 @@ def test_post_notifications_saves_email_or_sms_to_queue(client, notify_db_sessio assert len(Notification.query.all()) == 0 +@pytest.mark.parametrize("exception", [ + botocore.exceptions.ClientError({'some': 'json'}, 'some opname'), + botocore.parsers.ResponseParserError('exceeded max HTTP body length'), +]) @pytest.mark.parametrize("notification_type", ("email", "sms")) def test_post_notifications_saves_email_or_sms_normally_if_saving_to_queue_fails( - client, notify_db_session, mocker, notification_type + client, + notify_db_session, + mocker, + notification_type, + exception ): save_task = mocker.patch( f"app.celery.tasks.save_api_{notification_type}.apply_async", - side_effect=botocore.exceptions.ClientError({'some': 'json'}, 'some opname') + side_effect=exception, ) mock_send_task = mocker.patch(f'app.celery.provider_tasks.deliver_{notification_type}.apply_async')