From 3fb113a83e5cac3673ae7c53bc1e39dd7596a3b1 Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Thu, 4 May 2023 07:56:24 -0700 Subject: [PATCH 1/8] notify-152 sms delivery receipts --- app/__init__.py | 2 + app/celery/provider_tasks.py | 33 ++++++++- app/clients/cloudwatch/__init__.py | 25 +++++++ app/clients/cloudwatch/aws_cloudwatch.py | 89 ++++++++++++++++++++++++ app/config.py | 2 + app/dao/notifications_dao.py | 4 +- app/delivery/send_to_providers.py | 6 +- tests/app/celery/test_provider_tasks.py | 1 + tests/app/clients/test_aws_cloudwatch.py | 79 +++++++++++++++++++++ tests/app/test_config.py | 3 +- 10 files changed, 237 insertions(+), 7 deletions(-) create mode 100644 app/clients/cloudwatch/__init__.py create mode 100644 app/clients/cloudwatch/aws_cloudwatch.py create mode 100644 tests/app/clients/test_aws_cloudwatch.py diff --git a/app/__init__.py b/app/__init__.py index abadaa315..f39be3c22 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -29,6 +29,7 @@ from werkzeug.exceptions import HTTPException as WerkzeugHTTPException from werkzeug.local import LocalProxy from app.clients import NotificationProviderClients +from app.clients.cloudwatch.aws_cloudwatch import AwsCloudwatchClient from app.clients.document_download import DocumentDownloadClient from app.clients.email.aws_ses import AwsSesClient from app.clients.email.aws_ses_stub import AwsSesStubClient @@ -55,6 +56,7 @@ notify_celery = NotifyCelery() aws_ses_client = AwsSesClient() aws_ses_stub_client = AwsSesStubClient() aws_sns_client = AwsSnsClient() +aws_cloudwatch_client = AwsCloudwatchClient() encryption = Encryption() zendesk_client = ZendeskClient() redis_store = RedisClient() diff --git a/app/celery/provider_tasks.py b/app/celery/provider_tasks.py index a274635ce..bee645095 100644 --- a/app/celery/provider_tasks.py +++ b/app/celery/provider_tasks.py @@ -1,7 +1,11 @@ +from datetime import datetime, timedelta +from zoneinfo import ZoneInfo + from flask import current_app from sqlalchemy.orm.exc import NoResultFound from app import notify_celery +from app.clients.cloudwatch.aws_cloudwatch import AwsCloudwatchClient from app.clients.email import EmailClientNonRetryableException from app.clients.email.aws_ses import AwsSesClientThrottlingSendRateException from app.clients.sms import SmsClientResponseException @@ -10,7 +14,24 @@ from app.dao import notifications_dao from app.dao.notifications_dao import update_notification_status_by_id from app.delivery import send_to_providers from app.exceptions import NotificationTechnicalFailureException -from app.models import NOTIFICATION_TECHNICAL_FAILURE +from app.models import ( + NOTIFICATION_FAILED, + NOTIFICATION_SENT, + NOTIFICATION_TECHNICAL_FAILURE, +) + + +@notify_celery.task(bind=True, name="check_sms_delivery_receipt", max_retries=3, default_retry_delay=300) +def check_sms_delivery_receipt(self, message_id, notification_id): + current_app.logger.warning(f"CHECKING DELIVERY RECEIPT for {message_id} {notification_id}") + cloudwatch_client = AwsCloudwatchClient() + cloudwatch_client.init_app(current_app) + status, provider_response = cloudwatch_client.check_sms(message_id, notification_id) + if status == 'success': + status = NOTIFICATION_SENT + else: + status = NOTIFICATION_FAILED + update_notification_status_by_id(notification_id, status, provider_response=provider_response) @notify_celery.task(bind=True, name="deliver_sms", max_retries=48, default_retry_delay=300) @@ -20,7 +41,15 @@ def deliver_sms(self, notification_id): notification = notifications_dao.get_notification_by_id(notification_id) if not notification: raise NoResultFound() - send_to_providers.send_sms_to_provider(notification) + message_id = send_to_providers.send_sms_to_provider(notification) + # We have to put it in the default US/Eastern timezone. From zones west of there, the delay + # will be ignored and it will fire immediately (although this probably only affects developer testing) + my_eta = datetime.now(ZoneInfo('US/Eastern')) + timedelta(seconds=300) + check_sms_delivery_receipt.apply_async( + [message_id, notification_id], + eta=my_eta, + queue=QueueNames.CHECK_SMS + ) except Exception as e: if isinstance(e, SmsClientResponseException): current_app.logger.warning( diff --git a/app/clients/cloudwatch/__init__.py b/app/clients/cloudwatch/__init__.py new file mode 100644 index 000000000..31c79f889 --- /dev/null +++ b/app/clients/cloudwatch/__init__.py @@ -0,0 +1,25 @@ +from app.clients import Client, ClientException + + +class CloudwatchClientResponseException(ClientException): + """ + Base Exception for SmsClientsResponses + """ + + def __init__(self, message): + self.message = message + + def __str__(self): + return "Message {}".format(self.message) + + +class CloudwatchClient(Client): + """ + Base Cloudwatch client for checking sms. + """ + + def init_app(self, *args, **kwargs): + raise NotImplementedError("TODO Need to implement.") + + def check_sms(self, *args, **kwargs): + raise NotImplementedError("TODO Need to implement.") diff --git a/app/clients/cloudwatch/aws_cloudwatch.py b/app/clients/cloudwatch/aws_cloudwatch.py new file mode 100644 index 000000000..847b39bf9 --- /dev/null +++ b/app/clients/cloudwatch/aws_cloudwatch.py @@ -0,0 +1,89 @@ +import json +import os +import re +import time + +from boto3 import client + +from app.clients.cloudwatch import CloudwatchClient +from app.cloudfoundry_config import cloud_config + + +class AwsCloudwatchClient(CloudwatchClient): + """ + AwsCloudwatch cloudwatch client + """ + + def init_app(self, current_app, *args, **kwargs): + self._client = client( + "logs", + region_name=cloud_config.sns_region, + aws_access_key_id=cloud_config.sns_access_key, + aws_secret_access_key=cloud_config.sns_secret_key + ) + super(CloudwatchClient, self).__init__(*args, **kwargs) + self.current_app = current_app + self._valid_sender_regex = re.compile(r"^\+?\d{5,14}$") + + @property + def name(self): + return 'cloudwatch' + + def _get_all_logs(self, my_filter, log_group_name): + now = round(time.time() * 1000) + beginning = now - 30 * 60 * 1000 + + next_token = None + all_log_events = [] + while True: + if next_token: + response = self._client.filter_log_events( + logGroupName=log_group_name, + filterPattern=my_filter, + nextToken=next_token, + startTime=beginning, + endTime=now + ) + else: + response = self._client.filter_log_events( + logGroupName=log_group_name, + filterPattern=my_filter, + startTime=beginning, + endTime=now + ) + log_events = response.get('events', []) + all_log_events.extend(log_events) + next_token = response.get('nextToken') + if not next_token: + break + return all_log_events + + def check_sms(self, message_id, notification_id): + # TODO presumably there is a better way to get the account number + account_number = os.getenv("SES_DOMAIN_ARN") + account_number = account_number.replace('arn:aws:ses:us-west-2:', '') + account_number = account_number.split(":") + account_number = account_number[0] + + log_group_name = f'sns/us-west-2/{account_number}/DirectPublishToPhoneNumber' + filter_pattern = '{$.notification.messageId="XXXXX"}' + filter_pattern = filter_pattern.replace("XXXXX", message_id) + all_log_events = self._get_all_logs(filter_pattern, log_group_name) + + self.current_app.logger.warning(f"ALL EVENTS {all_log_events}") + + if all_log_events and len(all_log_events) > 0: + event = all_log_events[0] + self.current_app.logger.warning(f"HERE IS AN EVENT {event} of type {type(event)}") + message = json.loads(event['message']) + self.current_app.logger.warning(f"HERE IS THE message {message}") + return "success", message['delivery']['providerResponse'] + + log_group_name = f'sns/us-west-2/{account_number}/DirectPublishToPhoneNumber/Failure' + all_failed_events = self._get_all_logs(filter_pattern, log_group_name) + if all_failed_events and len(all_failed_events) > 0: + event = all_failed_events[0] + message = json.loads(event['message']) + return "fail", message['delivery']['providerResponse'] + + raise Exception(f'No event found for message_id {message_id}') diff --git a/app/config.py b/app/config.py index 466efcd76..d1762168b 100644 --- a/app/config.py +++ b/app/config.py @@ -13,6 +13,7 @@ class QueueNames(object): PRIORITY = 'priority-tasks' DATABASE = 'database-tasks' SEND_SMS = 'send-sms-tasks' + CHECK_SMS = 'check-sms_tasks' SEND_EMAIL = 'send-email-tasks' RESEARCH_MODE = 'research-mode-tasks' REPORTING = 'reporting-tasks' @@ -33,6 +34,7 @@ class QueueNames(object): QueueNames.PERIODIC, QueueNames.DATABASE, QueueNames.SEND_SMS, + QueueNames.CHECK_SMS, QueueNames.SEND_EMAIL, QueueNames.RESEARCH_MODE, QueueNames.REPORTING, diff --git a/app/dao/notifications_dao.py b/app/dao/notifications_dao.py index d560e61eb..ae8405440 100644 --- a/app/dao/notifications_dao.py +++ b/app/dao/notifications_dao.py @@ -95,7 +95,7 @@ def _update_notification_status(notification, status, provider_response=None): @autocommit -def update_notification_status_by_id(notification_id, status, sent_by=None): +def update_notification_status_by_id(notification_id, status, sent_by=None, provider_response=None): notification = Notification.query.with_for_update().filter(Notification.id == notification_id).first() if not notification: @@ -121,6 +121,8 @@ def update_notification_status_by_id(notification_id, status, sent_by=None): and not country_records_delivery(notification.phone_prefix) ): return None + if provider_response: + notification.provider_response = provider_response if not notification.sent_by and sent_by: notification.sent_by = sent_by return _update_notification_status( diff --git a/app/delivery/send_to_providers.py b/app/delivery/send_to_providers.py index db331db43..380ec7b4d 100644 --- a/app/delivery/send_to_providers.py +++ b/app/delivery/send_to_providers.py @@ -38,7 +38,7 @@ from app.serialised_models import SerialisedService, SerialisedTemplate def send_sms_to_provider(notification): service = SerialisedService.from_id(notification.service_id) - + message_id = None if not service.active: technical_failure(notification=notification) return @@ -79,7 +79,7 @@ def send_sms_to_provider(notification): 'international': notification.international, } db.session.close() # no commit needed as no changes to objects have been made above - provider.send_sms(**send_sms_kwargs) + message_id = provider.send_sms(**send_sms_kwargs) except Exception as e: notification.billable_units = template.fragment_count dao_update_notification(notification) @@ -88,6 +88,7 @@ def send_sms_to_provider(notification): else: notification.billable_units = template.fragment_count update_notification_to_sending(notification, provider) + return message_id def send_email_to_provider(notification): @@ -98,7 +99,6 @@ def send_email_to_provider(notification): return if notification.status == 'created': provider = provider_to_use(EMAIL_TYPE, False) - template_dict = SerialisedTemplate.from_id_and_service_id( template_id=notification.template_id, service_id=service.id, version=notification.template_version ).__dict__ diff --git a/tests/app/celery/test_provider_tasks.py b/tests/app/celery/test_provider_tasks.py index 2f241bc24..d4a9070bf 100644 --- a/tests/app/celery/test_provider_tasks.py +++ b/tests/app/celery/test_provider_tasks.py @@ -23,6 +23,7 @@ def test_should_call_send_sms_to_provider_from_deliver_sms_task( sample_notification, mocker): mocker.patch('app.delivery.send_to_providers.send_sms_to_provider') + mocker.patch('app.celery.provider_tasks.check_sms_delivery_receipt') deliver_sms(sample_notification.id) app.delivery.send_to_providers.send_sms_to_provider.assert_called_with(sample_notification) diff --git a/tests/app/clients/test_aws_cloudwatch.py b/tests/app/clients/test_aws_cloudwatch.py new file mode 100644 index 000000000..78faa69ea --- /dev/null +++ b/tests/app/clients/test_aws_cloudwatch.py @@ -0,0 +1,79 @@ +import pytest +from flask import current_app + +from app import aws_cloudwatch_client + + +def test_check_sms_no_event_error_condition(notify_api, mocker): + boto_mock = mocker.patch.object(aws_cloudwatch_client, '_client', create=True) + message_id = 'aaa' + notification_id = 'bbb' + boto_mock.filter_log_events.return_value = [] + with notify_api.app_context(): + aws_cloudwatch_client.init_app(current_app) + with pytest.raises(Exception): + aws_cloudwatch_client.check_sms(message_id, notification_id) + + +def side_effect(filterPattern, logGroupName, startTime, endTime): + if "Failure" in logGroupName and 'fail' in filterPattern: + return { + "events": + [ + { + 'logStreamName': '89db9712-c6d1-49f9-be7c-4caa7ed9efb1', + 'message': '{"delivery":{"destination":"+1661","providerResponse":"Invalid phone number"}}', + 'eventId': '37535432778099870001723210579798865345508698025292922880' + } + ] + } + + elif 'succeed' in filterPattern: + return { + "events": + [ + { + 'logStreamName': '89db9712-c6d1-49f9-be7c-4caa7ed9efb1', + 'timestamp': 1683147017911, + 'message': '{"delivery":{"destination":"+1661","providerResponse":"Phone accepted msg"}}', + 'ingestionTime': 1683147018026, + 'eventId': '37535432778099870001723210579798865345508698025292922880' + } + ] + } + else: + return {"events": []} + + +def test_check_sms_success(notify_api, mocker): + aws_cloudwatch_client.init_app(current_app) + boto_mock = mocker.patch.object(aws_cloudwatch_client, '_client', create=True) + boto_mock.filter_log_events.side_effect = side_effect + message_id = 'succeed' + notification_id = 'ccc' + with notify_api.app_context(): + aws_cloudwatch_client.check_sms(message_id, notification_id) + + # We check the 'success' log group first and if we find the message_id, we are done, so there is only 1 call + assert boto_mock.filter_log_events.call_count == 1 + mock_call = str(boto_mock.filter_log_events.mock_calls[0]) + assert 'Failure' not in mock_call + assert 'succeed' in mock_call + assert 'notification.messageId' in mock_call + + +def test_check_sms_failure(notify_api, mocker): + aws_cloudwatch_client.init_app(current_app) + boto_mock = mocker.patch.object(aws_cloudwatch_client, '_client', create=True) + boto_mock.filter_log_events.side_effect = side_effect + message_id = 'fail' + notification_id = 'bbb' + with notify_api.app_context(): + aws_cloudwatch_client.check_sms(message_id, notification_id) + + # We check the 'success' log group and find nothing, so we then check the 'fail' log group -- two calls. + assert boto_mock.filter_log_events.call_count == 2 + mock_call = str(boto_mock.filter_log_events.mock_calls[1]) + assert 'Failure' in mock_call + assert 'fail' in mock_call + assert 'notification.messageId' in mock_call diff --git a/tests/app/test_config.py b/tests/app/test_config.py index fe2fef296..23d67aafa 100644 --- a/tests/app/test_config.py +++ b/tests/app/test_config.py @@ -4,12 +4,13 @@ from app.config import QueueNames def test_queue_names_all_queues_correct(): # Need to ensure that all_queues() only returns queue names used in API queues = QueueNames.all_queues() - assert len(queues) == 15 + assert len(queues) == 16 assert set([ QueueNames.PRIORITY, QueueNames.PERIODIC, QueueNames.DATABASE, QueueNames.SEND_SMS, + QueueNames.CHECK_SMS, QueueNames.SEND_EMAIL, QueueNames.RESEARCH_MODE, QueueNames.REPORTING, From 7892c52d11c46c6f7079e97dd8a1f9a8eeddb079 Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Thu, 4 May 2023 08:15:08 -0700 Subject: [PATCH 2/8] fix tests --- app/clients/cloudwatch/aws_cloudwatch.py | 2 +- tests/app/clients/test_aws_cloudwatch.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/app/clients/cloudwatch/aws_cloudwatch.py b/app/clients/cloudwatch/aws_cloudwatch.py index 847b39bf9..6d5498827 100644 --- a/app/clients/cloudwatch/aws_cloudwatch.py +++ b/app/clients/cloudwatch/aws_cloudwatch.py @@ -59,7 +59,7 @@ class AwsCloudwatchClient(CloudwatchClient): return all_log_events def check_sms(self, message_id, notification_id): - # TODO presumably there is a better way to get the account number + # TODO presumably there is a better way to get the aws account number account_number = os.getenv("SES_DOMAIN_ARN") account_number = account_number.replace('arn:aws:ses:us-west-2:', '') account_number = account_number.split(":") diff --git a/tests/app/clients/test_aws_cloudwatch.py b/tests/app/clients/test_aws_cloudwatch.py index 78faa69ea..38ca50243 100644 --- a/tests/app/clients/test_aws_cloudwatch.py +++ b/tests/app/clients/test_aws_cloudwatch.py @@ -6,6 +6,10 @@ from app import aws_cloudwatch_client def test_check_sms_no_event_error_condition(notify_api, mocker): boto_mock = mocker.patch.object(aws_cloudwatch_client, '_client', create=True) + # TODO + # we do this to get the AWS account number, and it seems like unit tests locally have + # access to the env variables but when we push the PR they do not. Is there a better way to get it? + mocker.patch.dict('os.environ', {"SES_DOMAIN_ARN": "1111:"}) message_id = 'aaa' notification_id = 'bbb' boto_mock.filter_log_events.return_value = [] @@ -49,6 +53,8 @@ def test_check_sms_success(notify_api, mocker): aws_cloudwatch_client.init_app(current_app) boto_mock = mocker.patch.object(aws_cloudwatch_client, '_client', create=True) boto_mock.filter_log_events.side_effect = side_effect + mocker.patch.dict('os.environ', {"SES_DOMAIN_ARN": "1111:"}) + message_id = 'succeed' notification_id = 'ccc' with notify_api.app_context(): @@ -66,6 +72,8 @@ def test_check_sms_failure(notify_api, mocker): aws_cloudwatch_client.init_app(current_app) boto_mock = mocker.patch.object(aws_cloudwatch_client, '_client', create=True) boto_mock.filter_log_events.side_effect = side_effect + mocker.patch.dict('os.environ', {"SES_DOMAIN_ARN": "1111:"}) + message_id = 'fail' notification_id = 'bbb' with notify_api.app_context(): From 0806e967455133dde5286fd9b9bc9cd0aa189cb3 Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Thu, 4 May 2023 08:21:53 -0700 Subject: [PATCH 3/8] fix dependencies --- Pipfile.lock | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Pipfile.lock b/Pipfile.lock index 75008054f..55c6b7e39 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -769,7 +769,7 @@ "notifications-utils": { "editable": true, "git": "https://github.com/GSA/notifications-utils.git", - "ref": "c7a85c4d1e8f975d349c1555f416ae20f28a2ce1" + "ref": "1492349e323df749771494f13e3eb268ee402a65" }, "numpy": { "hashes": [ @@ -1062,11 +1062,11 @@ }, "requests": { "hashes": [ - "sha256:e8f3c9be120d3333921d213eef078af392fba3933ab7ed2d1cba3b56f2568c3b", - "sha256:f2e34a75f4749019bb0e3effb66683630e4ffeaf75819fb51bebef1bf5aef059" + "sha256:10e94cc4f3121ee6da529d358cdaeaff2f1c409cd377dbc72b825852f2f7e294", + "sha256:239d7d4458afcb28a692cdd298d87542235f4ca8d36d03a15bfc128a6559a2f4" ], "markers": "python_version >= '3.7'", - "version": "==2.29.0" + "version": "==2.30.0" }, "rfc3339-validator": { "hashes": [ @@ -2075,11 +2075,11 @@ }, "requests": { "hashes": [ - "sha256:e8f3c9be120d3333921d213eef078af392fba3933ab7ed2d1cba3b56f2568c3b", - "sha256:f2e34a75f4749019bb0e3effb66683630e4ffeaf75819fb51bebef1bf5aef059" + "sha256:10e94cc4f3121ee6da529d358cdaeaff2f1c409cd377dbc72b825852f2f7e294", + "sha256:239d7d4458afcb28a692cdd298d87542235f4ca8d36d03a15bfc128a6559a2f4" ], "markers": "python_version >= '3.7'", - "version": "==2.29.0" + "version": "==2.30.0" }, "requests-mock": { "hashes": [ From 70b58f50ac53776d4dd4ba8fe1d9d87fd006122a Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Thu, 4 May 2023 08:40:16 -0700 Subject: [PATCH 4/8] add comments --- app/clients/cloudwatch/aws_cloudwatch.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/app/clients/cloudwatch/aws_cloudwatch.py b/app/clients/cloudwatch/aws_cloudwatch.py index 6d5498827..57960c40a 100644 --- a/app/clients/cloudwatch/aws_cloudwatch.py +++ b/app/clients/cloudwatch/aws_cloudwatch.py @@ -11,7 +11,7 @@ from app.cloudfoundry_config import cloud_config class AwsCloudwatchClient(CloudwatchClient): """ - AwsCloudwatch cloudwatch client + This client is responsible for retrieving sms delivery receipts from cloudwatch. """ def init_app(self, current_app, *args, **kwargs): @@ -30,6 +30,8 @@ class AwsCloudwatchClient(CloudwatchClient): return 'cloudwatch' def _get_all_logs(self, my_filter, log_group_name): + + # Check all events in the last 30 minutes now = round(time.time() * 1000) beginning = now - 30 * 60 * 1000 @@ -59,6 +61,12 @@ class AwsCloudwatchClient(CloudwatchClient): return all_log_events def check_sms(self, message_id, notification_id): + """ + Go through the cloudwatch logs, filtering by message id. Check the success logs first. If we find + the message id there, we are done. Otherwise check the failure logs. If we don't find the message + in the success or failure logs, raise an exception. This method is called on a five minute delay, + which is presumably enough time for the cloudwatch log to be populated. + """ # TODO presumably there is a better way to get the aws account number account_number = os.getenv("SES_DOMAIN_ARN") account_number = account_number.replace('arn:aws:ses:us-west-2:', '') @@ -70,13 +78,9 @@ class AwsCloudwatchClient(CloudwatchClient): filter_pattern = filter_pattern.replace("XXXXX", message_id) all_log_events = self._get_all_logs(filter_pattern, log_group_name) - self.current_app.logger.warning(f"ALL EVENTS {all_log_events}") - if all_log_events and len(all_log_events) > 0: event = all_log_events[0] - self.current_app.logger.warning(f"HERE IS AN EVENT {event} of type {type(event)}") message = json.loads(event['message']) - self.current_app.logger.warning(f"HERE IS THE message {message}") return "success", message['delivery']['providerResponse'] log_group_name = f'sns/us-west-2/{account_number}/DirectPublishToPhoneNumber/Failure' @@ -86,4 +90,4 @@ class AwsCloudwatchClient(CloudwatchClient): message = json.loads(event['message']) return "fail", message['delivery']['providerResponse'] - raise Exception(f'No event found for message_id {message_id}') + raise Exception(f'No event found for message_id {message_id} notification_id {notification_id}') From b59e4df06d52d1d7faf49d69cccae6e3f02fbde3 Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Fri, 5 May 2023 08:09:15 -0700 Subject: [PATCH 5/8] code review feedback --- app/__init__.py | 1 + app/celery/provider_tasks.py | 19 +++++++++++------- app/clients/cloudwatch/__init__.py | 25 ------------------------ app/clients/cloudwatch/aws_cloudwatch.py | 16 +++++---------- app/cloudfoundry_config.py | 9 +++++++++ 5 files changed, 27 insertions(+), 43 deletions(-) diff --git a/app/__init__.py b/app/__init__.py index f39be3c22..81e5c055a 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -98,6 +98,7 @@ def create_app(application): aws_ses_stub_client.init_app( stub_url=application.config['SES_STUB_URL'] ) + aws_cloudwatch_client.init_app(application) # If a stub url is provided for SES, then use the stub client rather than the real SES boto client email_clients = [aws_ses_stub_client] if application.config['SES_STUB_URL'] else [aws_ses_client] notification_provider_clients.init_app( diff --git a/app/celery/provider_tasks.py b/app/celery/provider_tasks.py index bee645095..1fe30c92c 100644 --- a/app/celery/provider_tasks.py +++ b/app/celery/provider_tasks.py @@ -4,8 +4,7 @@ from zoneinfo import ZoneInfo from flask import current_app from sqlalchemy.orm.exc import NoResultFound -from app import notify_celery -from app.clients.cloudwatch.aws_cloudwatch import AwsCloudwatchClient +from app import aws_cloudwatch_client, notify_celery from app.clients.email import EmailClientNonRetryableException from app.clients.email.aws_ses import AwsSesClientThrottlingSendRateException from app.clients.sms import SmsClientResponseException @@ -21,12 +20,18 @@ from app.models import ( ) -@notify_celery.task(bind=True, name="check_sms_delivery_receipt", max_retries=3, default_retry_delay=300) +@notify_celery.task(bind=True, name="check_sms_delivery_receipt", max_retries=48, default_retry_delay=300) def check_sms_delivery_receipt(self, message_id, notification_id): - current_app.logger.warning(f"CHECKING DELIVERY RECEIPT for {message_id} {notification_id}") - cloudwatch_client = AwsCloudwatchClient() - cloudwatch_client.init_app(current_app) - status, provider_response = cloudwatch_client.check_sms(message_id, notification_id) + """ + This is called after deliver_sms to check the status of the message. This uses the same number of + retries and the same delay period as deliver_sms. In addition, this fires five minutes after + deliver_sms initially. So the idea is that most messages will succeed and show up in the logs quickly. + Other message will resolve successfully after a retry or to. A few will fail but it will take up to + 4 hours to know for sure. The call to check_sms will raise an exception if neither a success nor a + failure appears in the cloudwatch logs, so this should keep retrying until the log appears, or until + we run out of retries. + """ + status, provider_response = aws_cloudwatch_client.check_sms(message_id, notification_id) if status == 'success': status = NOTIFICATION_SENT else: diff --git a/app/clients/cloudwatch/__init__.py b/app/clients/cloudwatch/__init__.py index 31c79f889..e69de29bb 100644 --- a/app/clients/cloudwatch/__init__.py +++ b/app/clients/cloudwatch/__init__.py @@ -1,25 +0,0 @@ -from app.clients import Client, ClientException - - -class CloudwatchClientResponseException(ClientException): - """ - Base Exception for SmsClientsResponses - """ - - def __init__(self, message): - self.message = message - - def __str__(self): - return "Message {}".format(self.message) - - -class CloudwatchClient(Client): - """ - Base Cloudwatch client for checking sms. - """ - - def init_app(self, *args, **kwargs): - raise NotImplementedError("TODO Need to implement.") - - def check_sms(self, *args, **kwargs): - raise NotImplementedError("TODO Need to implement.") diff --git a/app/clients/cloudwatch/aws_cloudwatch.py b/app/clients/cloudwatch/aws_cloudwatch.py index 57960c40a..c6098bef5 100644 --- a/app/clients/cloudwatch/aws_cloudwatch.py +++ b/app/clients/cloudwatch/aws_cloudwatch.py @@ -5,11 +5,11 @@ import time from boto3 import client -from app.clients.cloudwatch import CloudwatchClient +from app.clients import Client from app.cloudfoundry_config import cloud_config -class AwsCloudwatchClient(CloudwatchClient): +class AwsCloudwatchClient(Client): """ This client is responsible for retrieving sms delivery receipts from cloudwatch. """ @@ -21,7 +21,7 @@ class AwsCloudwatchClient(CloudwatchClient): aws_access_key_id=cloud_config.sns_access_key, aws_secret_access_key=cloud_config.sns_secret_key ) - super(CloudwatchClient, self).__init__(*args, **kwargs) + super(Client, self).__init__(*args, **kwargs) self.current_app = current_app self._valid_sender_regex = re.compile(r"^\+?\d{5,14}$") @@ -61,14 +61,8 @@ class AwsCloudwatchClient(CloudwatchClient): return all_log_events def check_sms(self, message_id, notification_id): - """ - Go through the cloudwatch logs, filtering by message id. Check the success logs first. If we find - the message id there, we are done. Otherwise check the failure logs. If we don't find the message - in the success or failure logs, raise an exception. This method is called on a five minute delay, - which is presumably enough time for the cloudwatch log to be populated. - """ - # TODO presumably there is a better way to get the aws account number - account_number = os.getenv("SES_DOMAIN_ARN") + # TODO this clumsy approach to getting the account number will be fixed as part of notify-api #258 + account_number = cloud_config.ses_domain_arn account_number = account_number.replace('arn:aws:ses:us-west-2:', '') account_number = account_number.split(":") account_number = account_number[0] diff --git a/app/cloudfoundry_config.py b/app/cloudfoundry_config.py index 7fda0184d..62527c797 100644 --- a/app/cloudfoundry_config.py +++ b/app/cloudfoundry_config.py @@ -39,6 +39,15 @@ class CloudfoundryConfig: domain_arn = getenv('SES_DOMAIN_ARN', 'dev.notify.gov') return domain_arn.split('/')[-1] + # TODO remove this after notifications-api #258 + @property + def ses_domain_arn(self): + try: + domain_arn = self._ses_credentials('domain_arn') + except KeyError: + domain_arn = getenv('SES_DOMAIN_ARN', 'dev.notify.gov') + return domain_arn + @property def ses_region(self): try: From 10cb0df2e0882be644b8fabb39c6b344732fa4fc Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Fri, 5 May 2023 08:32:29 -0700 Subject: [PATCH 6/8] fix flake8 --- app/clients/cloudwatch/aws_cloudwatch.py | 1 - 1 file changed, 1 deletion(-) diff --git a/app/clients/cloudwatch/aws_cloudwatch.py b/app/clients/cloudwatch/aws_cloudwatch.py index c6098bef5..e3e351559 100644 --- a/app/clients/cloudwatch/aws_cloudwatch.py +++ b/app/clients/cloudwatch/aws_cloudwatch.py @@ -1,5 +1,4 @@ import json -import os import re import time From 1e72f97b17894b5bb75f122bf9232ee43b7ccfe2 Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Tue, 9 May 2023 08:45:51 -0700 Subject: [PATCH 7/8] code review feedback --- app/celery/provider_tasks.py | 10 +++++++--- app/clients/cloudwatch/aws_cloudwatch.py | 17 ++++++++++------- tests/app/clients/test_aws_cloudwatch.py | 4 ++-- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/app/celery/provider_tasks.py b/app/celery/provider_tasks.py index 1fe30c92c..01d826ba6 100644 --- a/app/celery/provider_tasks.py +++ b/app/celery/provider_tasks.py @@ -1,4 +1,5 @@ from datetime import datetime, timedelta +from time import time from zoneinfo import ZoneInfo from flask import current_app @@ -21,7 +22,7 @@ from app.models import ( @notify_celery.task(bind=True, name="check_sms_delivery_receipt", max_retries=48, default_retry_delay=300) -def check_sms_delivery_receipt(self, message_id, notification_id): +def check_sms_delivery_receipt(self, message_id, notification_id, sent_at): """ This is called after deliver_sms to check the status of the message. This uses the same number of retries and the same delay period as deliver_sms. In addition, this fires five minutes after @@ -31,17 +32,20 @@ def check_sms_delivery_receipt(self, message_id, notification_id): failure appears in the cloudwatch logs, so this should keep retrying until the log appears, or until we run out of retries. """ - status, provider_response = aws_cloudwatch_client.check_sms(message_id, notification_id) + status, provider_response = aws_cloudwatch_client.check_sms(message_id, notification_id, sent_at) if status == 'success': status = NOTIFICATION_SENT else: status = NOTIFICATION_FAILED update_notification_status_by_id(notification_id, status, provider_response=provider_response) + current_app.logger.info(f"Updated notification {notification_id} with response '{provider_response}'") @notify_celery.task(bind=True, name="deliver_sms", max_retries=48, default_retry_delay=300) def deliver_sms(self, notification_id): try: + # Get the time we are doing the sending, to minimize the time period we need to check over for receipt + now = round(time() * 1000) current_app.logger.info("Start sending SMS for notification id: {}".format(notification_id)) notification = notifications_dao.get_notification_by_id(notification_id) if not notification: @@ -51,7 +55,7 @@ def deliver_sms(self, notification_id): # will be ignored and it will fire immediately (although this probably only affects developer testing) my_eta = datetime.now(ZoneInfo('US/Eastern')) + timedelta(seconds=300) check_sms_delivery_receipt.apply_async( - [message_id, notification_id], + [message_id, notification_id, now], eta=my_eta, queue=QueueNames.CHECK_SMS ) diff --git a/app/clients/cloudwatch/aws_cloudwatch.py b/app/clients/cloudwatch/aws_cloudwatch.py index e3e351559..97de58219 100644 --- a/app/clients/cloudwatch/aws_cloudwatch.py +++ b/app/clients/cloudwatch/aws_cloudwatch.py @@ -28,12 +28,11 @@ class AwsCloudwatchClient(Client): def name(self): return 'cloudwatch' - def _get_all_logs(self, my_filter, log_group_name): + def _get_log(self, my_filter, log_group_name, sent_at): - # Check all events in the last 30 minutes + # Check all cloudwatch logs from the time the notification was sent (currently 5 minutes previously) until now now = round(time.time() * 1000) - beginning = now - 30 * 60 * 1000 - + beginning = sent_at next_token = None all_log_events = [] while True: @@ -54,12 +53,16 @@ class AwsCloudwatchClient(Client): ) log_events = response.get('events', []) all_log_events.extend(log_events) + if len(log_events) > 0: + # We found it + break next_token = response.get('nextToken') if not next_token: break return all_log_events - def check_sms(self, message_id, notification_id): + def check_sms(self, message_id, notification_id, created_at): + # TODO this clumsy approach to getting the account number will be fixed as part of notify-api #258 account_number = cloud_config.ses_domain_arn account_number = account_number.replace('arn:aws:ses:us-west-2:', '') @@ -69,7 +72,7 @@ class AwsCloudwatchClient(Client): log_group_name = f'sns/us-west-2/{account_number}/DirectPublishToPhoneNumber' filter_pattern = '{$.notification.messageId="XXXXX"}' filter_pattern = filter_pattern.replace("XXXXX", message_id) - all_log_events = self._get_all_logs(filter_pattern, log_group_name) + all_log_events = self._get_log(filter_pattern, log_group_name, created_at) if all_log_events and len(all_log_events) > 0: event = all_log_events[0] @@ -77,7 +80,7 @@ class AwsCloudwatchClient(Client): return "success", message['delivery']['providerResponse'] log_group_name = f'sns/us-west-2/{account_number}/DirectPublishToPhoneNumber/Failure' - all_failed_events = self._get_all_logs(filter_pattern, log_group_name) + all_failed_events = self._get_log(filter_pattern, log_group_name, created_at) if all_failed_events and len(all_failed_events) > 0: event = all_failed_events[0] message = json.loads(event['message']) diff --git a/tests/app/clients/test_aws_cloudwatch.py b/tests/app/clients/test_aws_cloudwatch.py index 38ca50243..5a54383b5 100644 --- a/tests/app/clients/test_aws_cloudwatch.py +++ b/tests/app/clients/test_aws_cloudwatch.py @@ -58,7 +58,7 @@ def test_check_sms_success(notify_api, mocker): message_id = 'succeed' notification_id = 'ccc' with notify_api.app_context(): - aws_cloudwatch_client.check_sms(message_id, notification_id) + aws_cloudwatch_client.check_sms(message_id, notification_id, 1000000000000) # We check the 'success' log group first and if we find the message_id, we are done, so there is only 1 call assert boto_mock.filter_log_events.call_count == 1 @@ -77,7 +77,7 @@ def test_check_sms_failure(notify_api, mocker): message_id = 'fail' notification_id = 'bbb' with notify_api.app_context(): - aws_cloudwatch_client.check_sms(message_id, notification_id) + aws_cloudwatch_client.check_sms(message_id, notification_id, 1000000000000) # We check the 'success' log group and find nothing, so we then check the 'fail' log group -- two calls. assert boto_mock.filter_log_events.call_count == 2 From 7a7de4de10949ff56be848bd7dbeebf8002da841 Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Tue, 9 May 2023 09:00:04 -0700 Subject: [PATCH 8/8] fix pipfile.lock --- Pipfile.lock | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/Pipfile.lock b/Pipfile.lock index 55c6b7e39..dfab2a778 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -158,11 +158,11 @@ }, "certifi": { "hashes": [ - "sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3", - "sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18" + "sha256:0f0d56dc5a6ad56fd4ba36484d6cc34451e1c6548c61daad8c320169f91eddc7", + "sha256:c6c2e98f5c7869efca1f8916fed228dd91539f9f1b444c314c06eef02980c716" ], "index": "pypi", - "version": "==2022.12.7" + "version": "==2023.5.7" }, "cffi": { "hashes": [ @@ -1055,10 +1055,10 @@ }, "redis": { "hashes": [ - "sha256:2c19e6767c474f2e85167909061d525ed65bea9301c0770bb151e041b7ac89a2", - "sha256:73ec35da4da267d6847e47f68730fdd5f62e2ca69e3ef5885c6a78a9374c3893" + "sha256:77929bc7f5dab9adf3acba2d3bb7d7658f1e0c2f1cafe7eb36434e751c471119", + "sha256:dc87a0bdef6c8bfe1ef1e1c40be7034390c2ae02d92dcd0c7ca1729443899880" ], - "version": "==4.5.4" + "version": "==4.5.5" }, "requests": { "hashes": [ @@ -1268,11 +1268,11 @@ }, "werkzeug": { "hashes": [ - "sha256:4866679a0722de00796a74086238bb3b98d90f423f05de039abb09315487254a", - "sha256:a987caf1092edc7523edb139edb20c70571c4a8d5eed02e0b547b4739174d091" + "sha256:1d5a58e0377d1fe39d061a5de4469e414e78ccb1e1e59c0f5ad6fa1c36c52b76", + "sha256:48e5e61472fee0ddee27ebad085614ebedb7af41e88f687aaf881afb723a162f" ], "index": "pypi", - "version": "==2.3.3" + "version": "==2.3.4" }, "wrapt": { "hashes": [ @@ -1410,11 +1410,11 @@ }, "certifi": { "hashes": [ - "sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3", - "sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18" + "sha256:0f0d56dc5a6ad56fd4ba36484d6cc34451e1c6548c61daad8c320169f91eddc7", + "sha256:c6c2e98f5c7869efca1f8916fed228dd91539f9f1b444c314c06eef02980c716" ], "index": "pypi", - "version": "==2022.12.7" + "version": "==2023.5.7" }, "cffi": { "hashes": [ @@ -2191,11 +2191,11 @@ }, "werkzeug": { "hashes": [ - "sha256:4866679a0722de00796a74086238bb3b98d90f423f05de039abb09315487254a", - "sha256:a987caf1092edc7523edb139edb20c70571c4a8d5eed02e0b547b4739174d091" + "sha256:1d5a58e0377d1fe39d061a5de4469e414e78ccb1e1e59c0f5ad6fa1c36c52b76", + "sha256:48e5e61472fee0ddee27ebad085614ebedb7af41e88f687aaf881afb723a162f" ], "index": "pypi", - "version": "==2.3.3" + "version": "==2.3.4" }, "xmltodict": { "hashes": [