Review notes (text before the first "diff --git" header is skipped by `git apply` and `patch`).

Amendments made to the submitted patch:
  * app/aws/s3.py: get_list_of_files_by_suffix compares the suffix case-insensitively but
    yields the key exactly as S3 reports it (a lower-cased key cannot be fetched again), and
    tolerates listing pages that carry no 'Contents' entry.
  * app/celery/scheduled_tasks.py: the ACK listing passes `last_modified=` (the real parameter
    name); blank ack lines are skipped; the zip list is no longer mutated while it is being
    iterated; an ack entry is reported as "not for today" only when it matches no zip.
  * tests: the generator result is collected once before asserting on it, and the S3 list
    mocks accept `last_modified`.

The source of this patch had lost its line breaks; whitespace-only context lines and
indentation were reconstructed from the hunk line counts and should be checked against the
base files. The `index` lines are those of the submitted patch and do not describe the
amended post-images.

diff --git a/app/aws/s3.py b/app/aws/s3.py
index 6771921e7..1b8d436d3 100644
--- a/app/aws/s3.py
+++ b/app/aws/s3.py
@@ -109,7 +109,13 @@ def upload_letters_pdf(reference, crown, filedata):
         upload_file_name, current_app.config['LETTERS_PDF_BUCKET_NAME']))
 
 
-def get_list_of_files_by_suffix(bucket_name, subfolder='', suffix=''):
+def get_list_of_files_by_suffix(bucket_name, subfolder='', suffix='', last_modified=None):
+    """Yield the keys of the listed S3 objects whose key ends with ``suffix``.
+
+    The suffix is matched case-insensitively, but every key is yielded exactly as S3 reports it.
+    When ``last_modified`` is given, objects whose ``LastModified`` is earlier are skipped; pass a
+    timezone-aware datetime (the callers use UTC) so the two values can be compared.
+    """
     s3_client = client('s3', current_app.config['AWS_REGION'])
     paginator = s3_client.get_paginator('list_objects_v2')
 
@@ -120,6 +126,9 @@ def get_list_of_files_by_suffix(bucket_name, subfolder='', suffix=''):
 
     for page in page_iterator:
-        for obj in page['Contents']:
+        # list_objects_v2 leaves 'Contents' out of a page that has no matching objects
+        for obj in page.get('Contents', []):
             key = obj['Key']
-            if key.endswith(suffix):
-                yield key
+            # S3 keys are case-sensitive: match the suffix case-insensitively but yield the real key
+            if key.lower().endswith(suffix.lower()):
+                if not last_modified or obj['LastModified'] >= last_modified:
+                    yield key
diff --git a/app/celery/scheduled_tasks.py b/app/celery/scheduled_tasks.py
index ca082657f..6f840ad6b 100644
--- a/app/celery/scheduled_tasks.py
+++ b/app/celery/scheduled_tasks.py
@@ -1,3 +1,4 @@
+import pytz
 from datetime import (
     date,
     datetime,
@@ -452,29 +453,45 @@ def daily_stats_template_usage_by_month():
 @notify_celery.task(name='raise-alert-if-no-letter-ack-file')
 @statsd(namespace="tasks")
 def letter_raise_alert_if_no_ack_file_for_zip():
+    """Raise NoAckFileReceived if any of today's letter zip files is missing from today's ack files."""
     # get a list of today's zip files
     zip_file_list = []
+
     for key in s3.get_list_of_files_by_suffix(bucket_name=current_app.config['LETTERS_PDF_BUCKET_NAME'],
-                                              subfolder=datetime.utcnow().strftime('%Y-%m-%d'), suffix='.ZIP'):
+                                              subfolder=datetime.utcnow().strftime('%Y-%m-%d'),
+                                              suffix='.zip'):
         zip_file_list.append(key)
 
-    # get acknowledgement file
+    # get acknowledgement files modified since yesterday
     ack_file_list = []
+    yesterday = datetime.now(tz=pytz.utc) - timedelta(days=1)
     for key in s3.get_list_of_files_by_suffix(bucket_name=current_app.config['DVLA_RESPONSE_BUCKET_NAME'],
-                                              subfolder='root/dispatch', suffix='.ACK.txt'):
+                                              subfolder='root/dispatch', suffix='.ACK.txt', last_modified=yesterday):
         ack_file_list.append(key)
 
-    todaystr = datetime.utcnow().strftime('%Y%m%d')
+    today_str = datetime.utcnow().strftime('%Y%m%d')
 
+    zip_not_today = []
     for key in ack_file_list:
-        if todaystr in key:
+        if today_str in key:
             content = s3.get_s3_file(current_app.config['DVLA_RESPONSE_BUCKET_NAME'], key)
-
             for zip_file in content.split('\n'):  # each line
                 s = zip_file.split('|')
-                for zf in zip_file_list:
-                    if s[0] in zf:
-                        zip_file_list.remove(zf)
+                ack_name = s[0].strip()
+                if not ack_name:
+                    # a blank line (e.g. after a trailing newline) would match every zip below
+                    continue
+                acked = [zf for zf in zip_file_list if ack_name.lower() in zf.lower()]
+                if acked:
+                    # rebuild the list: remove() on a list being iterated skips entries
+                    zip_file_list = [zf for zf in zip_file_list if zf not in acked]
+                else:
+                    zip_not_today.append(ack_name)
 
     if zip_file_list:
         raise NoAckFileReceived(message=zip_file_list)
+
+    if zip_not_today:
+        current_app.logger.info(
+            "letter ack contains zip that is not for today {} ".format(zip_not_today)
+        )
diff --git a/tests/app/aws/test_s3.py b/tests/app/aws/test_s3.py
index 23f890b9f..d4d3f196c 100644
--- a/tests/app/aws/test_s3.py
+++ b/tests/app/aws/test_s3.py
@@ -1,7 +1,7 @@
 from unittest.mock import call
 from datetime import datetime, timedelta
 import pytest
-
+import pytz
 from flask import current_app
 
 from freezegun import freeze_time
@@ -11,7 +11,8 @@ from app.aws.s3 import (
     get_s3_file,
     filter_s3_bucket_objects_within_date_range,
     remove_transformed_dvla_file,
-    upload_letters_pdf
+    upload_letters_pdf,
+    get_list_of_files_by_suffix,
 )
 from tests.app.conftest import datetime_in_past
 
@@ -173,3 +174,40 @@ def test_upload_letters_pdf_puts_in_tomorrows_bucket_after_half_five(notify_api,
     # in tomorrow's folder, but still has this evening's timestamp
         file_location='2017-12-05/NOTIFY.FOO.D.2.C.C.20171204173100.PDF'
     )
+
+
+@freeze_time("2018-01-11 00:00:00")
+@pytest.mark.parametrize('suffix_str, days_before, returned_no', [
+    ('.ACK.txt', None, 1),
+    ('.ack.txt', None, 1),
+    ('.ACK.TXT', None, 1),
+    ('', None, 2),
+    ('', 1, 1),
+])
+def test_get_list_of_files_by_suffix(notify_api, mocker, suffix_str, days_before, returned_no):
+    paginator_mock = mocker.patch('app.aws.s3.client')
+    multiple_pages_s3_object = [
+        {
+            "Contents": [
+                single_s3_object_stub('bar/foo.ACK.txt', datetime_in_past(1, 0)),
+            ]
+        },
+        {
+            "Contents": [
+                single_s3_object_stub('bar/foo1.rs.txt', datetime_in_past(2, 0)),
+            ]
+        }
+    ]
+    paginator_mock.return_value.get_paginator.return_value.paginate.return_value = multiple_pages_s3_object
+    if days_before:
+        last_modified = datetime.now(tz=pytz.utc) - timedelta(days=days_before)
+    else:
+        last_modified = None
+
+    # the function is a generator, so collect it once before making several assertions
+    keys = list(get_list_of_files_by_suffix('foo-bucket', subfolder='bar', suffix=suffix_str,
+                                            last_modified=last_modified))
+
+    assert len(keys) == returned_no
+    # the first page's key qualifies in every case and must keep its original casing
+    assert keys[0] == 'bar/foo.ACK.txt'
diff --git a/tests/app/celery/test_scheduled_tasks.py b/tests/app/celery/test_scheduled_tasks.py
index 2dfef2623..e600f1748 100644
--- a/tests/app/celery/test_scheduled_tasks.py
+++ b/tests/app/celery/test_scheduled_tasks.py
@@ -1029,20 +1029,18 @@ def test_dao_fetch_monthly_historical_stats_by_template_null_template_id_not_cou
     assert len(result) == 1
 
 
-def mock_s3_get_list_match(bucket_name, subfolder='', suffix=''):
+def mock_s3_get_list_match(bucket_name, subfolder='', suffix='', last_modified=None):
     if subfolder == '2018-01-11':
         return ['NOTIFY.20180111175007.ZIP', 'NOTIFY.20180111175008.ZIP']
-    print(suffix)
     if subfolder == 'root/dispatch':
         return ['root/dispatch/NOTIFY.20180111175733.ACK.txt']
 
 
-def mock_s3_get_list_diff(bucket_name, subfolder='', suffix=''):
+def mock_s3_get_list_diff(bucket_name, subfolder='', suffix='', last_modified=None):
     if subfolder == '2018-01-11':
         return ['NOTIFY.20180111175007.ZIP', 'NOTIFY.20180111175008.ZIP', 'NOTIFY.20180111175009.ZIP',
                 'NOTIFY.20180111175010.ZIP']
-    print(suffix)
     if subfolder == 'root/dispatch':
         return ['root/dispatch/NOTIFY.20180111175733.ACK.txt']
 
 