mirror of
https://github.com/GSA/notifications-api.git
synced 2025-12-20 15:31:15 -05:00
Change which letters we want to be sent to look at all days
Previously, when running the `collate_letter_pdfs_for_day` task, we
would only send letters that were created between 5:30pm yesterday and
5:30 today.
Now we send letters that were created before 5:30pm today and that are
still waiting to be sent. This will help us automatically attempt to
send letters that may have fallen through the gaps and not been sent the
previous day when they should have been.
Previously we solved the problem of letters that had fallen the gap by
having to run the task with a date parameter for example
`collate_letter_pdfs_for_day('2020-02-18'). We no longer need this date
parameter as we will always look back across previous days too for
letters that still need sending.
Note, we have to change from using the pagination `list_objects_v2` to
instead getting each individual notification from s3. We reduce load by
using `HEAD` rather than `GET` but this will still greatly increase the
number of API calls. We acknowledge there will be a small cost to this,
say 50p for 5000 letters and think this is tolerable. Boto3 also handles
retries itself so if when making one of the many HEAD requests, there is
a networking blip then it should be retried automatically for us.
This commit is contained in:
@@ -19,6 +19,12 @@ def get_s3_object(bucket_name, file_location):
|
|||||||
return s3.Object(bucket_name, file_location)
|
return s3.Object(bucket_name, file_location)
|
||||||
|
|
||||||
|
|
||||||
|
def head_s3_object(bucket_name, file_location):
|
||||||
|
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.head_object
|
||||||
|
boto_client = client('s3', current_app.config['AWS_REGION'])
|
||||||
|
return boto_client.head_object(Bucket=bucket_name, Key=file_location)
|
||||||
|
|
||||||
|
|
||||||
def file_exists(bucket_name, file_location):
|
def file_exists(bucket_name, file_location):
|
||||||
try:
|
try:
|
||||||
# try and access metadata of object
|
# try and access metadata of object
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import math
|
import math
|
||||||
import base64
|
import base64
|
||||||
from datetime import datetime
|
from datetime import datetime, timedelta
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
from hashlib import sha512
|
from hashlib import sha512
|
||||||
from base64 import urlsafe_b64encode
|
from base64 import urlsafe_b64encode
|
||||||
@@ -14,6 +14,8 @@ from requests import (
|
|||||||
from celery.exceptions import MaxRetriesExceededError
|
from celery.exceptions import MaxRetriesExceededError
|
||||||
from notifications_utils.statsd_decorators import statsd
|
from notifications_utils.statsd_decorators import statsd
|
||||||
from notifications_utils.s3 import s3upload
|
from notifications_utils.s3 import s3upload
|
||||||
|
from notifications_utils.letter_timings import LETTER_PROCESSING_DEADLINE
|
||||||
|
from notifications_utils.timezones import convert_utc_to_bst
|
||||||
|
|
||||||
from app import encryption, notify_celery
|
from app import encryption, notify_celery
|
||||||
from app.aws import s3
|
from app.aws import s3
|
||||||
@@ -25,7 +27,9 @@ from app.dao.notifications_dao import (
|
|||||||
dao_get_notification_by_reference,
|
dao_get_notification_by_reference,
|
||||||
dao_get_notifications_by_references,
|
dao_get_notifications_by_references,
|
||||||
dao_update_notifications_by_reference,
|
dao_update_notifications_by_reference,
|
||||||
|
dao_get_letters_to_be_printed,
|
||||||
)
|
)
|
||||||
|
from app.letters.utils import get_letter_pdf_filename
|
||||||
from app.errors import VirusScanError
|
from app.errors import VirusScanError
|
||||||
from app.exceptions import NotificationTechnicalFailureException
|
from app.exceptions import NotificationTechnicalFailureException
|
||||||
from app.letters.utils import (
|
from app.letters.utils import (
|
||||||
@@ -117,26 +121,31 @@ def get_letters_pdf(template, contact_block, filename, values):
|
|||||||
|
|
||||||
@notify_celery.task(name='collate-letter-pdfs-for-day')
|
@notify_celery.task(name='collate-letter-pdfs-for-day')
|
||||||
@cronitor("collate-letter-pdfs-for-day")
|
@cronitor("collate-letter-pdfs-for-day")
|
||||||
def collate_letter_pdfs_for_day(date=None):
|
def collate_letter_pdfs_for_day():
|
||||||
if not date:
|
"""
|
||||||
# Using the truncated date is ok because UTC to BST does not make a difference to the date,
|
Finds all letters which are still waiting to be sent to DVLA for printing
|
||||||
# since it is triggered mid afternoon.
|
|
||||||
date = datetime.utcnow().strftime("%Y-%m-%d")
|
|
||||||
|
|
||||||
letter_pdfs = sorted(
|
This would usually be run at 5.50pm and collect up letters created between before 5:30pm today
|
||||||
s3.get_s3_bucket_objects(
|
that have not yet been sent.
|
||||||
current_app.config['LETTERS_PDF_BUCKET_NAME'],
|
If run after midnight, it will collect up letters created before 5:30pm the day before.
|
||||||
subfolder=date
|
"""
|
||||||
),
|
date = convert_utc_to_bst(datetime.utcnow())
|
||||||
key=lambda letter: letter['Key']
|
if date.time() < LETTER_PROCESSING_DEADLINE:
|
||||||
)
|
date = date - timedelta(days=1)
|
||||||
for i, letters in enumerate(group_letters(letter_pdfs)):
|
|
||||||
|
# Using the truncated date is ok because UTC to BST does not make a difference to the date,
|
||||||
|
# since it is triggered mid afternoon.
|
||||||
|
print_run_date = date.strftime("%Y-%m-%d")
|
||||||
|
|
||||||
|
letters_to_print = get_key_and_size_of_letters_to_be_sent_to_print(print_run_date)
|
||||||
|
|
||||||
|
for i, letters in enumerate(group_letters(letters_to_print)):
|
||||||
filenames = [letter['Key'] for letter in letters]
|
filenames = [letter['Key'] for letter in letters]
|
||||||
|
|
||||||
hash = urlsafe_b64encode(sha512(''.join(filenames).encode()).digest())[:20].decode()
|
hash = urlsafe_b64encode(sha512(''.join(filenames).encode()).digest())[:20].decode()
|
||||||
# eg NOTIFY.2018-12-31.001.Wjrui5nAvObjPd-3GEL-.ZIP
|
# eg NOTIFY.2018-12-31.001.Wjrui5nAvObjPd-3GEL-.ZIP
|
||||||
dvla_filename = 'NOTIFY.{date}.{num:03}.{hash}.ZIP'.format(
|
dvla_filename = 'NOTIFY.{date}.{num:03}.{hash}.ZIP'.format(
|
||||||
date=date,
|
date=print_run_date,
|
||||||
num=i + 1,
|
num=i + 1,
|
||||||
hash=hash
|
hash=hash
|
||||||
)
|
)
|
||||||
@@ -159,6 +168,28 @@ def collate_letter_pdfs_for_day(date=None):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def get_key_and_size_of_letters_to_be_sent_to_print(print_run_date):
|
||||||
|
letters_awaiting_sending = dao_get_letters_to_be_printed(print_run_date)
|
||||||
|
|
||||||
|
letter_pdfs = []
|
||||||
|
for letter in letters_awaiting_sending:
|
||||||
|
letter_file_name = get_letter_pdf_filename(
|
||||||
|
reference=letter.reference,
|
||||||
|
crown=letter.service.crown,
|
||||||
|
sending_date=letter.created_at,
|
||||||
|
postage=letter.postage
|
||||||
|
)
|
||||||
|
|
||||||
|
current_app.logger.info(
|
||||||
|
f'Found notification {letter.id} to send to DVLA to print: {letter_file_name}'
|
||||||
|
)
|
||||||
|
|
||||||
|
letter_head = s3.head_s3_object(current_app.config['LETTERS_PDF_BUCKET_NAME'], letter_file_name)
|
||||||
|
letter_pdfs.append({"Key": letter_file_name, "Size": letter_head['ContentLength']})
|
||||||
|
|
||||||
|
return letter_pdfs
|
||||||
|
|
||||||
|
|
||||||
def group_letters(letter_pdfs):
|
def group_letters(letter_pdfs):
|
||||||
"""
|
"""
|
||||||
Group letters in chunks of MAX_LETTER_PDF_ZIP_FILESIZE. Will add files to lists, never going over that size.
|
Group letters in chunks of MAX_LETTER_PDF_ZIP_FILESIZE. Will add files to lists, never going over that size.
|
||||||
|
|||||||
@@ -731,6 +731,24 @@ def notifications_not_yet_sent(should_be_sending_after_seconds, notification_typ
|
|||||||
return notifications
|
return notifications
|
||||||
|
|
||||||
|
|
||||||
|
def dao_get_letters_to_be_printed(print_run_date):
|
||||||
|
"""
|
||||||
|
Given a date for a print run, Return all letters created before 5:30pm that day that have not yet been sent
|
||||||
|
"""
|
||||||
|
last_processing_deadline = datetime.strptime(print_run_date, "%Y-%m-%d").replace(
|
||||||
|
hour=17, minute=30, second=0, microsecond=0
|
||||||
|
)
|
||||||
|
|
||||||
|
notifications = Notification.query.filter(
|
||||||
|
Notification.created_at < convert_bst_to_utc(last_processing_deadline),
|
||||||
|
Notification.notification_type == LETTER_TYPE,
|
||||||
|
Notification.status == NOTIFICATION_CREATED
|
||||||
|
).order_by(
|
||||||
|
Notification.created_at
|
||||||
|
).all()
|
||||||
|
return notifications
|
||||||
|
|
||||||
|
|
||||||
def dao_old_letters_with_created_status():
|
def dao_old_letters_with_created_status():
|
||||||
yesterday_bst = convert_utc_to_bst(datetime.utcnow()) - timedelta(days=1)
|
yesterday_bst = convert_utc_to_bst(datetime.utcnow()) - timedelta(days=1)
|
||||||
last_processing_deadline = yesterday_bst.replace(hour=17, minute=30, second=0, microsecond=0)
|
last_processing_deadline = yesterday_bst.replace(hour=17, minute=30, second=0, microsecond=0)
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ from unittest.mock import Mock, call, ANY
|
|||||||
|
|
||||||
import base64
|
import base64
|
||||||
import boto3
|
import boto3
|
||||||
from datetime import datetime
|
from datetime import datetime, timedelta
|
||||||
from moto import mock_s3
|
from moto import mock_s3
|
||||||
from flask import current_app
|
from flask import current_app
|
||||||
from freezegun import freeze_time
|
from freezegun import freeze_time
|
||||||
@@ -20,6 +20,7 @@ from app.celery.letters_pdf_tasks import (
|
|||||||
create_letters_pdf,
|
create_letters_pdf,
|
||||||
get_letters_pdf,
|
get_letters_pdf,
|
||||||
collate_letter_pdfs_for_day,
|
collate_letter_pdfs_for_day,
|
||||||
|
get_key_and_size_of_letters_to_be_sent_to_print,
|
||||||
group_letters,
|
group_letters,
|
||||||
letter_in_created_state,
|
letter_in_created_state,
|
||||||
process_sanitised_letter,
|
process_sanitised_letter,
|
||||||
@@ -242,27 +243,113 @@ def test_create_letters_gets_the_right_logo_when_service_has_letter_branding_log
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_collate_letter_pdfs_for_day(notify_api, mocker):
|
@freeze_time('2020-02-17 18:00:00')
|
||||||
mock_s3 = mocker.patch('app.celery.tasks.s3.get_s3_bucket_objects', return_value=[
|
def test_get_key_and_size_of_letters_to_be_sent_to_print(notify_api, mocker, sample_letter_template):
|
||||||
{'Key': 'B.pDf', 'Size': 2},
|
create_notification(
|
||||||
{'Key': 'A.PDF', 'Size': 1},
|
template=sample_letter_template,
|
||||||
{'Key': 'C.pdf', 'Size': 3}
|
status='created',
|
||||||
|
reference='ref0',
|
||||||
|
created_at=(datetime.now() - timedelta(hours=2)).isoformat()
|
||||||
|
)
|
||||||
|
|
||||||
|
create_notification(
|
||||||
|
template=sample_letter_template,
|
||||||
|
status='created',
|
||||||
|
reference='ref1',
|
||||||
|
created_at=(datetime.now() - timedelta(hours=3)).isoformat()
|
||||||
|
)
|
||||||
|
|
||||||
|
create_notification(
|
||||||
|
template=sample_letter_template,
|
||||||
|
status='created',
|
||||||
|
reference='ref2',
|
||||||
|
created_at=(datetime.now() - timedelta(days=2)).isoformat()
|
||||||
|
)
|
||||||
|
|
||||||
|
# notifications we don't expect to get sent to print as they are in the wrong status
|
||||||
|
for status in ['delivered', 'validation-failed', 'cancelled', 'sending']:
|
||||||
|
create_notification(
|
||||||
|
template=sample_letter_template,
|
||||||
|
status=status,
|
||||||
|
reference='ref3',
|
||||||
|
created_at=(datetime.now() - timedelta(days=2)).isoformat()
|
||||||
|
)
|
||||||
|
|
||||||
|
# notification we don't expect to get sent as instead will make into this evenings print run
|
||||||
|
create_notification(
|
||||||
|
template=sample_letter_template,
|
||||||
|
status='created',
|
||||||
|
reference='ref4',
|
||||||
|
created_at=(datetime.now() - timedelta(minutes=1)).isoformat()
|
||||||
|
)
|
||||||
|
|
||||||
|
mock_s3 = mocker.patch('app.celery.tasks.s3.head_s3_object', side_effect=[
|
||||||
|
{'ContentLength': 2},
|
||||||
|
{'ContentLength': 1},
|
||||||
|
{'ContentLength': 3},
|
||||||
])
|
])
|
||||||
mock_group_letters = mocker.patch('app.celery.letters_pdf_tasks.group_letters', return_value=[
|
|
||||||
[{'Key': 'A.PDF', 'Size': 1}, {'Key': 'B.pDf', 'Size': 2}],
|
results = get_key_and_size_of_letters_to_be_sent_to_print('2020-02-17')
|
||||||
[{'Key': 'C.pdf', 'Size': 3}]
|
|
||||||
|
assert mock_s3.call_count == 3
|
||||||
|
mock_s3.assert_has_calls(
|
||||||
|
[
|
||||||
|
call(current_app.config['LETTERS_PDF_BUCKET_NAME'], '2020-02-16/NOTIFY.REF2.D.2.C.C.20200215180000.PDF'),
|
||||||
|
call(current_app.config['LETTERS_PDF_BUCKET_NAME'], '2020-02-17/NOTIFY.REF1.D.2.C.C.20200217150000.PDF'),
|
||||||
|
call(current_app.config['LETTERS_PDF_BUCKET_NAME'], '2020-02-17/NOTIFY.REF0.D.2.C.C.20200217160000.PDF'),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
assert len(results) == 3
|
||||||
|
assert results == [
|
||||||
|
{'Key': '2020-02-16/NOTIFY.REF2.D.2.C.C.20200215180000.PDF', 'Size': 2},
|
||||||
|
{'Key': '2020-02-17/NOTIFY.REF1.D.2.C.C.20200217150000.PDF', 'Size': 1},
|
||||||
|
{'Key': '2020-02-17/NOTIFY.REF0.D.2.C.C.20200217160000.PDF', 'Size': 3},
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@freeze_time('2020-02-17 18:00:00')
|
||||||
|
def test_collate_letter_pdfs_for_day(notify_api, sample_letter_template, mocker):
|
||||||
|
create_notification(
|
||||||
|
template=sample_letter_template,
|
||||||
|
status='created',
|
||||||
|
reference='ref0',
|
||||||
|
created_at=(datetime.now() - timedelta(hours=2)).isoformat()
|
||||||
|
)
|
||||||
|
|
||||||
|
create_notification(
|
||||||
|
template=sample_letter_template,
|
||||||
|
status='created',
|
||||||
|
reference='ref1',
|
||||||
|
created_at=(datetime.now() - timedelta(hours=3)).isoformat()
|
||||||
|
)
|
||||||
|
|
||||||
|
create_notification(
|
||||||
|
template=sample_letter_template,
|
||||||
|
status='created',
|
||||||
|
reference='ref2',
|
||||||
|
created_at=(datetime.now() - timedelta(days=2)).isoformat()
|
||||||
|
)
|
||||||
|
|
||||||
|
mocker.patch('app.celery.tasks.s3.head_s3_object', side_effect=[
|
||||||
|
{'ContentLength': 2},
|
||||||
|
{'ContentLength': 1},
|
||||||
|
{'ContentLength': 3},
|
||||||
|
])
|
||||||
|
mocker.patch('app.celery.letters_pdf_tasks.group_letters', return_value=[
|
||||||
|
[{'Key': '2020-02-16/A.PDF', 'Size': 1}, {'Key': '2020-02-17/B.pDf', 'Size': 2}],
|
||||||
|
[{'Key': '2020-02-17/C.pdf', 'Size': 3}]
|
||||||
])
|
])
|
||||||
mock_celery = mocker.patch('app.celery.letters_pdf_tasks.notify_celery.send_task')
|
mock_celery = mocker.patch('app.celery.letters_pdf_tasks.notify_celery.send_task')
|
||||||
|
|
||||||
collate_letter_pdfs_for_day('2017-01-02')
|
collate_letter_pdfs_for_day()
|
||||||
|
|
||||||
mock_s3.assert_called_once_with('test-letters-pdf', subfolder='2017-01-02')
|
assert len(mock_celery.call_args_list) == 2
|
||||||
mock_group_letters.assert_called_once_with(sorted(mock_s3.return_value, key=lambda x: x['Key']))
|
|
||||||
assert mock_celery.call_args_list[0] == call(
|
assert mock_celery.call_args_list[0] == call(
|
||||||
name='zip-and-send-letter-pdfs',
|
name='zip-and-send-letter-pdfs',
|
||||||
kwargs={
|
kwargs={
|
||||||
'filenames_to_zip': ['A.PDF', 'B.pDf'],
|
'filenames_to_zip': ['2020-02-16/A.PDF', '2020-02-17/B.pDf'],
|
||||||
'upload_filename': 'NOTIFY.2017-01-02.001.oqdjIM2-NAUU9Sm5Slmi.ZIP'
|
'upload_filename': 'NOTIFY.2020-02-17.001.fGRjtoP1V9XUrng24Zzq.ZIP'
|
||||||
},
|
},
|
||||||
queue='process-ftp-tasks',
|
queue='process-ftp-tasks',
|
||||||
compression='zlib'
|
compression='zlib'
|
||||||
@@ -270,20 +357,70 @@ def test_collate_letter_pdfs_for_day(notify_api, mocker):
|
|||||||
assert mock_celery.call_args_list[1] == call(
|
assert mock_celery.call_args_list[1] == call(
|
||||||
name='zip-and-send-letter-pdfs',
|
name='zip-and-send-letter-pdfs',
|
||||||
kwargs={
|
kwargs={
|
||||||
'filenames_to_zip': ['C.pdf'],
|
'filenames_to_zip': ['2020-02-17/C.pdf'],
|
||||||
'upload_filename': 'NOTIFY.2017-01-02.002.tdr7hcdPieiqjkVoS4kU.ZIP'
|
'upload_filename': 'NOTIFY.2020-02-17.002.Ay8mP80pe12NH46clM08.ZIP'
|
||||||
},
|
},
|
||||||
queue='process-ftp-tasks',
|
queue='process-ftp-tasks',
|
||||||
compression='zlib'
|
compression='zlib'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@freeze_time('2018-09-12 17:50:00')
|
@freeze_time('2020-02-18 02:00:00')
|
||||||
def test_collate_letter_pdfs_for_day_works_without_date_param(notify_api, mocker):
|
def test_collate_letter_pdfs_for_day_when_run_after_midnight(notify_api, sample_letter_template, mocker):
|
||||||
mock_s3 = mocker.patch('app.celery.tasks.s3.get_s3_bucket_objects')
|
# created_at times for notifications choosen to match times in above test test_collate_letter_pdfs_for_day
|
||||||
|
create_notification(
|
||||||
|
template=sample_letter_template,
|
||||||
|
status='created',
|
||||||
|
reference='ref0',
|
||||||
|
created_at=(datetime.now() - timedelta(hours=10)).isoformat()
|
||||||
|
)
|
||||||
|
|
||||||
|
create_notification(
|
||||||
|
template=sample_letter_template,
|
||||||
|
status='created',
|
||||||
|
reference='ref1',
|
||||||
|
created_at=(datetime.now() - timedelta(hours=11)).isoformat()
|
||||||
|
)
|
||||||
|
|
||||||
|
create_notification(
|
||||||
|
template=sample_letter_template,
|
||||||
|
status='created',
|
||||||
|
reference='ref2',
|
||||||
|
created_at=(datetime.now() - timedelta(days=2, hours=8)).isoformat()
|
||||||
|
)
|
||||||
|
|
||||||
|
mocker.patch('app.celery.tasks.s3.head_s3_object', side_effect=[
|
||||||
|
{'ContentLength': 2},
|
||||||
|
{'ContentLength': 1},
|
||||||
|
{'ContentLength': 3},
|
||||||
|
])
|
||||||
|
mocker.patch('app.celery.letters_pdf_tasks.group_letters', return_value=[
|
||||||
|
[{'Key': '2020-02-16/A.PDF', 'Size': 1}, {'Key': '2020-02-17/B.pDf', 'Size': 2}],
|
||||||
|
[{'Key': '2020-02-17/C.pdf', 'Size': 3}]
|
||||||
|
])
|
||||||
|
mock_celery = mocker.patch('app.celery.letters_pdf_tasks.notify_celery.send_task')
|
||||||
|
|
||||||
collate_letter_pdfs_for_day()
|
collate_letter_pdfs_for_day()
|
||||||
expected_date = '2018-09-12'
|
|
||||||
mock_s3.assert_called_once_with('test-letters-pdf', subfolder=expected_date)
|
assert len(mock_celery.call_args_list) == 2
|
||||||
|
assert mock_celery.call_args_list[0] == call(
|
||||||
|
name='zip-and-send-letter-pdfs',
|
||||||
|
kwargs={
|
||||||
|
'filenames_to_zip': ['2020-02-16/A.PDF', '2020-02-17/B.pDf'],
|
||||||
|
'upload_filename': 'NOTIFY.2020-02-17.001.fGRjtoP1V9XUrng24Zzq.ZIP'
|
||||||
|
},
|
||||||
|
queue='process-ftp-tasks',
|
||||||
|
compression='zlib'
|
||||||
|
)
|
||||||
|
assert mock_celery.call_args_list[1] == call(
|
||||||
|
name='zip-and-send-letter-pdfs',
|
||||||
|
kwargs={
|
||||||
|
'filenames_to_zip': ['2020-02-17/C.pdf'],
|
||||||
|
'upload_filename': 'NOTIFY.2020-02-17.002.Ay8mP80pe12NH46clM08.ZIP'
|
||||||
|
},
|
||||||
|
queue='process-ftp-tasks',
|
||||||
|
compression='zlib'
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_group_letters_splits_on_file_size(notify_api, mocker):
|
def test_group_letters_splits_on_file_size(notify_api, mocker):
|
||||||
|
|||||||
Reference in New Issue
Block a user