add a hash of letter filenames to the dvla zip file name

if we partially retry a day, we would create new zip files, containing
different letters (if some were processed succesfully). We need these
files to have different filenames to earlier zip files so that we can
avoid overwriting log data in zips_sent.

Hashing the filename means that we'll only overwrite if it was the same
file containing the same content.
This commit is contained in:
Leo Hemsted
2019-03-21 15:40:24 +00:00
parent 31d4bfed84
commit b288031adb
2 changed files with 31 additions and 9 deletions

View File

@@ -2,6 +2,8 @@ import io
import math
from datetime import datetime
from uuid import UUID
from hashlib import sha512
from base64 import urlsafe_b64encode
from PyPDF2.utils import PdfReadError
from botocore.exceptions import ClientError as BotoClientError
@@ -115,14 +117,24 @@ def collate_letter_pdfs_for_day(date=None):
# since it is triggered mid afternoon.
date = datetime.utcnow().strftime("%Y-%m-%d")
letter_pdfs = s3.get_s3_bucket_objects(
current_app.config['LETTERS_PDF_BUCKET_NAME'],
subfolder=date
letter_pdfs = sorted(
s3.get_s3_bucket_objects(
current_app.config['LETTERS_PDF_BUCKET_NAME'],
subfolder=date
),
key=lambda letter: letter['Key']
)
for i, letters in enumerate(group_letters(letter_pdfs)):
# eg NOTIFY.2018-12-31.001.ZIP
dvla_filename = 'NOTIFY.{date}.{num:03}.ZIP'.format(date=date, num=i + 1)
filenames = [letter['Key'] for letter in letters]
hash = urlsafe_b64encode(sha512(''.join(filenames).encode()).digest())[:20].decode()
# eg NOTIFY.2018-12-31.001.Wjrui5nAvObjPd-3GEL-.ZIP
dvla_filename = 'NOTIFY.{date}.{num:03}.{hash}.ZIP'.format(
date=date,
num=i + 1,
hash=hash
)
current_app.logger.info(
'Calling task zip-and-send-letter-pdfs for {} pdfs to upload {} with total size {:,} bytes'.format(
len(filenames),