Merge pull request #3014 from alphagov/get-query-in-50k-batches

Optimise dao_get_letters_to_be_printed query
This commit is contained in:
Leo Hemsted
2020-10-26 13:23:17 +00:00
committed by GitHub
3 changed files with 42 additions and 13 deletions

View File

@@ -182,15 +182,15 @@ def get_key_and_size_of_letters_to_be_sent_to_print(print_run_deadline, postage)
try:
letter_file_name = get_letter_pdf_filename(
reference=letter.reference,
crown=letter.service.crown,
crown=letter.crown,
created_at=letter.created_at,
postage=letter.postage
postage=postage
)
letter_head = s3.head_s3_object(current_app.config['LETTERS_PDF_BUCKET_NAME'], letter_file_name)
yield {
"Key": letter_file_name,
"Size": letter_head['ContentLength'],
"ServiceId": str(letter.service.id)
"ServiceId": str(letter.service_id)
}
except BotoClientError as e:
current_app.logger.exception(

View File

@@ -717,11 +717,30 @@ def notifications_not_yet_sent(should_be_sending_after_seconds, notification_typ
return notifications
def dao_get_letters_to_be_printed(print_run_deadline, postage):
def dao_get_letters_to_be_printed(print_run_deadline, postage, query_limit=10000):
"""
Return all letters created before the print run deadline that have not yet been sent
Return all letters created before the print run deadline that have not yet been sent. This yields in batches of 10k
to prevent the query taking too long and eating up too much memory. As each 10k batch is yielded, the
get_key_and_size_of_letters_to_be_sent_to_print function will go and fetch the s3 data, and then start sending off
tasks to the notify-ftp app to send them.
CAUTION! Modify this query with caution. Modifying filters etc is fine, but if we join onto another table, then
there may be undefined behaviour. Essentially we need each ORM object returned for each row to be unique,
and we should avoid modifying state of returned objects.
For more reading:
https://docs.sqlalchemy.org/en/13/orm/query.html?highlight=yield_per#sqlalchemy.orm.query.Query.yield_per
https://www.mail-archive.com/sqlalchemy@googlegroups.com/msg12443.html
"""
notifications = Notification.query.filter(
notifications = db.session.query(
Notification.id,
Notification.created_at,
Notification.reference,
Notification.service_id,
Service.crown,
).join(
Notification.service
).filter(
Notification.created_at < convert_bst_to_utc(print_run_deadline),
Notification.notification_type == LETTER_TYPE,
Notification.status == NOTIFICATION_CREATED,
@@ -730,7 +749,7 @@ def dao_get_letters_to_be_printed(print_run_deadline, postage):
).order_by(
Notification.service_id,
Notification.created_at
).limit(50000)
).yield_per(query_limit)
return notifications