mirror of
https://github.com/GSA/notifications-api.git
synced 2026-01-30 06:21:50 -05:00
Merge pull request #3014 from alphagov/get-query-in-50k-batches
Optimise dao_get_letters_to_be_printed query
This commit is contained in:
@@ -182,15 +182,15 @@ def get_key_and_size_of_letters_to_be_sent_to_print(print_run_deadline, postage)
|
||||
try:
|
||||
letter_file_name = get_letter_pdf_filename(
|
||||
reference=letter.reference,
|
||||
crown=letter.service.crown,
|
||||
crown=letter.crown,
|
||||
created_at=letter.created_at,
|
||||
postage=letter.postage
|
||||
postage=postage
|
||||
)
|
||||
letter_head = s3.head_s3_object(current_app.config['LETTERS_PDF_BUCKET_NAME'], letter_file_name)
|
||||
yield {
|
||||
"Key": letter_file_name,
|
||||
"Size": letter_head['ContentLength'],
|
||||
"ServiceId": str(letter.service.id)
|
||||
"ServiceId": str(letter.service_id)
|
||||
}
|
||||
except BotoClientError as e:
|
||||
current_app.logger.exception(
|
||||
|
||||
@@ -717,11 +717,30 @@ def notifications_not_yet_sent(should_be_sending_after_seconds, notification_typ
|
||||
return notifications
|
||||
|
||||
|
||||
def dao_get_letters_to_be_printed(print_run_deadline, postage):
|
||||
def dao_get_letters_to_be_printed(print_run_deadline, postage, query_limit=10000):
|
||||
"""
|
||||
Return all letters created before the print run deadline that have not yet been sent
|
||||
Return all letters created before the print run deadline that have not yet been sent. This yields in batches of 10k
|
||||
to prevent the query taking too long and eating up too much memory. As each 10k batch is yielded, the
|
||||
get_key_and_size_of_letters_to_be_sent_to_print function will go and fetch the s3 data, and then start sending off
|
||||
tasks to the notify-ftp app to send them.
|
||||
|
||||
CAUTION! Modify this query with caution. Modifying filters etc is fine, but if we join onto another table, then
|
||||
there may be undefined behaviour. Essentially we need each ORM object returned for each row to be unique,
|
||||
and we should avoid modifying state of returned objects.
|
||||
|
||||
For more reading:
|
||||
https://docs.sqlalchemy.org/en/13/orm/query.html?highlight=yield_per#sqlalchemy.orm.query.Query.yield_per
|
||||
https://www.mail-archive.com/sqlalchemy@googlegroups.com/msg12443.html
|
||||
"""
|
||||
notifications = Notification.query.filter(
|
||||
notifications = db.session.query(
|
||||
Notification.id,
|
||||
Notification.created_at,
|
||||
Notification.reference,
|
||||
Notification.service_id,
|
||||
Service.crown,
|
||||
).join(
|
||||
Notification.service
|
||||
).filter(
|
||||
Notification.created_at < convert_bst_to_utc(print_run_deadline),
|
||||
Notification.notification_type == LETTER_TYPE,
|
||||
Notification.status == NOTIFICATION_CREATED,
|
||||
@@ -730,7 +749,7 @@ def dao_get_letters_to_be_printed(print_run_deadline, postage):
|
||||
).order_by(
|
||||
Notification.service_id,
|
||||
Notification.created_at
|
||||
).limit(50000)
|
||||
).yield_per(query_limit)
|
||||
return notifications
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user