mirror of
https://github.com/GSA/notifications-api.git
synced 2026-01-31 15:15:38 -05:00
Merge pull request #3014 from alphagov/get-query-in-50k-batches
Optimise dao_get_letters_to_be_printed query
This commit is contained in:
@@ -182,15 +182,15 @@ def get_key_and_size_of_letters_to_be_sent_to_print(print_run_deadline, postage)
|
|||||||
try:
|
try:
|
||||||
letter_file_name = get_letter_pdf_filename(
|
letter_file_name = get_letter_pdf_filename(
|
||||||
reference=letter.reference,
|
reference=letter.reference,
|
||||||
crown=letter.service.crown,
|
crown=letter.crown,
|
||||||
created_at=letter.created_at,
|
created_at=letter.created_at,
|
||||||
postage=letter.postage
|
postage=postage
|
||||||
)
|
)
|
||||||
letter_head = s3.head_s3_object(current_app.config['LETTERS_PDF_BUCKET_NAME'], letter_file_name)
|
letter_head = s3.head_s3_object(current_app.config['LETTERS_PDF_BUCKET_NAME'], letter_file_name)
|
||||||
yield {
|
yield {
|
||||||
"Key": letter_file_name,
|
"Key": letter_file_name,
|
||||||
"Size": letter_head['ContentLength'],
|
"Size": letter_head['ContentLength'],
|
||||||
"ServiceId": str(letter.service.id)
|
"ServiceId": str(letter.service_id)
|
||||||
}
|
}
|
||||||
except BotoClientError as e:
|
except BotoClientError as e:
|
||||||
current_app.logger.exception(
|
current_app.logger.exception(
|
||||||
|
|||||||
@@ -717,11 +717,30 @@ def notifications_not_yet_sent(should_be_sending_after_seconds, notification_typ
|
|||||||
return notifications
|
return notifications
|
||||||
|
|
||||||
|
|
||||||
def dao_get_letters_to_be_printed(print_run_deadline, postage):
|
def dao_get_letters_to_be_printed(print_run_deadline, postage, query_limit=10000):
|
||||||
"""
|
"""
|
||||||
Return all letters created before the print run deadline that have not yet been sent
|
Return all letters created before the print run deadline that have not yet been sent. This yields in batches of 10k
|
||||||
|
to prevent the query taking too long and eating up too much memory. As each 10k batch is yielded, the
|
||||||
|
get_key_and_size_of_letters_to_be_sent_to_print function will go and fetch the s3 data, andhese start sending off
|
||||||
|
tasks to the notify-ftp app to send them.
|
||||||
|
|
||||||
|
CAUTION! Modify this query with caution. Modifying filters etc is fine, but if we join onto another table, then
|
||||||
|
there may be undefined behaviour. Essentially we need each ORM object returned for each row to be unique,
|
||||||
|
and we should avoid modifying state of returned objects.
|
||||||
|
|
||||||
|
For more reading:
|
||||||
|
https://docs.sqlalchemy.org/en/13/orm/query.html?highlight=yield_per#sqlalchemy.orm.query.Query.yield_per
|
||||||
|
https://www.mail-archive.com/sqlalchemy@googlegroups.com/msg12443.html
|
||||||
"""
|
"""
|
||||||
notifications = Notification.query.filter(
|
notifications = db.session.query(
|
||||||
|
Notification.id,
|
||||||
|
Notification.created_at,
|
||||||
|
Notification.reference,
|
||||||
|
Notification.service_id,
|
||||||
|
Service.crown,
|
||||||
|
).join(
|
||||||
|
Notification.service
|
||||||
|
).filter(
|
||||||
Notification.created_at < convert_bst_to_utc(print_run_deadline),
|
Notification.created_at < convert_bst_to_utc(print_run_deadline),
|
||||||
Notification.notification_type == LETTER_TYPE,
|
Notification.notification_type == LETTER_TYPE,
|
||||||
Notification.status == NOTIFICATION_CREATED,
|
Notification.status == NOTIFICATION_CREATED,
|
||||||
@@ -730,7 +749,7 @@ def dao_get_letters_to_be_printed(print_run_deadline, postage):
|
|||||||
).order_by(
|
).order_by(
|
||||||
Notification.service_id,
|
Notification.service_id,
|
||||||
Notification.created_at
|
Notification.created_at
|
||||||
).limit(50000)
|
).yield_per(query_limit)
|
||||||
return notifications
|
return notifications
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1688,10 +1688,20 @@ def test_letters_to_be_printed_sort_by_service(notify_db_session):
|
|||||||
second_service = create_service(service_name='second service', service_id='642bf33b-54b5-45f2-8c13-942a46616704')
|
second_service = create_service(service_name='second service', service_id='642bf33b-54b5-45f2-8c13-942a46616704')
|
||||||
first_template = create_template(service=first_service, template_type='letter', postage='second')
|
first_template = create_template(service=first_service, template_type='letter', postage='second')
|
||||||
second_template = create_template(service=second_service, template_type='letter', postage='second')
|
second_template = create_template(service=second_service, template_type='letter', postage='second')
|
||||||
notification_1 = create_notification(template=first_template, created_at=datetime(2020, 12, 1, 9, 30))
|
letters_ordered_by_service_then_time = [
|
||||||
notification_2 = create_notification(template=first_template, created_at=datetime(2020, 12, 1, 12, 30))
|
create_notification(template=first_template, created_at=datetime(2020, 12, 1, 9, 30)),
|
||||||
notification_3 = create_notification(template=second_template, created_at=datetime(2020, 12, 1, 8, 30))
|
create_notification(template=first_template, created_at=datetime(2020, 12, 1, 12, 30)),
|
||||||
|
create_notification(template=first_template, created_at=datetime(2020, 12, 1, 13, 30)),
|
||||||
|
create_notification(template=first_template, created_at=datetime(2020, 12, 1, 14, 30)),
|
||||||
|
create_notification(template=first_template, created_at=datetime(2020, 12, 1, 15, 30)),
|
||||||
|
create_notification(template=second_template, created_at=datetime(2020, 12, 1, 8, 30)),
|
||||||
|
create_notification(template=second_template, created_at=datetime(2020, 12, 1, 8, 31)),
|
||||||
|
create_notification(template=second_template, created_at=datetime(2020, 12, 1, 8, 32)),
|
||||||
|
create_notification(template=second_template, created_at=datetime(2020, 12, 1, 8, 33)),
|
||||||
|
create_notification(template=second_template, created_at=datetime(2020, 12, 1, 8, 34))
|
||||||
|
]
|
||||||
|
|
||||||
results = list(dao_get_letters_to_be_printed(print_run_deadline=datetime(2020, 12, 1, 17, 30), postage='second'))
|
results = list(
|
||||||
assert len(results) == 3
|
dao_get_letters_to_be_printed(print_run_deadline=datetime(2020, 12, 1, 17, 30), postage='second', query_limit=4)
|
||||||
assert results == [notification_1, notification_2, notification_3]
|
)
|
||||||
|
assert [x.id for x in results] == [x.id for x in letters_ordered_by_service_then_time]
|
||||||
|
|||||||
Reference in New Issue
Block a user