diff --git a/app/dao/notifications_dao.py b/app/dao/notifications_dao.py index 5346ed63a..aa57779f7 100644 --- a/app/dao/notifications_dao.py +++ b/app/dao/notifications_dao.py @@ -717,9 +717,20 @@ def notifications_not_yet_sent(should_be_sending_after_seconds, notification_typ return notifications -def dao_get_letters_to_be_printed(print_run_deadline, postage): +def dao_get_letters_to_be_printed(print_run_deadline, postage, query_limit=10000): """ - Return all letters created before the print run deadline that have not yet been sent + Return all letters created before the print run deadline that have not yet been sent. This yields in batches of 10k + to prevent the query taking too long and eating up too much memory. As each 10k batch is yielded, the + get_key_and_size_of_letters_to_be_sent_to_print function will go and fetch the s3 data, andhese start sending off + tasks to the notify-ftp app to send them. + + CAUTION! Modify this query with caution. Modifying filters etc is fine, but if we join onto another table, then + there may be undefined behaviour. Essentially we need each ORM object returned for each row to be unique, + and we should avoid modifying state of returned objects. + + For more reading: + https://docs.sqlalchemy.org/en/13/orm/query.html?highlight=yield_per#sqlalchemy.orm.query.Query.yield_per + https://www.mail-archive.com/sqlalchemy@googlegroups.com/msg12443.html """ notifications = db.session.query( Notification.id, @@ -738,7 +749,7 @@ def dao_get_letters_to_be_printed(print_run_deadline, postage): ).order_by( Notification.service_id, Notification.created_at - ).limit(50000) + ).yield_per(query_limit) return notifications diff --git a/tests/app/dao/notification_dao/test_notification_dao.py b/tests/app/dao/notification_dao/test_notification_dao.py index 3e74ca0e9..b0e4938b2 100644 --- a/tests/app/dao/notification_dao/test_notification_dao.py +++ b/tests/app/dao/notification_dao/test_notification_dao.py @@ -1688,10 +1688,20 @@ def test_letters_to_be_printed_sort_by_service(notify_db_session): second_service = create_service(service_name='second service', service_id='642bf33b-54b5-45f2-8c13-942a46616704') first_template = create_template(service=first_service, template_type='letter', postage='second') second_template = create_template(service=second_service, template_type='letter', postage='second') - notification_1 = create_notification(template=first_template, created_at=datetime(2020, 12, 1, 9, 30)) - notification_2 = create_notification(template=first_template, created_at=datetime(2020, 12, 1, 12, 30)) - notification_3 = create_notification(template=second_template, created_at=datetime(2020, 12, 1, 8, 30)) + letters_ordered_by_service_then_time = [ + create_notification(template=first_template, created_at=datetime(2020, 12, 1, 9, 30)), + create_notification(template=first_template, created_at=datetime(2020, 12, 1, 12, 30)), + create_notification(template=first_template, created_at=datetime(2020, 12, 1, 13, 30)), + create_notification(template=first_template, created_at=datetime(2020, 12, 1, 14, 30)), + create_notification(template=first_template, created_at=datetime(2020, 12, 1, 15, 30)), + create_notification(template=second_template, created_at=datetime(2020, 12, 1, 8, 30)), + create_notification(template=second_template, created_at=datetime(2020, 12, 1, 8, 31)), + create_notification(template=second_template, created_at=datetime(2020, 12, 1, 8, 32)), + create_notification(template=second_template, created_at=datetime(2020, 12, 1, 8, 33)), + create_notification(template=second_template, created_at=datetime(2020, 12, 1, 8, 34)) + ] - results = list(dao_get_letters_to_be_printed(print_run_deadline=datetime(2020, 12, 1, 17, 30), postage='second')) - assert len(results) == 3 - assert [x.id for x in results] == [notification_1.id, notification_2.id, notification_3.id] + results = list( + dao_get_letters_to_be_printed(print_run_deadline=datetime(2020, 12, 1, 17, 30), postage='second', query_limit=4) + ) + assert [x.id for x in results] == [x.id for x in letters_ordered_by_service_then_time]