From 0f6f2f1b91a7646ae19aa64f3a77f675c3eae92e Mon Sep 17 00:00:00 2001 From: Leo Hemsted Date: Wed, 19 Feb 2020 10:58:06 +0000 Subject: [PATCH] split up _query_for_billing_data into three separate queries the queries all return lots of columns, but each query has columns it doesn't care about. eg emails don't have billable units or international flag, letters don't have international flag, sms don't have a page count etc. additionally, the query was grouping on things that never change, like service id and notification type. by making all of these literals (as in `select 1 as foo`) we see times that are over 50% quicker for gov.uk email service. Note: One of the tests changed because previously it involved emails and sms with statuses that they could never be (eg returned-letter) --- app/dao/fact_billing_dao.py | 139 +++++++++++++++-------- app/models.py | 29 ++++- tests/app/celery/test_reporting_tasks.py | 1 + tests/app/dao/test_ft_billing_dao.py | 16 +-- 4 files changed, 126 insertions(+), 59 deletions(-) diff --git a/app/dao/fact_billing_dao.py b/app/dao/fact_billing_dao.py index 1527f8c88..5b5da3f31 100644 --- a/app/dao/fact_billing_dao.py +++ b/app/dao/fact_billing_dao.py @@ -5,6 +5,7 @@ from notifications_utils.statsd_decorators import statsd from notifications_utils.timezones import convert_bst_to_utc, convert_utc_to_bst from sqlalchemy.dialects.postgresql import insert from sqlalchemy import func, case, desc, Date, Integer, and_ +from sqlalchemy.sql.expression import literal from app import db from app.dao.date_util import ( @@ -20,9 +21,10 @@ from app.models import ( SMS_TYPE, Rate, LetterRate, - NOTIFICATION_STATUS_TYPES_BILLABLE, NotificationHistory, EMAIL_TYPE, + NOTIFICATION_STATUS_TYPES_BILLABLE_SMS, + NOTIFICATION_STATUS_TYPES_SENT_EMAILS, NOTIFICATION_STATUS_TYPES_BILLABLE_FOR_LETTERS, AnnualBilling, Organisation, @@ -327,7 +329,7 @@ def fetch_billing_data_for_day(process_day, service_id=None): notification_type=notification_type, start_date=start_date, end_date=end_date, - service_id=service.id + service=service ) transit_data += results @@ -335,50 +337,97 @@ def fetch_billing_data_for_day(process_day, service_id=None): return transit_data -def _query_for_billing_data(table, notification_type, start_date, end_date, service_id): - billable_type_list = { - SMS_TYPE: NOTIFICATION_STATUS_TYPES_BILLABLE, - EMAIL_TYPE: NOTIFICATION_STATUS_TYPES_BILLABLE, - LETTER_TYPE: NOTIFICATION_STATUS_TYPES_BILLABLE_FOR_LETTERS - } - sent_by_func = func.coalesce(table.sent_by, case( - [(table.notification_type == 'letter', 'dvla'), - (table.notification_type == 'sms', 'unknown'), - (table.notification_type == 'email', 'ses')]), ) - letter_page_count = case([(table.notification_type == 'letter', table.billable_units), ]) +def _query_for_billing_data(table, notification_type, start_date, end_date, service): + def _email_query(): + return db.session.query( + table.template_id, + literal(service.crown).label('crown'), + literal(service.id).label('service_id'), + literal(notification_type).label('notification_type'), + literal('ses').label('sent_by'), + literal(0).label('rate_multiplier'), + literal(False).label('international'), + literal(None).label('letter_page_count'), + literal('none').label('postage'), + literal(0).label('billable_units'), + func.count().label('notifications_sent'), + ).filter( + table.status.in_(NOTIFICATION_STATUS_TYPES_SENT_EMAILS), + table.key_type != KEY_TYPE_TEST, + table.created_at >= start_date, + table.created_at < end_date, + table.notification_type == notification_type, + table.service_id == service.id + ).group_by( + table.template_id, + ) - query = db.session.query( - table.template_id, - table.service_id, - table.notification_type, - sent_by_func.label('sent_by'), - func.coalesce(table.rate_multiplier, 1).cast(Integer).label('rate_multiplier'), - func.coalesce(table.international, False).label('international'), - letter_page_count.label('letter_page_count'), - func.sum(table.billable_units).label('billable_units'), - func.count().label('notifications_sent'), - Service.crown, - func.coalesce(table.postage, 'none').label('postage') - ).filter( - table.status.in_(billable_type_list[notification_type]), - table.key_type != KEY_TYPE_TEST, - table.created_at >= start_date, - table.created_at < end_date, - table.notification_type == notification_type, - table.service_id == service_id - ).group_by( - table.template_id, - table.service_id, - table.notification_type, - sent_by_func, - letter_page_count, - table.rate_multiplier, - table.international, - Service.crown, - table.postage, - ).join( - Service - ) + def _sms_query(): + sent_by = func.coalesce(table.sent_by, 'unknown') + rate_multiplier = func.coalesce(table.rate_multiplier, 1).cast(Integer) + international = func.coalesce(table.international, False) + return db.session.query( + table.template_id, + literal(service.crown).label('crown'), + literal(service.id).label('service_id'), + literal(notification_type).label('notification_type'), + sent_by.label('sent_by'), + rate_multiplier.label('rate_multiplier'), + international.label('international'), + literal(None).label('letter_page_count'), + literal('none').label('postage'), + func.sum(table.billable_units).label('billable_units'), + func.count().label('notifications_sent'), + ).filter( + table.status.in_(NOTIFICATION_STATUS_TYPES_BILLABLE_SMS), + table.key_type != KEY_TYPE_TEST, + table.created_at >= start_date, + table.created_at < end_date, + table.notification_type == notification_type, + table.service_id == service.id + ).group_by( + table.template_id, + sent_by, + rate_multiplier, + international, + ) + + def _letter_query(): + rate_multiplier = func.coalesce(table.rate_multiplier, 1).cast(Integer) + postage = func.coalesce(table.postage, 'none') + return db.session.query( + table.template_id, + literal(service.crown).label('crown'), + literal(service.id).label('service_id'), + literal(notification_type).label('notification_type'), + literal('dvla').label('sent_by'), + rate_multiplier.label('rate_multiplier'), + literal(False).label('international'), # todo: this may change in the future. + table.billable_units.label('letter_page_count'), + postage.label('postage'), + func.sum(table.billable_units).label('billable_units'), + func.count().label('notifications_sent'), + ).filter( + table.status.in_(NOTIFICATION_STATUS_TYPES_BILLABLE_FOR_LETTERS), + table.key_type != KEY_TYPE_TEST, + table.created_at >= start_date, + table.created_at < end_date, + table.notification_type == notification_type, + table.service_id == service.id + ).group_by( + table.template_id, + rate_multiplier, + table.billable_units, + postage + ) + + query_funcs = { + SMS_TYPE: _sms_query, + EMAIL_TYPE: _email_query, + LETTER_TYPE: _letter_query + } + + query = query_funcs[notification_type]() return query.all() diff --git a/app/models.py b/app/models.py index 23c0f9cd6..a2df80f5a 100644 --- a/app/models.py +++ b/app/models.py @@ -1287,12 +1287,6 @@ NOTIFICATION_STATUS_SUCCESS = [ NOTIFICATION_DELIVERED ] -NOTIFICATION_STATUS_TYPES_BILLABLE_FOR_LETTERS = [ - NOTIFICATION_SENDING, - NOTIFICATION_DELIVERED, - NOTIFICATION_RETURNED_LETTER, -] - NOTIFICATION_STATUS_TYPES_BILLABLE = [ NOTIFICATION_SENDING, NOTIFICATION_SENT, @@ -1304,6 +1298,29 @@ NOTIFICATION_STATUS_TYPES_BILLABLE = [ NOTIFICATION_RETURNED_LETTER, ] +NOTIFICATION_STATUS_TYPES_BILLABLE_SMS = [ + NOTIFICATION_SENDING, + NOTIFICATION_SENT, # internationally + NOTIFICATION_DELIVERED, + NOTIFICATION_PENDING, + NOTIFICATION_TEMPORARY_FAILURE, + NOTIFICATION_PERMANENT_FAILURE, +] + +NOTIFICATION_STATUS_TYPES_BILLABLE_FOR_LETTERS = [ + NOTIFICATION_SENDING, + NOTIFICATION_DELIVERED, + NOTIFICATION_RETURNED_LETTER, +] +# we don't really have a concept of billable emails - however the ft billing table only includes emails that we have +# actually sent. +NOTIFICATION_STATUS_TYPES_SENT_EMAILS = [ + NOTIFICATION_SENDING, + NOTIFICATION_DELIVERED, + NOTIFICATION_TEMPORARY_FAILURE, + NOTIFICATION_PERMANENT_FAILURE, +] + NOTIFICATION_STATUS_TYPES = [ NOTIFICATION_CANCELLED, NOTIFICATION_CREATED, diff --git a/tests/app/celery/test_reporting_tasks.py b/tests/app/celery/test_reporting_tasks.py index c0fc69074..c1eb4d298 100644 --- a/tests/app/celery/test_reporting_tasks.py +++ b/tests/app/celery/test_reporting_tasks.py @@ -166,6 +166,7 @@ def test_create_nightly_billing_for_day_different_templates( multiplier = [0, 1] billable_units = [0, 1] rate = [0, Decimal(1.33)] + for i, record in enumerate(records): assert record.bst_date == datetime.date(yesterday) assert record.rate == rate[i] diff --git a/tests/app/dao/test_ft_billing_dao.py b/tests/app/dao/test_ft_billing_dao.py index b533cbdbc..77507474a 100644 --- a/tests/app/dao/test_ft_billing_dao.py +++ b/tests/app/dao/test_ft_billing_dao.py @@ -170,7 +170,7 @@ def test_fetch_billing_data_for_day_is_grouped_by_notification_type(notify_db_se today = convert_utc_to_bst(datetime.utcnow()) results = fetch_billing_data_for_day(today.date()) assert len(results) == 3 - notification_types = [x[2] for x in results if x[2] in ['email', 'sms', 'letter']] + notification_types = [x.notification_type for x in results] assert len(notification_types) == 3 @@ -280,13 +280,13 @@ def test_fetch_billing_data_for_day_bills_correctly_for_status(notify_db_session today = convert_utc_to_bst(datetime.utcnow()) results = fetch_billing_data_for_day(process_day=today.date(), service_id=service.id) - sms_results = [x for x in results if x[2] == 'sms'] - email_results = [x for x in results if x[2] == 'email'] - letter_results = [x for x in results if x[2] == 'letter'] - assert 8 == sms_results[0][7] - assert 8 == email_results[0][7] - assert 3 == letter_results[0][7] - + sms_results = [x for x in results if x.notification_type == 'sms'] + email_results = [x for x in results if x.notification_type == 'email'] + letter_results = [x for x in results if x.notification_type == 'letter'] + # we expect as many rows as we check for notification types + assert 6 == sms_results[0].notifications_sent + assert 4 == email_results[0].notifications_sent + assert 3 == letter_results[0].notifications_sent def test_get_rates_for_billing(notify_db_session): create_rate(start_date=datetime.utcnow(), value=12, notification_type='email')