split up _query_for_billing_data into three separate queries

the queries all return lots of columns, but each query has columns it
doesn't care about. eg emails don't have billable units or international
flag, letters don't have international flag, sms don't have a page count
etc. additionally, the query was grouping on things that never change,
like service id and notification type.

by making all of these literals (as in `select 1 as foo`) we see times
that are over 50% quicker for gov.uk email service.

Note: One of the tests changed because previously it involved emails and
sms with statuses that they could never be (eg returned-letter)
This commit is contained in:
Leo Hemsted
2020-02-19 10:58:06 +00:00
committed by Rebecca Law
parent f7f6be56c7
commit 0f6f2f1b91
4 changed files with 126 additions and 59 deletions

View File

@@ -5,6 +5,7 @@ from notifications_utils.statsd_decorators import statsd
from notifications_utils.timezones import convert_bst_to_utc, convert_utc_to_bst from notifications_utils.timezones import convert_bst_to_utc, convert_utc_to_bst
from sqlalchemy.dialects.postgresql import insert from sqlalchemy.dialects.postgresql import insert
from sqlalchemy import func, case, desc, Date, Integer, and_ from sqlalchemy import func, case, desc, Date, Integer, and_
from sqlalchemy.sql.expression import literal
from app import db from app import db
from app.dao.date_util import ( from app.dao.date_util import (
@@ -20,9 +21,10 @@ from app.models import (
SMS_TYPE, SMS_TYPE,
Rate, Rate,
LetterRate, LetterRate,
NOTIFICATION_STATUS_TYPES_BILLABLE,
NotificationHistory, NotificationHistory,
EMAIL_TYPE, EMAIL_TYPE,
NOTIFICATION_STATUS_TYPES_BILLABLE_SMS,
NOTIFICATION_STATUS_TYPES_SENT_EMAILS,
NOTIFICATION_STATUS_TYPES_BILLABLE_FOR_LETTERS, NOTIFICATION_STATUS_TYPES_BILLABLE_FOR_LETTERS,
AnnualBilling, AnnualBilling,
Organisation, Organisation,
@@ -327,7 +329,7 @@ def fetch_billing_data_for_day(process_day, service_id=None):
notification_type=notification_type, notification_type=notification_type,
start_date=start_date, start_date=start_date,
end_date=end_date, end_date=end_date,
service_id=service.id service=service
) )
transit_data += results transit_data += results
@@ -335,50 +337,97 @@ def fetch_billing_data_for_day(process_day, service_id=None):
return transit_data return transit_data
def _query_for_billing_data(table, notification_type, start_date, end_date, service_id): def _query_for_billing_data(table, notification_type, start_date, end_date, service):
billable_type_list = { def _email_query():
SMS_TYPE: NOTIFICATION_STATUS_TYPES_BILLABLE, return db.session.query(
EMAIL_TYPE: NOTIFICATION_STATUS_TYPES_BILLABLE, table.template_id,
LETTER_TYPE: NOTIFICATION_STATUS_TYPES_BILLABLE_FOR_LETTERS literal(service.crown).label('crown'),
} literal(service.id).label('service_id'),
sent_by_func = func.coalesce(table.sent_by, case( literal(notification_type).label('notification_type'),
[(table.notification_type == 'letter', 'dvla'), literal('ses').label('sent_by'),
(table.notification_type == 'sms', 'unknown'), literal(0).label('rate_multiplier'),
(table.notification_type == 'email', 'ses')]), ) literal(False).label('international'),
letter_page_count = case([(table.notification_type == 'letter', table.billable_units), ]) literal(None).label('letter_page_count'),
literal('none').label('postage'),
literal(0).label('billable_units'),
func.count().label('notifications_sent'),
).filter(
table.status.in_(NOTIFICATION_STATUS_TYPES_SENT_EMAILS),
table.key_type != KEY_TYPE_TEST,
table.created_at >= start_date,
table.created_at < end_date,
table.notification_type == notification_type,
table.service_id == service.id
).group_by(
table.template_id,
)
query = db.session.query( def _sms_query():
table.template_id, sent_by = func.coalesce(table.sent_by, 'unknown')
table.service_id, rate_multiplier = func.coalesce(table.rate_multiplier, 1).cast(Integer)
table.notification_type, international = func.coalesce(table.international, False)
sent_by_func.label('sent_by'), return db.session.query(
func.coalesce(table.rate_multiplier, 1).cast(Integer).label('rate_multiplier'), table.template_id,
func.coalesce(table.international, False).label('international'), literal(service.crown).label('crown'),
letter_page_count.label('letter_page_count'), literal(service.id).label('service_id'),
func.sum(table.billable_units).label('billable_units'), literal(notification_type).label('notification_type'),
func.count().label('notifications_sent'), sent_by.label('sent_by'),
Service.crown, rate_multiplier.label('rate_multiplier'),
func.coalesce(table.postage, 'none').label('postage') international.label('international'),
).filter( literal(None).label('letter_page_count'),
table.status.in_(billable_type_list[notification_type]), literal('none').label('postage'),
table.key_type != KEY_TYPE_TEST, func.sum(table.billable_units).label('billable_units'),
table.created_at >= start_date, func.count().label('notifications_sent'),
table.created_at < end_date, ).filter(
table.notification_type == notification_type, table.status.in_(NOTIFICATION_STATUS_TYPES_BILLABLE_SMS),
table.service_id == service_id table.key_type != KEY_TYPE_TEST,
).group_by( table.created_at >= start_date,
table.template_id, table.created_at < end_date,
table.service_id, table.notification_type == notification_type,
table.notification_type, table.service_id == service.id
sent_by_func, ).group_by(
letter_page_count, table.template_id,
table.rate_multiplier, sent_by,
table.international, rate_multiplier,
Service.crown, international,
table.postage, )
).join(
Service def _letter_query():
) rate_multiplier = func.coalesce(table.rate_multiplier, 1).cast(Integer)
postage = func.coalesce(table.postage, 'none')
return db.session.query(
table.template_id,
literal(service.crown).label('crown'),
literal(service.id).label('service_id'),
literal(notification_type).label('notification_type'),
literal('dvla').label('sent_by'),
rate_multiplier.label('rate_multiplier'),
literal(False).label('international'), # todo: this may change in the future.
table.billable_units.label('letter_page_count'),
postage.label('postage'),
func.sum(table.billable_units).label('billable_units'),
func.count().label('notifications_sent'),
).filter(
table.status.in_(NOTIFICATION_STATUS_TYPES_BILLABLE_FOR_LETTERS),
table.key_type != KEY_TYPE_TEST,
table.created_at >= start_date,
table.created_at < end_date,
table.notification_type == notification_type,
table.service_id == service.id
).group_by(
table.template_id,
rate_multiplier,
table.billable_units,
postage
)
query_funcs = {
SMS_TYPE: _sms_query,
EMAIL_TYPE: _email_query,
LETTER_TYPE: _letter_query
}
query = query_funcs[notification_type]()
return query.all() return query.all()

View File

@@ -1287,12 +1287,6 @@ NOTIFICATION_STATUS_SUCCESS = [
NOTIFICATION_DELIVERED NOTIFICATION_DELIVERED
] ]
NOTIFICATION_STATUS_TYPES_BILLABLE_FOR_LETTERS = [
NOTIFICATION_SENDING,
NOTIFICATION_DELIVERED,
NOTIFICATION_RETURNED_LETTER,
]
NOTIFICATION_STATUS_TYPES_BILLABLE = [ NOTIFICATION_STATUS_TYPES_BILLABLE = [
NOTIFICATION_SENDING, NOTIFICATION_SENDING,
NOTIFICATION_SENT, NOTIFICATION_SENT,
@@ -1304,6 +1298,29 @@ NOTIFICATION_STATUS_TYPES_BILLABLE = [
NOTIFICATION_RETURNED_LETTER, NOTIFICATION_RETURNED_LETTER,
] ]
NOTIFICATION_STATUS_TYPES_BILLABLE_SMS = [
NOTIFICATION_SENDING,
NOTIFICATION_SENT, # internationally
NOTIFICATION_DELIVERED,
NOTIFICATION_PENDING,
NOTIFICATION_TEMPORARY_FAILURE,
NOTIFICATION_PERMANENT_FAILURE,
]
NOTIFICATION_STATUS_TYPES_BILLABLE_FOR_LETTERS = [
NOTIFICATION_SENDING,
NOTIFICATION_DELIVERED,
NOTIFICATION_RETURNED_LETTER,
]
# we don't really have a concept of billable emails - however the ft billing table only includes emails that we have
# actually sent.
NOTIFICATION_STATUS_TYPES_SENT_EMAILS = [
NOTIFICATION_SENDING,
NOTIFICATION_DELIVERED,
NOTIFICATION_TEMPORARY_FAILURE,
NOTIFICATION_PERMANENT_FAILURE,
]
NOTIFICATION_STATUS_TYPES = [ NOTIFICATION_STATUS_TYPES = [
NOTIFICATION_CANCELLED, NOTIFICATION_CANCELLED,
NOTIFICATION_CREATED, NOTIFICATION_CREATED,

View File

@@ -166,6 +166,7 @@ def test_create_nightly_billing_for_day_different_templates(
multiplier = [0, 1] multiplier = [0, 1]
billable_units = [0, 1] billable_units = [0, 1]
rate = [0, Decimal(1.33)] rate = [0, Decimal(1.33)]
for i, record in enumerate(records): for i, record in enumerate(records):
assert record.bst_date == datetime.date(yesterday) assert record.bst_date == datetime.date(yesterday)
assert record.rate == rate[i] assert record.rate == rate[i]

View File

@@ -170,7 +170,7 @@ def test_fetch_billing_data_for_day_is_grouped_by_notification_type(notify_db_se
today = convert_utc_to_bst(datetime.utcnow()) today = convert_utc_to_bst(datetime.utcnow())
results = fetch_billing_data_for_day(today.date()) results = fetch_billing_data_for_day(today.date())
assert len(results) == 3 assert len(results) == 3
notification_types = [x[2] for x in results if x[2] in ['email', 'sms', 'letter']] notification_types = [x.notification_type for x in results]
assert len(notification_types) == 3 assert len(notification_types) == 3
@@ -280,13 +280,13 @@ def test_fetch_billing_data_for_day_bills_correctly_for_status(notify_db_session
today = convert_utc_to_bst(datetime.utcnow()) today = convert_utc_to_bst(datetime.utcnow())
results = fetch_billing_data_for_day(process_day=today.date(), service_id=service.id) results = fetch_billing_data_for_day(process_day=today.date(), service_id=service.id)
sms_results = [x for x in results if x[2] == 'sms'] sms_results = [x for x in results if x.notification_type == 'sms']
email_results = [x for x in results if x[2] == 'email'] email_results = [x for x in results if x.notification_type == 'email']
letter_results = [x for x in results if x[2] == 'letter'] letter_results = [x for x in results if x.notification_type == 'letter']
assert 8 == sms_results[0][7] # we expect as many rows as we check for notification types
assert 8 == email_results[0][7] assert 6 == sms_results[0].notifications_sent
assert 3 == letter_results[0][7] assert 4 == email_results[0].notifications_sent
assert 3 == letter_results[0].notifications_sent
def test_get_rates_for_billing(notify_db_session): def test_get_rates_for_billing(notify_db_session):
create_rate(start_date=datetime.utcnow(), value=12, notification_type='email') create_rate(start_date=datetime.utcnow(), value=12, notification_type='email')