From 17767dfa627cef88ef06da66cc175763ce2abb2f Mon Sep 17 00:00:00 2001 From: Rebecca Law Date: Mon, 9 Jan 2017 11:13:24 +0000 Subject: [PATCH 1/4] Calculating the billing units per month was taking too long for a services with a lot of data. The query now does the sum per month. --- app/dao/notifications_dao.py | 22 +++++++------------- tests/app/dao/test_notification_dao.py | 28 ++++++++++++++------------ 2 files changed, 22 insertions(+), 28 deletions(-) diff --git a/app/dao/notifications_dao.py b/app/dao/notifications_dao.py index d62e62607..b5a1b6591 100644 --- a/app/dao/notifications_dao.py +++ b/app/dao/notifications_dao.py @@ -1,14 +1,13 @@ + import pytz from datetime import ( datetime, timedelta, - date -) -from itertools import groupby + date) from flask import current_app from werkzeug.datastructures import MultiDict -from sqlalchemy import (desc, func, or_, and_, asc) +from sqlalchemy import (desc, func, or_, and_, asc, extract) from sqlalchemy.orm import joinedload from app import db, create_uuid @@ -217,23 +216,16 @@ def get_notification_billable_unit_count_per_month(service_id, year): start, end = get_financial_year(year) notifications = db.session.query( - NotificationHistory.created_at, - NotificationHistory.billable_units - ).order_by( - NotificationHistory.created_at + func.date_trunc("month", (NotificationHistory.created_at + extract("timezone", func.timezone("Europe/London", NotificationHistory.created_at)) * timedelta(seconds=1))), + func.sum(NotificationHistory.billable_units) ).filter( NotificationHistory.billable_units != 0, NotificationHistory.service_id == service_id, NotificationHistory.created_at >= start, NotificationHistory.created_at < end - ) + ).group_by(func.date_trunc("month", (NotificationHistory.created_at + extract("timezone", func.timezone("Europe/London", NotificationHistory.created_at)) * timedelta(seconds=1)))).all() - return [ - (month, sum(count for _, count in row)) - for month, row in groupby( - notifications, lambda row: get_bst_month(row[0]) - ) - ] + return [(datetime.strftime(x[0], "%B"), x[1]) for x in notifications] @statsd(namespace="dao") diff --git a/tests/app/dao/test_notification_dao.py b/tests/app/dao/test_notification_dao.py index 8e60f58a3..029435775 100644 --- a/tests/app/dao/test_notification_dao.py +++ b/tests/app/dao/test_notification_dao.py @@ -700,21 +700,23 @@ def test_get_all_notifications_for_job_by_status(notify_db, notify_db_session, s def test_get_notification_billable_unit_count_per_month(notify_db, notify_db_session, sample_service): - for year, month, day in ( - (2017, 1, 15), # ↓ 2016 financial year - (2016, 8, 1), - (2016, 7, 15), - (2016, 4, 15), - (2016, 4, 15), - (2016, 4, 1), # ↓ 2015 financial year - (2016, 3, 31), - (2016, 1, 15) + for year, month, day, hour, minute, second in ( + (2017, 1, 15, 23, 59, 59), # ↓ 2016 financial year + (2016, 9, 30, 23, 59, 59), # counts in October (BST conversion) + (2016, 6, 30, 23, 50, 20), + (2016, 7, 15, 9, 20, 25), + (2016, 4, 15, 12, 30, 00), + (2016, 4, 1, 1, 1, 00), + (2016, 4, 1, 0, 0, 00), # ↓ 2015 financial year + (2016, 3, 20, 22, 40, 45), + (2015, 11, 20, 22, 40, 45), + (2016, 1, 15, 2, 30, 40) ): sample_notification( notify_db, notify_db_session, service=sample_service, created_at=datetime( - year, month, day, 0, 0, 0, 0 - ) - timedelta(hours=1, seconds=1) # one second before midnight + year, month, day, hour, minute, second, 0 + ) ) for financial_year, months in ( @@ -724,11 +726,11 @@ def test_get_notification_billable_unit_count_per_month(notify_db, notify_db_ses ), ( 2016, - [('April', 2), ('July', 2), ('January', 1)] + [('April', 2), ('July', 2), ('October', 1), ('January', 1)] ), ( 2015, - [('January', 1), ('March', 2)] + [('November', 1), ('January', 1), ('March', 1), ('April', 1)] ), ( 2014, From 88e1e58f24d26b69c4ee00d77dbfcf78dde0b847 Mon Sep 17 00:00:00 2001 From: Rebecca Law Date: Mon, 9 Jan 2017 14:54:25 +0000 Subject: [PATCH 2/4] Fix the get_april_fools method to use March 31 23:00 as the timestamp. The financial year start April 1, 00:00 BST and our dates are stored as UTC. Added a test for get_april_fools. Added some test more test data for get_billable_unit_count_per_month. --- app/dao/notifications_dao.py | 29 +++++++++++++++----------- tests/app/dao/test_notification_dao.py | 27 +++++++++++++----------- 2 files changed, 32 insertions(+), 24 deletions(-) diff --git a/app/dao/notifications_dao.py b/app/dao/notifications_dao.py index b5a1b6591..4295eb48b 100644 --- a/app/dao/notifications_dao.py +++ b/app/dao/notifications_dao.py @@ -216,14 +216,27 @@ def get_notification_billable_unit_count_per_month(service_id, year): start, end = get_financial_year(year) notifications = db.session.query( - func.date_trunc("month", (NotificationHistory.created_at + extract("timezone", func.timezone("Europe/London", NotificationHistory.created_at)) * timedelta(seconds=1))), + func.date_trunc("month", + (NotificationHistory.created_at + + extract("timezone", func.timezone("Europe/London", + NotificationHistory.created_at)) * timedelta(seconds=1))), func.sum(NotificationHistory.billable_units) ).filter( NotificationHistory.billable_units != 0, NotificationHistory.service_id == service_id, NotificationHistory.created_at >= start, NotificationHistory.created_at < end - ).group_by(func.date_trunc("month", (NotificationHistory.created_at + extract("timezone", func.timezone("Europe/London", NotificationHistory.created_at)) * timedelta(seconds=1)))).all() + ).group_by(func.date_trunc("month", + (NotificationHistory.created_at + + extract("timezone", + func.timezone("Europe/London", + NotificationHistory.created_at)) * timedelta(seconds=1))) + ).order_by(func.date_trunc("month", + (NotificationHistory.created_at + + extract("timezone", + func.timezone("Europe/London", + NotificationHistory.created_at)) * timedelta( + seconds=1)))).all() return [(datetime.strftime(x[0], "%B"), x[1]) for x in notifications] @@ -376,13 +389,5 @@ def get_financial_year(year): def get_april_fools(year): - return datetime( - year, 4, 1, 0, 0, 0, 0, - pytz.timezone("Europe/London") - ).astimezone(pytz.utc) - - -def get_bst_month(datetime): - return pytz.utc.localize(datetime).astimezone( - pytz.timezone("Europe/London") - ).strftime('%B') + return pytz.timezone('Europe/London').localize(datetime(year, 4, 1, 0, 0, 0)).astimezone(pytz.UTC).replace( + tzinfo=None) diff --git a/tests/app/dao/test_notification_dao.py b/tests/app/dao/test_notification_dao.py index 029435775..e686deca8 100644 --- a/tests/app/dao/test_notification_dao.py +++ b/tests/app/dao/test_notification_dao.py @@ -39,7 +39,8 @@ from app.dao.notifications_dao import ( update_notification_status_by_reference, dao_delete_notifications_and_history_by_id, dao_timeout_notifications, - get_financial_year) + get_financial_year, + get_april_fools) from tests.app.conftest import (sample_notification, sample_template, sample_email_template, sample_service, sample_job, sample_api_key) @@ -702,15 +703,15 @@ def test_get_notification_billable_unit_count_per_month(notify_db, notify_db_ses for year, month, day, hour, minute, second in ( (2017, 1, 15, 23, 59, 59), # ↓ 2016 financial year - (2016, 9, 30, 23, 59, 59), # counts in October (BST conversion) + (2016, 9, 30, 23, 59, 59), # counts in October with BST conversion (2016, 6, 30, 23, 50, 20), (2016, 7, 15, 9, 20, 25), - (2016, 4, 15, 12, 30, 00), (2016, 4, 1, 1, 1, 00), - (2016, 4, 1, 0, 0, 00), # ↓ 2015 financial year - (2016, 3, 20, 22, 40, 45), + (2016, 4, 1, 0, 0, 00), + (2016, 3, 31, 23, 00, 1), # counts in April with BST conversion + (2015, 4, 1, 13, 8, 59), # ↓ 2015 financial year (2015, 11, 20, 22, 40, 45), - (2016, 1, 15, 2, 30, 40) + (2016, 1, 31, 23, 30, 40) # counts in January no BST conversion in winter ): sample_notification( notify_db, notify_db_session, service=sample_service, @@ -726,11 +727,11 @@ def test_get_notification_billable_unit_count_per_month(notify_db, notify_db_ses ), ( 2016, - [('April', 2), ('July', 2), ('October', 1), ('January', 1)] + [('April', 3), ('July', 2), ('October', 1), ('January', 1)] ), ( 2015, - [('November', 1), ('January', 1), ('March', 1), ('April', 1)] + [('April', 1), ('November', 1), ('January', 1)] ), ( 2014, @@ -1203,7 +1204,9 @@ def test_should_exclude_test_key_notifications_by_default( def test_get_financial_year(): start, end = get_financial_year(2000) - assert start.tzinfo == pytz.utc - assert start.isoformat() == '2000-04-01T00:01:00+00:00' - assert end.tzinfo == pytz.utc - assert end.isoformat() == '2001-04-01T00:01:00+00:00' + assert str(start) == '2000-03-31 23:00:00' + assert str(end) == '2001-03-31 23:00:00' + + +def test_get_april_fools(): + assert str(get_april_fools(2016)) == '2016-03-31 23:00:00' From a1d8ca936408a0fb4908b541225e2faa21e0c6cc Mon Sep 17 00:00:00 2001 From: Rebecca Law Date: Mon, 9 Jan 2017 15:34:24 +0000 Subject: [PATCH 3/4] Use a variable for the massive month date conversion. Document the massive date function. Document the get_april_fools function. --- app/dao/notifications_dao.py | 36 +++++++++++++++----------- tests/app/dao/test_notification_dao.py | 4 ++- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/app/dao/notifications_dao.py b/app/dao/notifications_dao.py index 4295eb48b..3f08f8889 100644 --- a/app/dao/notifications_dao.py +++ b/app/dao/notifications_dao.py @@ -215,28 +215,28 @@ def get_notifications_for_job(service_id, job_id, filter_dict=None, page=1, page def get_notification_billable_unit_count_per_month(service_id, year): start, end = get_financial_year(year) + """ + The query needs to sum the billable_units per month, but this needs to be the month in BST (Britsh Standard Time). + The database stores all timestamps as UTC without the timezone. + - First get the created_at as Europe/London or BST + - then extract the timezone portion of the datetime + - then add the timezone portion to the created_at datetime + - lastly truncate the datetime to month to group the sum of the billable_units + """ + month = func.date_trunc("month", (NotificationHistory.created_at + + extract("timezone", + func.timezone("Europe/London", + NotificationHistory.created_at)) * timedelta(seconds=1))) notifications = db.session.query( - func.date_trunc("month", - (NotificationHistory.created_at + - extract("timezone", func.timezone("Europe/London", - NotificationHistory.created_at)) * timedelta(seconds=1))), + month, func.sum(NotificationHistory.billable_units) ).filter( NotificationHistory.billable_units != 0, NotificationHistory.service_id == service_id, NotificationHistory.created_at >= start, NotificationHistory.created_at < end - ).group_by(func.date_trunc("month", - (NotificationHistory.created_at + - extract("timezone", - func.timezone("Europe/London", - NotificationHistory.created_at)) * timedelta(seconds=1))) - ).order_by(func.date_trunc("month", - (NotificationHistory.created_at + - extract("timezone", - func.timezone("Europe/London", - NotificationHistory.created_at)) * timedelta( - seconds=1)))).all() + ).group_by(month + ).order_by(month).all() return [(datetime.strftime(x[0], "%B"), x[1]) for x in notifications] @@ -389,5 +389,11 @@ def get_financial_year(year): def get_april_fools(year): + """ + This function converts the start of the financial year April 1, 00:00 as BST (British Standard Time) to UTC, + the tzinfo is lastly removed from the datetime becasue the database stores the timestamps without timezone. + :param year: the year to calculate the April 1, 00:00 BST for + :return: the datetime of April 1 for the given year, for example 2016 = 2016-03-31 23:00:00 + """ return pytz.timezone('Europe/London').localize(datetime(year, 4, 1, 0, 0, 0)).astimezone(pytz.UTC).replace( tzinfo=None) diff --git a/tests/app/dao/test_notification_dao.py b/tests/app/dao/test_notification_dao.py index e686deca8..50777beb6 100644 --- a/tests/app/dao/test_notification_dao.py +++ b/tests/app/dao/test_notification_dao.py @@ -1209,4 +1209,6 @@ def test_get_financial_year(): def test_get_april_fools(): - assert str(get_april_fools(2016)) == '2016-03-31 23:00:00' + april_fools = get_april_fools(2016) + assert str(april_fools) == '2016-03-31 23:00:00' + assert april_fools.tzinfo is None From ffa54821d9339aafd3ff646796c985cdc3e8b01c Mon Sep 17 00:00:00 2001 From: Rebecca Law Date: Tue, 10 Jan 2017 11:21:33 +0000 Subject: [PATCH 4/4] The previous commit was failing the test in the docker container. My local version of postgres is 9.6 and docker is using 9.5 (so I am ahead). However, we found a more simple solution that works for both versions. --- app/dao/notifications_dao.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/app/dao/notifications_dao.py b/app/dao/notifications_dao.py index 3f08f8889..d0fd17e8e 100644 --- a/app/dao/notifications_dao.py +++ b/app/dao/notifications_dao.py @@ -216,17 +216,15 @@ def get_notification_billable_unit_count_per_month(service_id, year): start, end = get_financial_year(year) """ - The query needs to sum the billable_units per month, but this needs to be the month in BST (Britsh Standard Time). + The query needs to sum the billable_units per month, but this needs to be the month in BST (British Standard Time). The database stores all timestamps as UTC without the timezone. - - First get the created_at as Europe/London or BST - - then extract the timezone portion of the datetime - - then add the timezone portion to the created_at datetime + - First set the timezone on created_at to UTC + - then convert the timezone to BST (or Europe/London) - lastly truncate the datetime to month to group the sum of the billable_units """ - month = func.date_trunc("month", (NotificationHistory.created_at + - extract("timezone", - func.timezone("Europe/London", - NotificationHistory.created_at)) * timedelta(seconds=1))) + month = func.date_trunc("month", + func.timezone("Europe/London", func.timezone("UTC", + NotificationHistory.created_at))) notifications = db.session.query( month, func.sum(NotificationHistory.billable_units) @@ -235,8 +233,11 @@ def get_notification_billable_unit_count_per_month(service_id, year): NotificationHistory.service_id == service_id, NotificationHistory.created_at >= start, NotificationHistory.created_at < end - ).group_by(month - ).order_by(month).all() + ).group_by( + month + ).order_by( + month + ).all() return [(datetime.strftime(x[0], "%B"), x[1]) for x in notifications]