Files
notifications-api/app/dao/date_util.py

88 lines
2.9 KiB
Python
Raw Normal View History

2021-03-10 13:55:06 +00:00
from datetime import date, datetime, time, timedelta
import pytz
2022-11-14 14:23:54 -05:00
from notifications_utils.timezones import (
convert_local_timezone_to_utc,
convert_utc_to_local_timezone,
2022-11-21 11:29:38 -05:00
local_timezone,
2022-11-14 14:23:54 -05:00
)
def get_months_for_financial_year(year):
    """
    Return the months of the given financial year that have already started.

    The financial year covers April to December of ``year`` followed by January to
    March of ``year + 1``. Each month is represented by midnight on its 1st day,
    passed through ``convert_local_timezone_to_utc``.

    :param year: the calendar year in which the financial year starts
    :return: list of converted month-start datetimes in chronological order,
        keeping only those strictly earlier than the current UTC time
    """
    # The month starts are converted to UTC, so they must be compared with the
    # current UTC time rather than the server's local wall clock.
    now = datetime.utcnow()
    local_month_starts = get_months_for_year(4, 13, year) + get_months_for_year(1, 4, year + 1)
    # Convert each month once and reuse the result for both the filter and the output.
    utc_month_starts = (convert_local_timezone_to_utc(month) for month in local_month_starts)
    return [month for month in utc_month_starts if month < now]
def get_months_for_year(start, end, year):
    """
    Build the first-of-month datetimes for a run of months within one calendar year.

    :param start: first month number to include
    :param end: month number to stop before (exclusive, as with ``range``)
    :param year: the calendar year the months belong to
    :return: list of naive datetimes at midnight on the 1st of each month
    """
    month_starts = []
    for month_number in range(start, end):
        month_starts.append(datetime(year, month_number, 1))
    return month_starts
def get_financial_year(year):
    """
    Return the first and last instants of the financial year starting in ``year``.

    :param year: the calendar year in which the financial year starts
    :return: (start, end) tuple; start is ``get_april_fools(year)`` and end is one
        microsecond before ``get_april_fools(year + 1)``
    """
    year_start = get_april_fools(year)
    next_year_start = get_april_fools(year + 1)
    one_microsecond = timedelta(microseconds=1)
    return year_start, next_year_start - one_microsecond
Standardise timezones for service usage APIs We want to query for service usage in the BST financial year: 2022-04-01T00:00:00+01:00 to 2023-03-31T23:59:59+01:00 => 2022-04-01 to 2023-03-31 # bst_date Previously we were only doing this explicitly for the monthly API and it seemed like the yearly usage API was incorrectly querying: 2022-03-31T23:00:00+00:00 to 2023-03-30T23:00:00+00:00 => 2022-03-31 to 2023-03-30 # "bst_date" However, it turns out this isn't a problem for two reasons: 1. We've been lucky that none of our rates have changed since 2017, which is long ago enough that no one would care. 2. There's a quirk somewhere in Sqlalchemy / Postgres that has been compensating for the lack of explicit BST conversion. To help ensure we do this consistently in future I've DRYed-up the BST conversion into a new utility. I could have just hard-coded the dates but it seemed strange to have the knowledge twice. I've also adjusted the tests so they detect if we accidentally use data from a different financial year. (2) is why none of the test assertions actually need changing and users won't be affected. Sqlalchemy / Postgres quirk =========================== The following queries were run on the same data but results differ: FactBilling.query.filter(FactBilling.bst_date >= datetime(2021,3,31,23,0), FactBilling.bst_date <= '2021-04-05').order_by(FactBilling.bst_date).first().bst_date datetime.date(2021, 4, 1) FactBilling.query.filter(FactBilling.bst_date >= '2021-03-31 23:00:00', FactBilling.bst_date <= '2021-04-05').order_by(FactBilling.bst_date).first().bst_date datetime.date(2021, 3, 31) Looking at the actual query for the first item above still suggests the results should be the same, but for the use of "timestamp". SELECT ... 
FROM ft_billing WHERE ft_billing.service_id = '16b60315-9dab-45d3-a609-e871fbbf5345'::uuid AND ft_billing.bst_date >= '2016-03-31T23:00:00'::timestamp AND ft_billing.bst_date <= '2017-03-31T22:59:59.999999'::timestamp AND ft_billing.notification_type IN ('email', 'letter') GROUP BY ft_billing.rate, ft_billing.notification_type UNION ALL SELECT sum(ft_billing.notifications_sent) AS notifications_sent, sum(ft_billing.billable_units * ft_billing.rate_multiplier) AS billable_units, ft_billing.rate AS ft_billing_rate, ft_billing.notification_type AS ft_billing_notification_type FROM ft_billing WHERE ft_billing.service_id = '16b60315-9dab-45d3-a609-e871fbbf5345'::uuid AND ft_billing.bst_date >= '2016-03-31T23:00:00'::timestamp AND ft_billing.bst_date <= '2017-03-31T22:59:59.999999'::timestamp AND ft_billing.notification_type = 'sms' GROUP BY ft_billing.rate, ft_billing.notification_type) AS anon_1 ORDER BY anon_1.notification_type, anon_1.rate If we try some manual queries with and without '::timestamp' we get: select distinct(bst_date) from ft_billing where bst_date >= '2022-04-20T23:00:00' order by bst_date desc; bst_date ------------ 2022-04-21 2022-04-20 select distinct(bst_date) from ft_billing where bst_date >= '2022-04-20T23:00:00'::timestamp order by bst_date desc; bst_date ------------ 2022-04-21 2022-04-20 It looks like this is happening because all client connections are aware of the local timezone, and naive datetimes are interpreted as being in UTC - not necessarily true, but saves us here! The monthly API datetimes were pre-converted to dates, so none of this was relevant for deciding exactly which date to use.
2022-04-21 16:56:28 +01:00
def get_financial_year_dates(year):
    """
    Return the financial year boundaries as calendar dates in the local timezone.

    The start and end datetimes from ``get_financial_year`` are passed through
    ``convert_utc_to_local_timezone`` before the time part is dropped, so the dates
    are the local ones rather than the UTC ones.

    :param year: the calendar year in which the financial year starts
    :return: (start_date, end_date) tuple of ``date`` objects
    """
    utc_start, utc_end = get_financial_year(year)
    local_start = convert_utc_to_local_timezone(utc_start)
    local_end = convert_utc_to_local_timezone(utc_end)
    return local_start.date(), local_end.date()
def get_current_financial_year():
    """
    Return the start and end datetimes of the financial year containing the current moment.

    :return: the (start, end) tuple produced by ``get_financial_year`` for the
        financial year that the current UTC time falls in
    """
    now = datetime.utcnow()
    # Decide using the real start of the financial year (local midnight on April 1,
    # expressed in UTC) rather than the UTC month number: the two disagree for the
    # hours between midnight UTC and local midnight around April 1.
    year = now.year if now >= get_april_fools(now.year) else now.year - 1
    return get_financial_year(year)
def get_april_fools(year):
    """
    Return the start of the financial year, April 1 00:00 in the local timezone, as a naive UTC datetime.

    The tzinfo is removed from the result because the database stores the timestamps without timezone.

    :param year: the year to calculate April 1, 00:00 local time for
    :return: the UTC datetime of local midnight on April 1 of the given year, without tzinfo;
        for example, when the local timezone is British Summer Time, 2016 = 2016-03-31 23:00:00
    """
    local_midnight = datetime(year, 4, 1, 0, 0, 0)
    aware_local = local_timezone.localize(local_midnight)
    aware_utc = aware_local.astimezone(pytz.UTC)
    return aware_utc.replace(tzinfo=None)
def get_month_start_and_end_date_in_utc(month_year):
    """
    Return the first and last instants of the month containing ``month_year``, converted to UTC.

    :param month_year: the date or datetime identifying the month (only its
        ``year`` and ``month`` attributes are read)
    :return: (start_date, end_date) tuple; each is the local first/last instant of the
        month passed through ``convert_local_timezone_to_utc``
    """
    import calendar
    _, num_days = calendar.monthrange(month_year.year, month_year.month)
    first_day = datetime(month_year.year, month_year.month, 1, 0, 0, 0)
    # 999999 is the largest valid microsecond value, so the range covers the whole
    # of the final second of the month.
    last_day = datetime(month_year.year, month_year.month, num_days, 23, 59, 59, 999999)
    return convert_local_timezone_to_utc(first_day), convert_local_timezone_to_utc(last_day)
def get_current_financial_year_start_year():
    """
    Return the calendar year in which the current financial year started.

    :return: the current UTC year, or the previous year if the financial year
        beginning this calendar year has not started yet
    """
    # The financial year start comes from get_financial_year as a naive UTC datetime,
    # so it must be compared with the current UTC time, not the server's local time.
    now = datetime.utcnow()
    financial_year_start, _ = get_financial_year(now.year)
    if now < financial_year_start:
        return now.year - 1
    return now.year
def get_financial_year_for_datetime(start_date):
    """
    Return the starting calendar year of the financial year that ``start_date`` falls in.

    :param start_date: a ``datetime``, or a ``date`` (treated as midnight on that day)
    :return: ``start_date``'s year, or the previous year if ``start_date`` is earlier
        than ``get_april_fools`` for its own year
    """
    # datetime is itself a subclass of date, so it is excluded explicitly. Using
    # isinstance instead of an exact type comparison means date subclasses are also
    # promoted, rather than failing later when compared with a datetime.
    if isinstance(start_date, date) and not isinstance(start_date, datetime):
        start_date = datetime.combine(start_date, time.min)
    year = start_date.year
    if start_date < get_april_fools(year):
        return year - 1
    return year