Files
notifications-api/app/dao/date_util.py

118 lines
3.4 KiB
Python
Raw Normal View History

2024-05-25 20:59:08 -06:00
import calendar
2021-03-10 13:55:06 +00:00
from datetime import date, datetime, time, timedelta
2024-05-23 13:59:51 -07:00
from app.utils import utc_now
def get_months_for_financial_year(year):
    """Return the month starts of ``year`` that are already in the past.

    Despite the name, the candidate months come from
    ``get_months_for_year(1, 13, year)``, i.e. January through December of
    ``year``.

    :param year: the year whose months are listed
    :return: list of naive ``datetime`` objects (first day of each month,
        midnight) that are strictly earlier than the current local time
    """
    # Read the clock once so every month is compared against the same instant
    # instead of calling datetime.now() for each candidate.
    now = datetime.now()
    return [month for month in get_months_for_year(1, 13, year) if month < now]
def get_months_for_year(start, end, year):
    """Build the first-of-month datetimes for a span of months in ``year``.

    :param start: first month number to include
    :param end: month number to stop before (exclusive, as in ``range``)
    :param year: the year the months belong to
    :return: list of naive ``datetime`` objects at midnight on day 1
    """
    month_numbers = range(start, end)
    return [datetime(year=year, month=number, day=1) for number in month_numbers]
def get_calendar_year(year):
    """Return the first and last representable moments of ``year``.

    :param year: the calendar year
    :return: ``(start, end)`` where ``start`` is ``get_new_years(year)`` and
        ``end`` is one microsecond before ``get_new_years(year + 1)``
    """
    first_moment = get_new_years(year)
    # Step back one microsecond from the following new year to stay inside
    # the requested year.
    last_moment = get_new_years(year + 1) - timedelta(microseconds=1)
    return first_moment, last_moment
def get_calendar_year_dates(year):
    """Return the first and last calendar dates of ``year``.

    :param year: the calendar year
    :return: ``(start_date, end_date)`` as ``date`` objects, taken from the
        datetimes returned by ``get_calendar_year``
    """
    start_date, end_date = (
        moment.date() for moment in get_calendar_year(year)
    )
    return (start_date, end_date)
Standardise timezones for service usage APIs We want to query for service usage in the BST financial year: 2022-04-01T00:00:00+01:00 to 2023-03-31T23:59:59+01:00 => 2022-04-01 to 2023-03-31 # bst_date Previously we were only doing this explicitly for the monthly API and it seemed like the yearly usage API was incorrectly querying: 2022-03-31T23:00:00+00:00 to 2023-03-30T23:00:00+00:00 => 2022-03-31 to 2023-03-30 # "bst_date" However, it turns out this isn't a problem for two reasons: 1. We've been lucky that none of our rates have changed since 2017, which is long ago enough that no one would care. 2. There's a quirk somewhere in Sqlalchemy / Postgres that has been compensating for the lack of explicit BST conversion. To help ensure we do this consistently in future I've DRYed-up the BST conversion into a new utility. I could have just hard-coded the dates but it seemed strange to have the knowledge twice. I've also adjusted the tests so they detect if we accidentally use data from a different financial year. (2) is why none of the test assertions actually need changing and users won't be affected. Sqlalchemy / Postgres quirk =========================== The following queries were run on the same data but results differ: FactBilling.query.filter(FactBilling.bst_date >= datetime(2021,3,31,23,0), FactBilling.bst_date <= '2021-04-05').order_by(FactBilling.bst_date).first().bst_date datetime.date(2021, 4, 1) FactBilling.query.filter(FactBilling.bst_date >= '2021-03-31 23:00:00', FactBilling.bst_date <= '2021-04-05').order_by(FactBilling.bst_date).first().bst_date datetime.date(2021, 3, 31) Looking at the actual query for the first item above still suggests the results should be the same, but for the use of "timestamp". SELECT ... 
FROM ft_billing WHERE ft_billing.service_id = '16b60315-9dab-45d3-a609-e871fbbf5345'::uuid AND ft_billing.bst_date >= '2016-03-31T23:00:00'::timestamp AND ft_billing.bst_date <= '2017-03-31T22:59:59.999999'::timestamp AND ft_billing.notification_type IN ('email', 'letter') GROUP BY ft_billing.rate, ft_billing.notification_type UNION ALL SELECT sum(ft_billing.notifications_sent) AS notifications_sent, sum(ft_billing.billable_units * ft_billing.rate_multiplier) AS billable_units, ft_billing.rate AS ft_billing_rate, ft_billing.notification_type AS ft_billing_notification_type FROM ft_billing WHERE ft_billing.service_id = '16b60315-9dab-45d3-a609-e871fbbf5345'::uuid AND ft_billing.bst_date >= '2016-03-31T23:00:00'::timestamp AND ft_billing.bst_date <= '2017-03-31T22:59:59.999999'::timestamp AND ft_billing.notification_type = 'sms' GROUP BY ft_billing.rate, ft_billing.notification_type) AS anon_1 ORDER BY anon_1.notification_type, anon_1.rate If we try some manual queries with and without '::timestamp' we get: select distinct(bst_date) from ft_billing where bst_date >= '2022-04-20T23:00:00' order by bst_date desc; bst_date ------------ 2022-04-21 2022-04-20 select distinct(bst_date) from ft_billing where bst_date >= '2022-04-20T23:00:00'::timestamp order by bst_date desc; bst_date ------------ 2022-04-21 2022-04-20 It looks like this is happening because all client connections are aware of the local timezone, and naive datetimes are interpreted as being in UTC - not necessarily true, but saves us here! The monthly API datetimes were pre-converted to dates, so none of this was relevant for deciding exactly which date to use.
2022-04-21 16:56:28 +01:00
def get_current_calendar_year():
    """Return the start and end moments of the current calendar year.

    The current year is read from ``utc_now()``.

    :return: the ``(start, end)`` tuple produced by ``get_calendar_year``
    """
    # Read the year attribute directly rather than formatting the timestamp
    # to a string and parsing it back to an int.
    return get_calendar_year(utc_now().year)
def get_new_years(year):
    """Return midnight on 1 January of ``year`` as a naive ``datetime``.

    :param year: the calendar year
    :return: ``datetime`` for ``year``-01-01 00:00:00
    """
    first_of_january = date(year, 1, 1)
    return datetime.combine(first_of_january, time.min)
def get_month_start_and_end_date_in_utc(month_year):
    """
    Return the first and last moments of the month containing month_year.

    No timezone conversion is performed here: only the year and month of
    month_year are read, and both results are naive datetimes.

    :param month_year: the date or datetime identifying the month
    :return: first_day, last_day - midnight on day 1 and 23:59:59.999999 on
        the final day of that month
    """
    _, num_days = calendar.monthrange(month_year.year, month_year.month)
    first_day = datetime(month_year.year, month_year.month, 1, 0, 0, 0)
    # 999999 is the last microsecond of a second; the previous value of 99999
    # left the final 0.9 seconds of the month outside the range.
    last_day = datetime(
        month_year.year, month_year.month, num_days, 23, 59, 59, 999999
    )
    return first_day, last_day
def get_current_calendar_year_start_year():
    """Return the year in which the current calendar year starts.

    A calendar year starts on 1 January of that same year, so the current
    moment can never precede the start of its own year. The former
    "subtract one if before the start" branch could not run and has been
    removed.

    :return: the current year as an ``int``, based on local ``datetime.now()``
    """
    # NOTE(review): get_current_calendar_year() reads utc_now() while this
    # uses the local clock - confirm whether the two should agree.
    return datetime.now().year
def get_calendar_year_for_datetime(start_date):
    """Return the calendar year that ``start_date`` falls in.

    :param start_date: a ``date`` or ``datetime``
    :return: the year of ``start_date`` as an ``int``
    """
    # A moment is never earlier than 1 January of its own year, so the old
    # "return year - 1" branch was unreachable and the year attribute is
    # always the answer.
    return start_date.year
2024-05-25 20:59:08 -06:00
def get_number_of_days_for_month(year, month):
    """Return how many days the given month of ``year`` has.

    :param year: the calendar year (decides February in leap years)
    :param month: the month number, 1-12
    :return: number of days in that month
    """
    _first_weekday, day_count = calendar.monthrange(year, month)
    return day_count
def generate_date_range(start_date, end_date=None, days=0):
    """Yield one ``date`` per day, starting at ``start_date``.

    Exactly one of ``end_date`` or ``days`` selects the range; ``end_date``
    wins when both are given.

    :param start_date: ``datetime`` to start from
    :param end_date: optional ``datetime``; days are yielded while the running
        datetime is less than or equal to it (inclusive bound)
    :param days: span in days to cover when ``end_date`` is not given
        (exclusive bound: ``start_date + days`` itself is not yielded)
    :return: generator of ``date`` objects; yields nothing when neither
        ``end_date`` nor a positive ``days`` is supplied
    """
    if end_date:
        current_date = start_date
        while current_date <= end_date:
            yield current_date.date()
            current_date += timedelta(days=1)
    elif days > 0:
        end_date = start_date + timedelta(days=days)
        current_date = start_date
        while current_date < end_date:
            yield current_date.date()
            current_date += timedelta(days=1)
    else:
        # Because this function is a generator, the message is never handed
        # back as a normal return value: it only travels on
        # StopIteration.value, and plain iteration just sees an empty range.
        # Kept as-is so existing callers keep getting an empty iteration.
        return "An end_date or number of days must be specified"
def generate_hourly_range(start_date, end_date=None, hours=0):
    """Yield datetimes one hour apart, starting at ``start_date``.

    Exactly one of ``end_date`` or ``hours`` selects the range; ``end_date``
    wins when both are given.

    :param start_date: ``datetime`` to start from
    :param end_date: optional ``datetime``; values are yielded while the
        running datetime is less than or equal to it (inclusive bound)
    :param hours: span in hours to cover when ``end_date`` is not given
        (exclusive bound: ``start_date + hours`` itself is not yielded)
    :return: generator of ``datetime`` objects; yields nothing when neither
        ``end_date`` nor a positive ``hours`` is supplied
    """
    if end_date:
        current_time = start_date
        while current_time <= end_date:
            yield current_time
            current_time += timedelta(hours=1)
    elif hours > 0:
        end_time = start_date + timedelta(hours=hours)
        current_time = start_date
        while current_time < end_time:
            yield current_time
            current_time += timedelta(hours=1)
    else:
        # Because this function is a generator, the message is never handed
        # back as a normal return value: it only travels on
        # StopIteration.value, and plain iteration just sees an empty range.
        # Kept as-is so existing callers keep getting an empty iteration.
        return "An end_date or number of hours must be specified"