2022-02-09 17:44:00 +00:00
|
|
|
from datetime import date, datetime, timedelta
|
2018-04-25 14:24:47 +01:00
|
|
|
|
2018-05-16 12:21:59 +01:00
|
|
|
from flask import current_app
|
2022-04-20 16:51:32 +01:00
|
|
|
from sqlalchemy import Date, Integer, and_, desc, func, union
|
2018-05-15 11:21:10 +01:00
|
|
|
from sqlalchemy.dialects.postgresql import insert
|
2020-07-10 17:43:40 +01:00
|
|
|
from sqlalchemy.sql.expression import case, literal
|
2018-04-06 11:55:49 +01:00
|
|
|
|
|
|
|
|
from app import db
|
2023-08-25 08:10:33 -07:00
|
|
|
from app.dao.date_util import get_calendar_year_dates, get_calendar_year_for_datetime
|
2023-07-10 11:06:29 -07:00
|
|
|
from app.dao.organization_dao import dao_get_organization_live_services
|
2024-01-16 07:37:21 -05:00
|
|
|
from app.enums import KeyType, NotificationStatus, NotificationType
|
2018-04-24 17:37:04 +01:00
|
|
|
from app.models import (
|
2019-08-06 13:29:59 +01:00
|
|
|
AnnualBilling,
|
2021-03-10 13:55:06 +00:00
|
|
|
FactBilling,
|
Use notification view for status / billing tasks
This fixes a bug where (letter) notifications left in sending would
temporarily get excluded from billing and status calculations once
the service retention period had elapsed, and then get included once
again when they finally get marked as delivered.*
Status and billing tasks shouldn't need to have knowledge about which
table their data is in and getting this wrong is the fundamental cause
of the bug here. Adding a view across both tables abstracts this away
while keeping the query complexity the same.
Using a view also has the added benefit that we no longer need to care
when the status / billing tasks run in comparison to the deletion task,
since we will retrieve the same data irrespective (see below for a more
detailed discussion on data integrity).
*Such a scenario is rare but has happened.
A New View
==========
I've included all the columns that are shared between the two tables,
even though only a subset are actually needed. Having extra columns
has no impact and may be useful in future.
Although the view isn't actually a table, SQLAlchemy appears to wrap
it without any issues, noting that the package doesn't have any direct
support for "view models". Because we're never inserting data, we don't
need most of the kwargs when defining columns.*
*Note that the "default" kwarg doesn't affect data that's retrieved,
only data that's written (if no value is set).
Data Integrity
==============
The (new) tests cover the main scenarios.
We need to be careful with how the view interacts with the deletion /
archiving task. There are two concerns here:
- Duplicates. The deletion task inserts before it deletes [^1], so we
could end up double counting. It turns out this isn't a problem because
a Postgres UNION is an implicit "DISTINCT" [^2]. I've also verified this
manually, just to be on the safe side.
- No data. It's conceivable that the query will check the history table
just before the insertion, then check the notifications table just after
the deletion. It turns out this isn't a problem either because the whole
query sees the same DB snapshot [^3][^4].*
*I can't think of a way to test this as it's a race condition, but I'm
confident the Postgres docs are accurate.
Performance
===========
I copied the relevant (non-PII) columns from Production for data going
back to 2022-04-01. I then ran several tests.
Queries using the new view still make use of indices on a per-table basis,
as the following query plan illustrates:
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
GroupAggregate (cost=1130820.02..1135353.89 rows=46502 width=97) (actual time=629.863..756.703 rows=72 loops=1)
Group Key: notifications_all_time_view.template_id, notifications_all_time_view.sent_by, notifications_all_time_view.rate_multiplier, notifications_all_time_view.international
-> Sort (cost=1130820.02..1131401.28 rows=232506 width=85) (actual time=629.756..708.914 rows=217563 loops=1)
Sort Key: notifications_all_time_view.template_id, notifications_all_time_view.sent_by, notifications_all_time_view.rate_multiplier, notifications_all_time_view.international
Sort Method: external merge Disk: 9320kB
-> Subquery Scan on notifications_all_time_view (cost=1088506.43..1098969.20 rows=232506 width=85) (actual time=416.118..541.669 rows=217563 loops=1)
-> Unique (cost=1088506.43..1096644.14 rows=232506 width=725) (actual time=416.115..513.065 rows=217563 loops=1)
-> Sort (cost=1088506.43..1089087.70 rows=232506 width=725) (actual time=416.115..451.190 rows=217563 loops=1)
Sort Key: notifications_no_pii.id, notifications_no_pii.job_id, notifications_no_pii.service_id, notifications_no_pii.template_id, notifications_no_pii.key_type, notifications_no_pii.billable_units, notifications_no_pii.notification_type, notifications_no_pii.created_at, notifications_no_pii.sent_by, notifications_no_pii.notification_status, notifications_no_pii.international, notifications_no_pii.rate_multiplier, notifications_no_pii.postage
Sort Method: external merge Disk: 23936kB
-> Append (cost=114.42..918374.12 rows=232506 width=725) (actual time=2.051..298.229 rows=217563 loops=1)
-> Bitmap Heap Scan on notifications_no_pii (cost=114.42..8557.55 rows=2042 width=113) (actual time=1.405..1.442 rows=0 loops=1)
Recheck Cond: ((service_id = 'c5956607-20b1-48b4-8983-85d11404e61f'::uuid) AND (notification_type = 'sms'::notification_type) AND (notification_status = ANY ('{sending,sent,delivered,pending,temporary-failure,permanent-failure}'::text[])) AND (created_at >= '2022-05-01 23:00:00'::timestamp without time zone) AND (created_at < '2022-05-02 23:00:00'::timestamp without time zone))
Filter: ((key_type)::text = ANY ('{normal,team}'::text[]))
-> Bitmap Index Scan on ix_notifications_no_piiservice_id_composite (cost=0.00..113.91 rows=2202 width=0) (actual time=1.402..1.439 rows=0 loops=1)
Index Cond: ((service_id = 'c5956607-20b1-48b4-8983-85d11404e61f'::uuid) AND (notification_type = 'sms'::notification_type) AND (notification_status = ANY ('{sending,sent,delivered,pending,temporary-failure,permanent-failure}'::text[])) AND (created_at >= '2022-05-01 23:00:00'::timestamp without time zone) AND (created_at < '2022-05-02 23:00:00'::timestamp without time zone))
-> Index Scan using ix_notifications_history_no_pii_service_id_composite on notifications_history_no_pii (cost=0.70..906328.97 rows=230464 width=113) (actual time=0.645..281.612 rows=217563 loops=1)
Index Cond: ((service_id = 'c5956607-20b1-48b4-8983-85d11404e61f'::uuid) AND ((key_type)::text = ANY ('{normal,team}'::text[])) AND (notification_type = 'sms'::notification_type) AND (created_at >= '2022-05-01 23:00:00'::timestamp without time zone) AND (created_at < '2022-05-02 23:00:00'::timestamp without time zone))
Filter: (notification_status = ANY ('{sending,sent,delivered,pending,temporary-failure,permanent-failure}'::text[]))
Planning Time: 18.032 ms
Execution Time: 759.001 ms
(21 rows)
Queries using the new view appear to be slower than without, but the
differences I've seen are minimal: the original queries execute in
seconds locally and in Production, so it's not a big issue.
Notes: Performance
==================
I downloaded a minimal set of columns for testing:
\copy (
select
id, notification_type, key_type, created_at, service_id,
template_id, sent_by, rate_multiplier, international,
billable_units, postage, job_id, notification_status
from notifications
) to 'notifications.csv' delimiter ',' csv header;
CREATE TABLE notifications_no_pii (
id uuid NOT NULL,
notification_type public.notification_type NOT NULL,
key_type character varying(255) NOT NULL,
created_at timestamp without time zone NOT NULL,
service_id uuid,
template_id uuid,
sent_by character varying,
rate_multiplier numeric,
international boolean,
billable_units integer NOT NULL,
postage character varying,
job_id uuid,
notification_status text
);
copy notifications_no_pii from '/Users/ben.thorner/Desktop/notifications.csv' delimiter ',' csv header;
CREATE INDEX ix_notifications_no_piicreated_at ON notifications_no_pii USING btree (created_at);
CREATE INDEX ix_notifications_no_piijob_id ON notifications_no_pii USING btree (job_id);
CREATE INDEX ix_notifications_no_piinotification_type_composite ON notifications_no_pii USING btree (notification_type, notification_status, created_at);
CREATE INDEX ix_notifications_no_piiservice_created_at ON notifications_no_pii USING btree (service_id, created_at);
CREATE INDEX ix_notifications_no_piiservice_id_composite ON notifications_no_pii USING btree (service_id, notification_type, notification_status, created_at);
CREATE INDEX ix_notifications_no_piitemplate_id ON notifications_no_pii USING btree (template_id);
And similarly for the history table. I then created a separate view
across both of these temporary tables using just these columns.
To test performance I created some queries that reflect what is run
by the billing [^5] and status [^6] tasks e.g.
explain analyze select template_id, sent_by, rate_multiplier, international, sum(billable_units), count(*)
from notifications_all_time_view
where
notification_status in ('sending', 'sent', 'delivered', 'pending', 'temporary-failure', 'permanent-failure')
and key_type in ('normal', 'team')
and created_at >= '2022-05-01 23:00'
and created_at < '2022-05-02 23:00'
and notification_type = 'sms'
and service_id = 'c5956607-20b1-48b4-8983-85d11404e61f'
group by 1,2,3,4;
explain analyze select template_id, job_id, key_type, notification_status, count(*)
from notifications_all_time_view
where created_at >= '2022-05-01 23:00'
and created_at < '2022-05-02 23:00'
and notification_type = 'sms'
and service_id = 'c5956607-20b1-48b4-8983-85d11404e61f'
and key_type in ('normal', 'team')
group by 1,2,3,4;
Between running queries I restarted my local database and also ran
a command to purge disk caches [^7].
I tested on a few services:
- c5956607-20b1-48b4-8983-85d11404e61f on 2022-05-02 (high volume)
- 0cc696c6-b792-409d-99e9-64232f461b0f on 2022-04-06 (highest volume)
- 01135db6-7819-4121-8b97-4aa2d741e372 on 2022-04-14 (very low volume)
All execution results are of the same magnitude using the view compared
to the worst case of either table on its own.
[^1]: https://github.com/alphagov/notifications-api/blob/00a04ebf54c97fc695f013de0a497e5490ddb558/app/dao/notifications_dao.py#L389
[^2]: https://stackoverflow.com/questions/49925/what-is-the-difference-between-union-and-union-all
[^3]: https://www.postgresql.org/docs/current/transaction-iso.html
[^4]: https://dba.stackexchange.com/questions/210485/can-sub-selects-change-in-one-single-query-in-a-read-committed-transaction
[^5]: https://github.com/alphagov/notifications-api/blob/00a04ebf54c97fc695f013de0a497e5490ddb558/app/dao/fact_billing_dao.py#L471
[^6]: https://github.com/alphagov/notifications-api/blob/00a04ebf54c97fc695f013de0a497e5490ddb558/app/dao/fact_notification_status_dao.py#L58
[^7]: https://stackoverflow.com/questions/28845524/echo-3-proc-sys-vm-drop-caches-on-mac-osx
2022-05-19 11:28:44 +01:00
|
|
|
NotificationAllTimeView,
|
2021-03-10 13:55:06 +00:00
|
|
|
NotificationHistory,
|
2023-07-10 11:06:29 -07:00
|
|
|
Organization,
|
2021-03-10 13:55:06 +00:00
|
|
|
Rate,
|
|
|
|
|
Service,
|
2018-04-24 17:37:04 +01:00
|
|
|
)
|
2023-05-10 08:39:50 -07:00
|
|
|
from app.utils import get_midnight_in_utc
|
2018-04-09 11:38:00 +01:00
|
|
|
|
|
|
|
|
|
Fix calculating remaining free allowance for SMS
The way it was done before, the remainder was incorrect in the
billing report and in the org usage query - it was the sms remainder
left at the start of the report period, not at the end of that period.
This became apparent when we tried to show sms_remainder on the org
usage report, where start date is always the start of the financial year.
We saw that sms sent by services did not reduce their free allowance
remainder according to the report. As a result of this, we had to
temporarily remove the sms_remainder column from the report, until
we fix the bug - it has been fixed now, yay!
I think the bug has snuck in partially because our fixtures for testing
this part of the code are quite complex, so it was
harder to see that numbers don't add up. I have added comments
to the tests to try and make it a bit clearer why the results are
as they are.
I also added comments to the code, and renamed some variables,
to make it easier to understand, as there are quite a few
moving parts in it - subqueries and the like.
I also renamed the fetch_sms_free_allowance_remainder method to
fetch_sms_free_allowance_remainder_until_date so it is clearer
what it does.
2021-12-09 17:50:03 +00:00
|
|
|
def fetch_sms_free_allowance_remainder_until_date(end_date):
    """Build a query for each service's remaining free SMS allowance as of end_date.

    Returns an (unexecuted) query producing one row per service with columns:
    service_id, free_sms_fragment_limit, billable_units (units used from the
    start of the financial year up to but excluding end_date), and
    sms_remainder (allowance minus usage, floored at zero).
    """
    # ASSUMPTION: AnnualBilling has been populated for year.
    year = get_calendar_year_for_datetime(end_date)
    year_start = date(year, 4, 1)

    # Units used since the start of the financial year; coalesce to 0 so
    # services with no ft_billing rows still get a meaningful remainder.
    units_used = func.coalesce(
        func.sum(FactBilling.billable_units * FactBilling.rate_multiplier), 0
    )

    remainder = func.greatest(
        (AnnualBilling.free_sms_fragment_limit - units_used).cast(Integer), 0
    )

    # Outer-join condition: restrict the ft_billing rows folded into the sum
    # to SMS usage within [year_start, end_date) for the matching service.
    usage_join = and_(
        AnnualBilling.service_id == FactBilling.service_id,
        FactBilling.local_date >= year_start,
        FactBilling.local_date < end_date,
        FactBilling.notification_type == NotificationType.SMS,
    )

    return (
        db.session.query(
            AnnualBilling.service_id.label("service_id"),
            AnnualBilling.free_sms_fragment_limit,
            units_used.label("billable_units"),
            remainder.label("sms_remainder"),
        )
        # Outer join so a service with no ft_billing rows is still returned,
        # letting callers use its full free_sms_fragment_limit.
        .outerjoin(FactBilling, usage_join)
        .filter(AnnualBilling.financial_year_start == year)
        .group_by(
            AnnualBilling.service_id,
            AnnualBilling.free_sms_fragment_limit,
        )
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def fetch_sms_billing_for_all_services(start_date, end_date):
    """Return per-service SMS billing rows for the given reporting period.

    Each result row carries the organization and service identifiers, the SMS
    rate, the free allowance limit and up-to-date remainder, billable and
    chargeable unit counts for the period, and the resulting cost.
    """
    # ASSUMPTION: AnnualBilling has been populated for year.
    allowance_at_start = fetch_sms_free_allowance_remainder_until_date(
        start_date
    ).subquery()

    units_in_period = func.sum(
        FactBilling.billable_units * FactBilling.rate_multiplier
    )

    # Allowance left at the end of the period: the remainder at the report's
    # start date minus the units accrued during the period, floored at zero.
    remaining_allowance = func.greatest(
        allowance_at_start.c.sms_remainder - units_in_period, 0
    )

    # The billable units cover only [start_date, end_date], so to work out how
    # many are chargeable we subtract whatever free allowance was still left
    # at the report's start date, floored at zero.
    chargeable_units = func.greatest(
        units_in_period - allowance_at_start.c.sms_remainder, 0
    )
    cost = chargeable_units * FactBilling.rate

    query = (
        db.session.query(
            Organization.name.label("organization_name"),
            Organization.id.label("organization_id"),
            Service.name.label("service_name"),
            Service.id.label("service_id"),
            allowance_at_start.c.free_sms_fragment_limit,
            FactBilling.rate.label("sms_rate"),
            remaining_allowance.label("sms_remainder"),
            units_in_period.label("sms_billable_units"),
            chargeable_units.label("chargeable_billable_sms"),
            cost.label("sms_cost"),
        )
        .select_from(Service)
        .outerjoin(
            allowance_at_start,
            Service.id == allowance_at_start.c.service_id,
        )
        .outerjoin(Service.organization)
        .join(FactBilling, FactBilling.service_id == Service.id)
        .filter(
            FactBilling.local_date >= start_date,
            FactBilling.local_date <= end_date,
            FactBilling.notification_type == NotificationType.SMS,
        )
        .group_by(
            Organization.name,
            Organization.id,
            Service.id,
            Service.name,
            allowance_at_start.c.free_sms_fragment_limit,
            allowance_at_start.c.sms_remainder,
            FactBilling.rate,
        )
        .order_by(Organization.name, Service.name)
    )

    return query.all()
|
|
|
|
|
|
|
|
|
|
|
2018-05-11 16:25:16 +01:00
|
|
|
def fetch_billing_totals_for_year(service_id, year):
    """
    Returns a row for each distinct rate and notification_type from ft_billing
    over the specified financial year e.g.

    (
        rate=0.0165,
        notification_type=sms,
        notifications_sent=123,
        ...
    )

    The "query_service_<type>..." subqueries for each notification_type all
    return the same columns but differ internally e.g. SMS has to incorporate
    a rate multiplier. Each subquery returns the same set of columns, which we
    pick from here before the big union.
    """
    per_type_subqueries = [
        query_service_sms_usage_for_year(service_id, year).subquery(),
        query_service_email_usage_for_year(service_id, year).subquery(),
    ]

    def _aggregate(sub):
        # One aggregated SELECT per notification type; every branch of the
        # union below exposes this identical column set.
        return db.session.query(
            sub.c.notification_type.label("notification_type"),
            sub.c.rate.label("rate"),
            func.sum(sub.c.notifications_sent).label("notifications_sent"),
            func.sum(sub.c.chargeable_units).label("chargeable_units"),
            func.sum(sub.c.cost).label("cost"),
            func.sum(sub.c.free_allowance_used).label("free_allowance_used"),
            func.sum(sub.c.charged_units).label("charged_units"),
        ).group_by(sub.c.rate, sub.c.notification_type)

    combined = union(*(_aggregate(sub) for sub in per_type_subqueries)).subquery()

    return (
        db.session.query(combined)
        .order_by("notification_type", "rate")
        .all()
    )
|
2018-05-11 16:25:16 +01:00
|
|
|
|
|
|
|
|
|
2018-04-27 15:15:55 +01:00
|
|
|
def fetch_monthly_billing_for_year(service_id, year):
    """
    Returns a row for each distinct rate, notification_type, and month
    from ft_billing over the specified financial year e.g.

    (
        rate=0.0165,
        notification_type=sms,
        month=2022-04-01 00:00:00,
        notifications_sent=123,
        ...
    )

    Each subquery takes care of anything specific to the notification type e.g.
    rate multipliers for SMS.

    Since the data in ft_billing is only refreshed once a day for all services,
    we also update the table on-the-fly if we need accurate data for this year.
    """
    _, year_end = get_calendar_year_dates(year)
    today = datetime.utcnow().date()

    # if year end date is less than today, we are calculating for data in the past and have no need for deltas.
    if year_end >= today:
        todays_data = fetch_billing_data_for_day(
            process_day=today, service_id=service_id, check_permissions=True
        )
        for row in todays_data:
            update_fact_billing(data=row, process_day=today)

    per_type_subqueries = [
        query_service_sms_usage_for_year(service_id, year).subquery(),
        query_service_email_usage_for_year(service_id, year).subquery(),
    ]

    def _monthly_aggregate(sub):
        # One aggregated SELECT per notification type, grouped by month as
        # well as rate and type; every union branch shares these columns.
        return db.session.query(
            sub.c.rate.label("rate"),
            sub.c.notification_type.label("notification_type"),
            func.date_trunc("month", sub.c.local_date).cast(Date).label("month"),
            func.sum(sub.c.notifications_sent).label("notifications_sent"),
            func.sum(sub.c.chargeable_units).label("chargeable_units"),
            func.sum(sub.c.cost).label("cost"),
            func.sum(sub.c.free_allowance_used).label("free_allowance_used"),
            func.sum(sub.c.charged_units).label("charged_units"),
        ).group_by(
            sub.c.rate,
            sub.c.notification_type,
            "month",
        )

    combined = union(
        *(_monthly_aggregate(sub) for sub in per_type_subqueries)
    ).subquery()

    return (
        db.session.query(combined)
        .order_by("month", "notification_type", "rate")
        .all()
    )
|
2022-04-20 16:51:32 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def query_service_email_usage_for_year(service_id, year):
    """Return ft_billing rows for a service's email usage in a calendar year.

    Emails are never charged for, so the computed columns (cost and
    free_allowance_used) are constant zeros and charged_units simply
    mirrors billable_units. The column labels match those produced by
    query_service_sms_usage_for_year so the two queries can be UNIONed.
    """
    start_date, end_date = get_calendar_year_dates(year)

    selected_columns = [
        FactBilling.local_date,
        FactBilling.notifications_sent,
        FactBilling.billable_units.label("chargeable_units"),
        FactBilling.rate,
        FactBilling.notification_type,
        # Email is free: zero cost, and no free allowance is consumed.
        literal(0).label("cost"),
        literal(0).label("free_allowance_used"),
        FactBilling.billable_units.label("charged_units"),
    ]

    conditions = [
        FactBilling.service_id == service_id,
        FactBilling.local_date >= start_date,
        FactBilling.local_date <= end_date,
        FactBilling.notification_type == NotificationType.EMAIL,
    ]

    return db.session.query(*selected_columns).filter(*conditions)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def query_service_sms_usage_for_year(service_id, year):
    """
    Returns rows from the ft_billing table with some calculated values like cost,
    incorporating the SMS free allowance e.g.

        (
            local_date=2022-04-27,
            notifications_sent=12,
            chargeable_units=12,
            rate=0.0165,
            [cost=0 <== covered by the free allowance],
            [cost=0.198 <== if free allowance exhausted],
            [cost=0.099 <== only some free allowance left],
            ...
        )

    In order to calculate how much free allowance is left, we need to work out
    how much was used for previous local_dates - cumulative_chargeable_units -
    which we then subtract from the free allowance for the year.

    cumulative_chargeable_units is calculated using a "window" clause, which has
    access to all the rows identified by the query filter. Note that it's not
    affected by any GROUP BY clauses that happen in outer queries.

    https://www.postgresql.org/docs/current/tutorial-window.html

    ASSUMPTION: rates always change at midnight i.e. there can only be one rate
    on a given local_date. This means we don't need to worry about how to assign
    free allowance if it happens to run out when a rate changes.
    """
    year_start, year_end = get_calendar_year_dates(year)

    # Chargeable units for the current row: billable units scaled by the rate
    # multiplier (e.g. international SMS can count as more than one unit).
    this_rows_chargeable_units = (
        FactBilling.billable_units * FactBilling.rate_multiplier
    )

    # Subquery for the number of chargeable units in all rows preceding this one,
    # which might be none if this is the first row (hence the "coalesce"). For
    # some reason the end result is a decimal despite all the input columns being
    # integer - this seems to be a Sqlalchemy quirk (works in raw SQL).
    chargeable_units_used_before_this_row = func.coalesce(
        func.sum(this_rows_chargeable_units)
        .over(
            # order is "ASC" by default
            order_by=[FactBilling.local_date],
            # first row to previous row
            rows=(None, -1),
        )
        .cast(Integer),
        0,
    )

    # Subquery for how much free allowance we have left before the current row,
    # so we can work out the cost for this row after taking it into account.
    remaining_free_allowance_before_this_row = func.greatest(
        AnnualBilling.free_sms_fragment_limit - chargeable_units_used_before_this_row, 0
    )

    # Subquery for the number of chargeable_units that we will actually charge
    # for, after taking any remaining free allowance into account.
    charged_units = func.greatest(
        this_rows_chargeable_units - remaining_free_allowance_before_this_row, 0
    )

    # Units covered by the free allowance for this row: either everything the
    # row used, or whatever allowance is left - whichever is smaller.
    free_allowance_used = func.least(
        remaining_free_allowance_before_this_row, this_rows_chargeable_units
    )

    return (
        db.session.query(
            FactBilling.local_date,
            FactBilling.notifications_sent,
            this_rows_chargeable_units.label("chargeable_units"),
            FactBilling.rate,
            FactBilling.notification_type,
            # Cost is only incurred on units beyond the free allowance.
            (charged_units * FactBilling.rate).label("cost"),
            free_allowance_used.label("free_allowance_used"),
            charged_units.label("charged_units"),
        )
        # Join on service_id (not a FK pair) so each billing row picks up the
        # service's annual allowance; the financial_year_start filter below
        # narrows it to the single relevant AnnualBilling row.
        .join(AnnualBilling, AnnualBilling.service_id == service_id)
        .filter(
            FactBilling.service_id == service_id,
            FactBilling.local_date >= year_start,
            FactBilling.local_date <= year_end,
            FactBilling.notification_type == NotificationType.SMS,
            AnnualBilling.financial_year_start == year,
        )
    )
|
|
|
|
|
|
2018-07-26 18:41:06 +01:00
|
|
|
|
|
|
|
|
def delete_billing_data_for_service_for_day(process_day, service_id):
    """
    Delete all ft_billing data for a given service on a given local_date

    Returns how many rows were deleted
    """
    rows_to_delete = FactBilling.query.filter(
        FactBilling.service_id == service_id,
        FactBilling.local_date == process_day,
    )
    return rows_to_delete.delete()
|
2018-04-24 17:37:04 +01:00
|
|
|
|
|
|
|
|
|
2020-02-19 16:57:04 +00:00
|
|
|
def fetch_billing_data_for_day(process_day, service_id=None, check_permissions=False):
    """Collect raw billing rows for one day, per service and notification type.

    Args:
        process_day: local date whose notifications should be billed.
        service_id: if given, only fetch data for this service; otherwise
            every service is queried.
        check_permissions: if True, skip notification types the service
            does not have permission to send.

    Returns:
        A list of aggregated billing rows as produced by
        _query_for_billing_data, ready to be upserted into ft_billing.
    """
    # The billing "day" is the local day converted to a UTC half-open range
    # [midnight, next midnight).
    start_date = get_midnight_in_utc(process_day)
    end_date = get_midnight_in_utc(process_day + timedelta(days=1))
    # Lazy %-style args: the message is only built if INFO logging is enabled.
    current_app.logger.info("Populate ft_billing for %s to %s", start_date, end_date)

    if service_id:
        services = [Service.query.get(service_id)]
    else:
        services = Service.query.all()

    transit_data = []
    for service in services:
        for notification_type in (NotificationType.SMS, NotificationType.EMAIL):
            # Guard clause: when checking permissions, skip types the
            # service can't send rather than nesting the query below.
            if check_permissions and not service.has_permission(notification_type):
                continue
            results = _query_for_billing_data(
                notification_type=notification_type,
                start_date=start_date,
                end_date=end_date,
                service=service,
            )
            transit_data.extend(results)

    return transit_data
|
2018-04-24 17:37:04 +01:00
|
|
|
|
|
|
|
|
|
Use notification view for status / billing tasks
This fixes a bug where (letter) notifications left in sending would
temporarily get excluded from billing and status calculations once
the service retention period had elapsed, and then get included once
again when they finally get marked as delivered.*
Status and billing tasks shouldn't need to have knowledge about which
table their data is in and getting this wrong is the fundamental cause
of the bug here. Adding a view across both tables abstracts this away
while keeping the query complexity the same.
Using a view also has the added benefit that we no longer need to care
when the status / billing tasks run in comparison to the deletion task,
since we will retrieve the same data irrespective (see below for a more
detailed discussion on data integrity).
*Such a scenario is rare but has happened.
A New View
==========
I've included all the columns that are shared between the two tables,
even though only a subset are actually needed. Having extra columns
has no impact and may be useful in future.
Although the view isn't actually a table, SQLAlchemy appears to wrap
it without any issues, noting that the package doesn't have any direct
support for "view models". Because we're never inserting data, we don't
need most of the kwargs when defining columns.*
*Note that the "default" kwarg doesn't affect data that's retrieved,
only data that's written (if no value is set).
Data Integrity
==============
The (new) tests cover the main scenarios.
We need to be careful with how the view interacts with the deletion /
archiving task. There are two concerns here:
- Duplicates. The deletion task inserts before it deletes [^1], so we
could end up double counting. It turns out this isn't a problem because
a Postgres UNION is an implicit "DISTINCT" [^2]. I've also verified this
manually, just to be on the safe side.
- No data. It's conceivable that the query will check the history table
just before the insertion, then check the notifications table just after
the deletion. It turns out this isn't a problem either because the whole
query sees the same DB snapshot [^3][^4].*
*I can't think of a way to test this as it's a race condition, but I'm
confident the Postgres docs are accurate.
Performance
===========
I copied the relevant (non-PII) columns from Production for data going
back to 2022-04-01. I then ran several tests.
Queries using the new view still make use of indices on a per-table basis,
as the following query plan illustrates:
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
GroupAggregate (cost=1130820.02..1135353.89 rows=46502 width=97) (actual time=629.863..756.703 rows=72 loops=1)
Group Key: notifications_all_time_view.template_id, notifications_all_time_view.sent_by, notifications_all_time_view.rate_multiplier, notifications_all_time_view.international
-> Sort (cost=1130820.02..1131401.28 rows=232506 width=85) (actual time=629.756..708.914 rows=217563 loops=1)
Sort Key: notifications_all_time_view.template_id, notifications_all_time_view.sent_by, notifications_all_time_view.rate_multiplier, notifications_all_time_view.international
Sort Method: external merge Disk: 9320kB
-> Subquery Scan on notifications_all_time_view (cost=1088506.43..1098969.20 rows=232506 width=85) (actual time=416.118..541.669 rows=217563 loops=1)
-> Unique (cost=1088506.43..1096644.14 rows=232506 width=725) (actual time=416.115..513.065 rows=217563 loops=1)
-> Sort (cost=1088506.43..1089087.70 rows=232506 width=725) (actual time=416.115..451.190 rows=217563 loops=1)
Sort Key: notifications_no_pii.id, notifications_no_pii.job_id, notifications_no_pii.service_id, notifications_no_pii.template_id, notifications_no_pii.key_type, notifications_no_pii.billable_units, notifications_no_pii.notification_type, notifications_no_pii.created_at, notifications_no_pii.sent_by, notifications_no_pii.notification_status, notifications_no_pii.international, notifications_no_pii.rate_multiplier, notifications_no_pii.postage
Sort Method: external merge Disk: 23936kB
-> Append (cost=114.42..918374.12 rows=232506 width=725) (actual time=2.051..298.229 rows=217563 loops=1)
-> Bitmap Heap Scan on notifications_no_pii (cost=114.42..8557.55 rows=2042 width=113) (actual time=1.405..1.442 rows=0 loops=1)
Recheck Cond: ((service_id = 'c5956607-20b1-48b4-8983-85d11404e61f'::uuid) AND (notification_type = 'sms'::notification_type) AND (notification_status = ANY ('{sending,sent,delivered,pending,temporary-failure,permanent-failure}'::text[])) AND (created_at >= '2022-05-01 23:00:00'::timestamp without time zone) AND (created_at < '2022-05-02 23:00:00'::timestamp without time zone))
Filter: ((key_type)::text = ANY ('{normal,team}'::text[]))
-> Bitmap Index Scan on ix_notifications_no_piiservice_id_composite (cost=0.00..113.91 rows=2202 width=0) (actual time=1.402..1.439 rows=0 loops=1)
Index Cond: ((service_id = 'c5956607-20b1-48b4-8983-85d11404e61f'::uuid) AND (notification_type = 'sms'::notification_type) AND (notification_status = ANY ('{sending,sent,delivered,pending,temporary-failure,permanent-failure}'::text[])) AND (created_at >= '2022-05-01 23:00:00'::timestamp without time zone) AND (created_at < '2022-05-02 23:00:00'::timestamp without time zone))
-> Index Scan using ix_notifications_history_no_pii_service_id_composite on notifications_history_no_pii (cost=0.70..906328.97 rows=230464 width=113) (actual time=0.645..281.612 rows=217563 loops=1)
Index Cond: ((service_id = 'c5956607-20b1-48b4-8983-85d11404e61f'::uuid) AND ((key_type)::text = ANY ('{normal,team}'::text[])) AND (notification_type = 'sms'::notification_type) AND (created_at >= '2022-05-01 23:00:00'::timestamp without time zone) AND (created_at < '2022-05-02 23:00:00'::timestamp without time zone))
Filter: (notification_status = ANY ('{sending,sent,delivered,pending,temporary-failure,permanent-failure}'::text[]))
Planning Time: 18.032 ms
Execution Time: 759.001 ms
(21 rows)
Queries using the new view appear to be slower than without, but the
differences I've seen are minimal: the original queries execute in
seconds locally and in Production, so it's not a big issue.
Notes: Performance
==================
I downloaded a minimal set of columns for testing:
\copy (
select
id, notification_type, key_type, created_at, service_id,
template_id, sent_by, rate_multiplier, international,
billable_units, postage, job_id, notification_status
from notifications
) to 'notifications.csv' delimiter ',' csv header;
CREATE TABLE notifications_no_pii (
id uuid NOT NULL,
notification_type public.notification_type NOT NULL,
key_type character varying(255) NOT NULL,
created_at timestamp without time zone NOT NULL,
service_id uuid,
template_id uuid,
sent_by character varying,
rate_multiplier numeric,
international boolean,
billable_units integer NOT NULL,
postage character varying,
job_id uuid,
notification_status text
);
copy notifications_no_pii from '/Users/ben.thorner/Desktop/notifications.csv' delimiter ',' csv header;
CREATE INDEX ix_notifications_no_piicreated_at ON notifications_no_pii USING btree (created_at);
CREATE INDEX ix_notifications_no_piijob_id ON notifications_no_pii USING btree (job_id);
CREATE INDEX ix_notifications_no_piinotification_type_composite ON notifications_no_pii USING btree (notification_type, notification_status, created_at);
CREATE INDEX ix_notifications_no_piiservice_created_at ON notifications_no_pii USING btree (service_id, created_at);
CREATE INDEX ix_notifications_no_piiservice_id_composite ON notifications_no_pii USING btree (service_id, notification_type, notification_status, created_at);
CREATE INDEX ix_notifications_no_piitemplate_id ON notifications_no_pii USING btree (template_id);
And similarly for the history table. I then created a separate view
across both of these temporary tables using just these columns.
To test performance I created some queries that reflect what is run
by the billing [^5] and status [^6] tasks e.g.
explain analyze select template_id, sent_by, rate_multiplier, international, sum(billable_units), count(*)
from notifications_all_time_view
where
notification_status in ('sending', 'sent', 'delivered', 'pending', 'temporary-failure', 'permanent-failure')
and key_type in ('normal', 'team')
and created_at >= '2022-05-01 23:00'
and created_at < '2022-05-02 23:00'
and notification_type = 'sms'
and service_id = 'c5956607-20b1-48b4-8983-85d11404e61f'
group by 1,2,3,4;
explain analyze select template_id, job_id, key_type, notification_status, count(*)
from notifications_all_time_view
where created_at >= '2022-05-01 23:00'
and created_at < '2022-05-02 23:00'
and notification_type = 'sms'
and service_id = 'c5956607-20b1-48b4-8983-85d11404e61f'
and key_type in ('normal', 'team')
group by 1,2,3,4;
Between running queries I restarted my local database and also ran
a command to purge disk caches [^7].
I tested on a few services:
- c5956607-20b1-48b4-8983-85d11404e61f on 2022-05-02 (high volume)
- 0cc696c6-b792-409d-99e9-64232f461b0f on 2022-04-06 (highest volume)
- 01135db6-7819-4121-8b97-4aa2d741e372 on 2022-04-14 (very low volume)
All execution results are of the same magnitude using the view compared
to the worst case of either table on its own.
[^1]: https://github.com/alphagov/notifications-api/blob/00a04ebf54c97fc695f013de0a497e5490ddb558/app/dao/notifications_dao.py#L389
[^2]: https://stackoverflow.com/questions/49925/what-is-the-difference-between-union-and-union-all
[^3]: https://www.postgresql.org/docs/current/transaction-iso.html
[^4]: https://dba.stackexchange.com/questions/210485/can-sub-selects-change-in-one-single-query-in-a-read-committed-transaction
[^5]: https://github.com/alphagov/notifications-api/blob/00a04ebf54c97fc695f013de0a497e5490ddb558/app/dao/fact_billing_dao.py#L471
[^6]: https://github.com/alphagov/notifications-api/blob/00a04ebf54c97fc695f013de0a497e5490ddb558/app/dao/fact_notification_status_dao.py#L58
[^7]: https://stackoverflow.com/questions/28845524/echo-3-proc-sys-vm-drop-caches-on-mac-osx
2022-05-19 11:28:44 +01:00
|
|
|
def _query_for_billing_data(notification_type, start_date, end_date, service):
    """Aggregate one service's billable notifications for a billing window.

    Queries NotificationAllTimeView (the view spanning the live and history
    notification tables) for rows created in [start_date, end_date) and
    returns grouped rows shaped for FactBilling: (template_id, service_id,
    notification_type, sent_by, rate_multiplier, international,
    billable_units, notifications_sent).

    Only NotificationType.SMS and NotificationType.EMAIL are supported;
    any other notification_type raises KeyError from the dispatch dict.
    """

    def _email_query():
        # Emails are free: rate_multiplier and billable_units are pinned to 0,
        # international to False, and the provider is hard-coded to "ses".
        return (
            db.session.query(
                NotificationAllTimeView.template_id,
                literal(service.id).label("service_id"),
                literal(notification_type).label("notification_type"),
                literal("ses").label("sent_by"),
                literal(0).label("rate_multiplier"),
                literal(False).label("international"),
                literal(0).label("billable_units"),
                func.count().label("notifications_sent"),
            )
            .filter(
                NotificationAllTimeView.status.in_(
                    NotificationStatus.sent_email_types()
                ),
                # Test-key traffic is never billed.
                NotificationAllTimeView.key_type.in_((KeyType.NORMAL, KeyType.TEAM)),
                NotificationAllTimeView.created_at >= start_date,
                NotificationAllTimeView.created_at < end_date,
                NotificationAllTimeView.notification_type == notification_type,
                NotificationAllTimeView.service_id == service.id,
            )
            .group_by(
                NotificationAllTimeView.template_id,
            )
        )

    def _sms_query():
        # NULLs can appear for notifications that never reached a provider;
        # coalesce them to neutral values so grouping stays well-defined.
        sent_by = func.coalesce(NotificationAllTimeView.sent_by, "unknown")
        rate_multiplier = func.coalesce(
            NotificationAllTimeView.rate_multiplier, 1
        ).cast(Integer)
        international = func.coalesce(NotificationAllTimeView.international, False)
        return (
            db.session.query(
                NotificationAllTimeView.template_id,
                literal(service.id).label("service_id"),
                literal(notification_type).label("notification_type"),
                sent_by.label("sent_by"),
                rate_multiplier.label("rate_multiplier"),
                international.label("international"),
                func.sum(NotificationAllTimeView.billable_units).label(
                    "billable_units"
                ),
                func.count().label("notifications_sent"),
            )
            .filter(
                NotificationAllTimeView.status.in_(
                    NotificationStatus.billable_sms_types()
                ),
                # Test-key traffic is never billed.
                NotificationAllTimeView.key_type.in_((KeyType.NORMAL, KeyType.TEAM)),
                NotificationAllTimeView.created_at >= start_date,
                NotificationAllTimeView.created_at < end_date,
                NotificationAllTimeView.notification_type == notification_type,
                NotificationAllTimeView.service_id == service.id,
            )
            .group_by(
                NotificationAllTimeView.template_id,
                sent_by,
                rate_multiplier,
                international,
            )
        )

    # Dispatch on channel; the query builders differ in which columns are
    # literals versus real aggregates.
    query_funcs = {
        NotificationType.SMS: _sms_query,
        NotificationType.EMAIL: _email_query,
    }

    query = query_funcs[notification_type]()
    return query.all()
|
|
|
|
|
|
|
|
|
|
|
2018-04-24 17:37:04 +01:00
|
|
|
def get_rates_for_billing():
    """Return all Rate rows, most recent valid_from first.

    The descending order matters: get_rate() relies on it to pick the
    newest rate that had come into force by a given date.
    """
    return Rate.query.order_by(desc(Rate.valid_from)).all()
|
2018-04-24 17:37:04 +01:00
|
|
|
|
|
|
|
|
|
2018-07-23 15:14:37 +01:00
|
|
|
def get_service_ids_that_need_billing_populated(start_date, end_date):
    """Return distinct service ids with billable activity in the window.

    Both bounds are inclusive. Only SMS and email notifications with a
    non-zero billable_units count qualify.
    """
    billable_rows = db.session.query(NotificationHistory.service_id).filter(
        NotificationHistory.created_at >= start_date,
        NotificationHistory.created_at <= end_date,
        NotificationHistory.notification_type.in_(
            [NotificationType.SMS, NotificationType.EMAIL]
        ),
        NotificationHistory.billable_units != 0,
    )
    return billable_rows.distinct().all()
|
2018-07-23 15:14:37 +01:00
|
|
|
|
|
|
|
|
|
2023-08-23 10:35:43 -07:00
|
|
|
def get_rate(rates, notification_type, date):
    """Return the rate in force for notification_type at midnight (UTC) on date.

    `rates` must be ordered newest valid_from first (see
    get_rates_for_billing()), so the first matching entry is the one in
    force. Non-SMS channels are free and always rated 0. Raises
    StopIteration if no SMS rate covers the given date.
    """
    start_of_day = get_midnight_in_utc(date)

    if notification_type != NotificationType.SMS:
        # Only SMS is billed; everything else costs nothing.
        return 0

    # First rate whose validity window had started by midnight on `date`.
    applicable_rates = (
        r.rate
        for r in rates
        if r.notification_type == notification_type
        and r.valid_from <= start_of_day
    )
    return next(applicable_rates)
|
|
|
|
|
|
|
|
|
|
|
2018-04-25 14:24:47 +01:00
|
|
|
def update_fact_billing(data, process_day):
    """Upsert one FactBilling row from an aggregated billing-data row.

    `data` is a row produced by the billing aggregation query (template_id,
    service_id, notification_type, sent_by, rate_multiplier, international,
    billable_units, notifications_sent); `process_day` is the local date the
    figures belong to. Commits the session.
    """
    rates = get_rates_for_billing()
    rate = get_rate(rates, data.notification_type, process_day)
    billing_record = create_billing_record(data, rate, process_day)

    table = FactBilling.__table__
    """
    This uses the Postgres upsert to avoid race conditions when two threads try to insert
    at the same row. The excluded object refers to values that we tried to insert but were
    rejected.
    http://docs.sqlalchemy.org/en/latest/dialects/postgresql.html#insert-on-conflict-upsert
    """
    stmt = insert(table).values(
        local_date=billing_record.local_date,
        template_id=billing_record.template_id,
        service_id=billing_record.service_id,
        provider=billing_record.provider,
        rate_multiplier=billing_record.rate_multiplier,
        notification_type=billing_record.notification_type,
        international=billing_record.international,
        billable_units=billing_record.billable_units,
        notifications_sent=billing_record.notifications_sent,
        rate=billing_record.rate,
    )

    # On a primary-key collision only the counters (and updated_at) are
    # refreshed; the identifying columns and rate are left as inserted.
    stmt = stmt.on_conflict_do_update(
        constraint="ft_billing_pkey",
        set_={
            "notifications_sent": stmt.excluded.notifications_sent,
            "billable_units": stmt.excluded.billable_units,
            "updated_at": datetime.utcnow(),
        },
    )
    db.session.connection().execute(stmt)
    db.session.commit()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def create_billing_record(data, rate, process_day):
    """Build (without persisting) a FactBilling row for one aggregated result.

    Maps the aggregation row's `sent_by` onto FactBilling.provider and
    attaches the pre-computed rate and local date.
    """
    return FactBilling(
        local_date=process_day,
        template_id=data.template_id,
        service_id=data.service_id,
        notification_type=data.notification_type,
        provider=data.sent_by,
        rate_multiplier=data.rate_multiplier,
        international=data.international,
        billable_units=data.billable_units,
        notifications_sent=data.notifications_sent,
        rate=rate,
    )
|
2020-02-24 14:19:12 +00:00
|
|
|
|
|
|
|
|
|
2023-07-10 11:06:29 -07:00
|
|
|
def fetch_email_usage_for_organization(organization_id, start_date, end_date):
    """Total emails sent per live service in an organization over a date range.

    Both dates are inclusive. Trial-mode (restricted) services are excluded.
    Returns rows of (service_name, service_id, emails_sent) ordered by
    service name.
    """
    emails_sent = func.sum(FactBilling.notifications_sent).label("emails_sent")

    usage_query = (
        db.session.query(
            Service.name.label("service_name"),
            Service.id.label("service_id"),
            emails_sent,
        )
        .select_from(Service)
        .join(FactBilling, FactBilling.service_id == Service.id)
        .filter(
            FactBilling.local_date >= start_date,
            FactBilling.local_date <= end_date,
            FactBilling.notification_type == NotificationType.EMAIL,
            Service.organization_id == organization_id,
            Service.restricted.is_(False),
        )
        .group_by(Service.id, Service.name)
        .order_by(Service.name)
    )
    return usage_query.all()
|
|
|
|
|
|
|
|
|
|
|
2023-07-10 11:06:29 -07:00
|
|
|
def fetch_sms_billing_for_organization(organization_id, financial_year):
    """Per-service SMS usage and cost for an organization's financial year.

    Returns one row per live (non-restricted) service with:
    free_sms_fragment_limit, remaining allowance, billable units, units
    actually charged, and total cost. Services with no usage still appear
    (outer joins) with zeroed figures.
    """
    # ASSUMPTION: AnnualBilling has been populated for year.
    ft_billing_subquery = query_organization_sms_usage_for_year(
        organization_id, financial_year
    ).subquery()

    sms_billable_units = func.sum(
        func.coalesce(ft_billing_subquery.c.chargeable_units, 0)
    )

    # subtract sms_billable_units units accrued since report's start date to get up-to-date
    # allowance remainder
    sms_allowance_left = func.greatest(
        AnnualBilling.free_sms_fragment_limit - sms_billable_units, 0
    )

    chargeable_sms = func.sum(ft_billing_subquery.c.charged_units)
    sms_cost = func.sum(ft_billing_subquery.c.cost)

    query = (
        db.session.query(
            Service.name.label("service_name"),
            Service.id.label("service_id"),
            AnnualBilling.free_sms_fragment_limit,
            func.coalesce(sms_allowance_left, 0).label("sms_remainder"),
            func.coalesce(sms_billable_units, 0).label("sms_billable_units"),
            func.coalesce(chargeable_sms, 0).label("chargeable_billable_sms"),
            func.coalesce(sms_cost, 0).label("sms_cost"),
            Service.active,
        )
        .select_from(Service)
        # Outer joins keep services with no AnnualBilling row / no usage.
        .outerjoin(
            AnnualBilling,
            and_(
                Service.id == AnnualBilling.service_id,
                AnnualBilling.financial_year_start == financial_year,
            ),
        )
        .outerjoin(ft_billing_subquery, Service.id == ft_billing_subquery.c.service_id)
        .filter(
            Service.organization_id == organization_id, Service.restricted.is_(False)
        )
        .group_by(Service.id, Service.name, AnnualBilling.free_sms_fragment_limit)
        .order_by(Service.name)
    )

    return query.all()
|
|
|
|
|
|
|
|
|
|
|
2023-07-10 11:06:29 -07:00
|
|
|
def query_organization_sms_usage_for_year(organization_id, year):
    """
    See docstring for query_service_sms_usage_for_year()

    Builds (without executing) a query of per-day SMS usage for every
    service in the organization, using a window function to run down each
    service's free allowance in date order so that each row's cost reflects
    only the units charged after the allowance ran out.
    """
    year_start, year_end = get_calendar_year_dates(year)
    this_rows_chargeable_units = (
        FactBilling.billable_units * FactBilling.rate_multiplier
    )

    # Subquery for the number of chargeable units in all rows preceding this one,
    # which might be none if this is the first row (hence the "coalesce").
    chargeable_units_used_before_this_row = func.coalesce(
        func.sum(this_rows_chargeable_units)
        .over(
            # order is "ASC" by default
            order_by=[FactBilling.local_date],
            # partition by service id
            partition_by=FactBilling.service_id,
            # first row to previous row
            rows=(None, -1),
        )
        .cast(Integer),
        0,
    )

    # Subquery for how much free allowance we have left before the current row,
    # so we can work out the cost for this row after taking it into account.
    remaining_free_allowance_before_this_row = func.greatest(
        AnnualBilling.free_sms_fragment_limit - chargeable_units_used_before_this_row, 0
    )

    # Subquery for the number of chargeable_units that we will actually charge
    # for, after taking any remaining free allowance into account.
    charged_units = func.greatest(
        this_rows_chargeable_units - remaining_free_allowance_before_this_row, 0
    )

    return (
        db.session.query(
            Service.id.label("service_id"),
            FactBilling.local_date,
            this_rows_chargeable_units.label("chargeable_units"),
            (charged_units * FactBilling.rate).label("cost"),
            charged_units.label("charged_units"),
        )
        .join(AnnualBilling, AnnualBilling.service_id == Service.id)
        # Outer join keeps services with an allowance but no usage rows.
        .outerjoin(
            FactBilling,
            and_(
                Service.id == FactBilling.service_id,
                FactBilling.local_date >= year_start,
                FactBilling.local_date <= year_end,
                FactBilling.notification_type == NotificationType.SMS,
            ),
        )
        .filter(
            Service.organization_id == organization_id,
            AnnualBilling.financial_year_start == year,
        )
    )
|
|
|
|
|
|
|
|
|
|
|
2023-07-10 11:06:29 -07:00
|
|
|
def fetch_usage_year_for_organization(organization_id, year):
    """Combined SMS and email usage per live service for a calendar year.

    Returns a dict keyed by str(service_id). If the requested year is still
    in progress, today's billing facts are refreshed first so the figures
    are up to date (a side effect: rows are upserted into ft_billing).
    """
    year_start, year_end = get_calendar_year_dates(year)
    today = datetime.utcnow().date()
    services = dao_get_organization_live_services(organization_id)

    # if year end date is less than today, we are calculating for data in the past and have no need for deltas.
    if year_end >= today:
        for service in services:
            data = fetch_billing_data_for_day(process_day=today, service_id=service.id)
            for d in data:
                update_fact_billing(data=d, process_day=today)
    service_with_usage = {}
    # initialise results
    for service in services:
        service_with_usage[str(service.id)] = {
            "service_id": service.id,
            "service_name": service.name,
            "free_sms_limit": 0,
            "sms_remainder": 0,
            "sms_billable_units": 0,
            "chargeable_billable_sms": 0,
            "sms_cost": 0.0,
            "emails_sent": 0,
            "active": service.active,
        }
    sms_usages = fetch_sms_billing_for_organization(organization_id, year)
    email_usages = fetch_email_usage_for_organization(
        organization_id, year_start, year_end
    )
    # Overwrite the zeroed entry wholesale for any service with SMS usage...
    for usage in sms_usages:
        service_with_usage[str(usage.service_id)] = {
            "service_id": usage.service_id,
            "service_name": usage.service_name,
            "free_sms_limit": usage.free_sms_fragment_limit,
            "sms_remainder": usage.sms_remainder,
            "sms_billable_units": usage.sms_billable_units,
            "chargeable_billable_sms": usage.chargeable_billable_sms,
            "sms_cost": float(usage.sms_cost),
            "emails_sent": 0,
            "active": usage.active,
        }
    # ...then patch in email totals on top.
    for email_usage in email_usages:
        service_with_usage[str(email_usage.service_id)][
            "emails_sent"
        ] = email_usage.emails_sent

    return service_with_usage
|
2021-02-23 18:37:05 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def fetch_billing_details_for_all_services():
    """Billing contact details for every service, with organization fallback.

    For each of the four billing fields the service-level value is used
    when set; otherwise the owning organization's value (NULL when neither
    is set, or the service has no organization).
    """

    def _service_then_org(service_column, org_column, label):
        # Service-level setting wins; the organization's is the fallback.
        return func.coalesce(service_column, org_column).label(label)

    return (
        db.session.query(
            Service.id.label("service_id"),
            _service_then_org(
                Service.purchase_order_number,
                Organization.purchase_order_number,
                "purchase_order_number",
            ),
            _service_then_org(
                Service.billing_contact_names,
                Organization.billing_contact_names,
                "billing_contact_names",
            ),
            _service_then_org(
                Service.billing_contact_email_addresses,
                Organization.billing_contact_email_addresses,
                "billing_contact_email_addresses",
            ),
            _service_then_org(
                Service.billing_reference,
                Organization.billing_reference,
                "billing_reference",
            ),
        )
        .outerjoin(Service.organization)
        .all()
    )
|
2022-03-03 14:47:56 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def fetch_daily_volumes_for_platform(start_date, end_date):
    """Platform-wide daily notification totals per channel.

    Returns one row per local_date (as text) with SMS notification,
    fragment, and chargeable-unit totals plus email totals.
    """
    # query to return the total notifications sent per day for each channel. NB start and end dates are inclusive

    # Stage 1: pivot channel-specific counts into columns, still grouped by
    # (local_date, notification_type) so every channel contributes a row.
    daily_volume_stats = (
        db.session.query(
            FactBilling.local_date,
            func.sum(
                case(
                    (
                        FactBilling.notification_type == NotificationType.SMS,
                        FactBilling.notifications_sent,
                    ),
                    else_=0,
                )
            ).label("sms_totals"),
            func.sum(
                case(
                    (
                        FactBilling.notification_type == NotificationType.SMS,
                        FactBilling.billable_units,
                    ),
                    else_=0,
                )
            ).label("sms_fragment_totals"),
            func.sum(
                case(
                    (
                        FactBilling.notification_type == NotificationType.SMS,
                        FactBilling.billable_units * FactBilling.rate_multiplier,
                    ),
                    else_=0,
                )
            ).label("sms_fragments_times_multiplier"),
            func.sum(
                case(
                    (
                        FactBilling.notification_type == NotificationType.EMAIL,
                        FactBilling.notifications_sent,
                    ),
                    else_=0,
                )
            ).label("email_totals"),
        )
        .filter(
            FactBilling.local_date >= start_date, FactBilling.local_date <= end_date
        )
        .group_by(FactBilling.local_date, FactBilling.notification_type)
        .subquery()
    )

    # Stage 2: collapse the per-channel rows down to one row per date.
    aggregated_totals = (
        db.session.query(
            daily_volume_stats.c.local_date.cast(db.Text).label("local_date"),
            func.sum(daily_volume_stats.c.sms_totals).label("sms_totals"),
            func.sum(daily_volume_stats.c.sms_fragment_totals).label(
                "sms_fragment_totals"
            ),
            func.sum(daily_volume_stats.c.sms_fragments_times_multiplier).label(
                "sms_chargeable_units"
            ),
            func.sum(daily_volume_stats.c.email_totals).label("email_totals"),
        )
        .group_by(daily_volume_stats.c.local_date)
        .order_by(daily_volume_stats.c.local_date)
        .all()
    )

    return aggregated_totals
|
|
|
|
|
|
|
|
|
|
|
2022-04-07 17:52:37 +01:00
|
|
|
def fetch_daily_sms_provider_volumes_for_platform(start_date, end_date):
    """Return per-day, per-provider SMS volume and cost totals.

    start_date and end_date are inclusive. Each row carries, for one
    (local_date, provider) pair: the number of notifications sent, the
    billable fragment count, the chargeable units (fragments multiplied by
    the rate multiplier) and the total cost (chargeable units multiplied
    by the rate).
    """
    # Reused in both the chargeable-units and cost aggregates below.
    chargeable_units = FactBilling.billable_units * FactBilling.rate_multiplier

    stats_query = db.session.query(
        FactBilling.local_date,
        FactBilling.provider,
        func.sum(FactBilling.notifications_sent).label("sms_totals"),
        func.sum(FactBilling.billable_units).label("sms_fragment_totals"),
        func.sum(chargeable_units).label("sms_chargeable_units"),
        func.sum(chargeable_units * FactBilling.rate).label("sms_cost"),
    ).filter(
        FactBilling.notification_type == NotificationType.SMS,
        FactBilling.local_date >= start_date,
        FactBilling.local_date <= end_date,
    )

    daily_volume_stats = (
        stats_query.group_by(
            FactBilling.local_date,
            FactBilling.provider,
        )
        .order_by(
            FactBilling.local_date,
            FactBilling.provider,
        )
        .all()
    )

    return daily_volume_stats
|
|
|
|
|
|
|
|
|
|
|
2022-03-03 14:47:56 +00:00
|
|
|
def fetch_volumes_by_service(start_date, end_date):
    """Return volume totals by service aggregated over the given date range.

    start_date and end_date are inclusive. For every live, active,
    non-restricted service the result includes: SMS notification counts,
    SMS chargeable units (billable fragments x rate multiplier), email
    totals, and the service's free SMS fragment allowance. Services with
    no traffic in the range are still included, with zero totals.
    """
    # Upper bound on which annual-billing financial years to consider.
    # end_date must be a date/datetime (the original strftime("%Y") call
    # required that too), so read the year attribute directly instead of
    # round-tripping through string formatting.
    year_end_date = end_date.year

    # Per-day, per-service, per-channel totals within the range.
    volume_stats = (
        db.session.query(
            FactBilling.local_date,
            FactBilling.service_id,
            func.sum(
                case(
                    (
                        FactBilling.notification_type == NotificationType.SMS,
                        FactBilling.notifications_sent,
                    ),
                    else_=0,
                )
            ).label("sms_totals"),
            func.sum(
                case(
                    (
                        FactBilling.notification_type == NotificationType.SMS,
                        FactBilling.billable_units * FactBilling.rate_multiplier,
                    ),
                    else_=0,
                )
            ).label("sms_fragments_times_multiplier"),
            func.sum(
                case(
                    (
                        FactBilling.notification_type == NotificationType.EMAIL,
                        FactBilling.notifications_sent,
                    ),
                    else_=0,
                )
            ).label("email_totals"),
        )
        .filter(
            FactBilling.local_date >= start_date, FactBilling.local_date <= end_date
        )
        .group_by(
            FactBilling.local_date,
            FactBilling.service_id,
            FactBilling.notification_type,
        )
        .subquery()
    )

    # Latest annual-billing year (and its free allowance) per service,
    # limited to financial years that have started by the end of the range.
    # NOTE(review): grouping by free_sms_fragment_limit as well means a
    # service whose allowance changed between years can contribute more
    # than one row here — confirm that is intended.
    annual_billing = (
        db.session.query(
            func.max(AnnualBilling.financial_year_start).label("financial_year_start"),
            AnnualBilling.service_id,
            AnnualBilling.free_sms_fragment_limit,
        )
        .filter(AnnualBilling.financial_year_start <= year_end_date)
        .group_by(AnnualBilling.service_id, AnnualBilling.free_sms_fragment_limit)
        .subquery()
    )

    results = (
        db.session.query(
            Service.name.label("service_name"),
            Service.id.label("service_id"),
            Service.organization_id.label("organization_id"),
            Organization.name.label("organization_name"),
            annual_billing.c.free_sms_fragment_limit.label("free_allowance"),
            # coalesce: services with no rows in volume_stats report 0, not NULL
            func.coalesce(func.sum(volume_stats.c.sms_totals), 0).label(
                "sms_notifications"
            ),
            func.coalesce(
                func.sum(volume_stats.c.sms_fragments_times_multiplier), 0
            ).label("sms_chargeable_units"),
            func.coalesce(func.sum(volume_stats.c.email_totals), 0).label(
                "email_totals"
            ),
        )
        .select_from(Service)
        .outerjoin(Organization, Service.organization_id == Organization.id)
        .join(annual_billing, Service.id == annual_billing.c.service_id)
        .outerjoin(  # include services without volume
            volume_stats, Service.id == volume_stats.c.service_id
        )
        .filter(
            Service.restricted.is_(False),
            Service.count_as_live.is_(True),
            Service.active.is_(True),
        )
        .group_by(
            Service.id,
            Service.name,
            Service.organization_id,
            Organization.name,
            annual_billing.c.free_sms_fragment_limit,
        )
        .order_by(
            Organization.name,
            Service.name,
        )
        .all()
    )

    return results
|