From effaecbd49007c9c8919b608f128413343ed4256 Mon Sep 17 00:00:00 2001 From: venusbb Date: Wed, 7 Mar 2018 12:46:11 +0000 Subject: [PATCH 1/4] added ft_billing and dm_datetime tables for report purpose --- migrations/versions/0174_add_billing_facts.py | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 migrations/versions/0174_add_billing_facts.py diff --git a/migrations/versions/0174_add_billing_facts.py b/migrations/versions/0174_add_billing_facts.py new file mode 100644 index 000000000..5d43d07f4 --- /dev/null +++ b/migrations/versions/0174_add_billing_facts.py @@ -0,0 +1,97 @@ +""" + +Revision ID: 0174_add_billing_facts +Revises: 0173_create_daily_sorted_letter +Create Date: 2018-03-07 12:21:53.098887 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +revision = '0174_add_billing_facts' +down_revision = '0173_create_daily_sorted_letter' + + +def upgrade(): + # Create notifications_for_today table + op.create_table('ft_billing', + sa.Column('dm_datetime', sa.Date(), nullable=True), + sa.Column('template', postgresql.UUID(as_uuid=True), nullable=True), + sa.Column('service', postgresql.UUID(as_uuid=True), nullable=True), + sa.Column('annual_billing', postgresql.UUID(as_uuid=True), nullable=True), + sa.Column('notification_type', sa.Text(), nullable=True), + sa.Column('provider', sa.Text(), nullable=True), + sa.Column('crown', sa.Text(), nullable=True), + sa.Column('rate_multiplier', sa.Numeric(), nullable=True), + sa.Column('international', sa.Boolean(), nullable=True), + sa.Column('rate', sa.Numeric(), nullable=True), + sa.Column('billable_units', sa.Numeric(), nullable=True), + sa.Column('notifications_sent', sa.Integer(), nullable=True), + sa.PrimaryKeyConstraint('dm_datetime', 'template') + ) + + # Create dm_datetime table + op.create_table('dm_datetime', + sa.Column('bst_date', sa.Date(), nullable=False), + sa.Column('year', sa.Integer(), nullable=False), + sa.Column('month', sa.Integer(), nullable=False), + sa.Column('month_name', sa.String(), nullable=False), + sa.Column('day', sa.Integer(), nullable=True), + sa.Column('bst_day', sa.Integer(), nullable=False), + sa.Column('day_of_year', sa.Integer(), nullable=False), + sa.Column('week_day_name', sa.String(), nullable=False), + sa.Column('calendar_week', sa.Integer(), nullable=True), + sa.Column('quartal', sa.String(), nullable=False), + sa.Column('year_quartal', sa.String(), nullable=False), + sa.Column('year_month', sa.String(), nullable=False), + sa.Column('year_calendar_week', sa.String(), nullable=False), + sa.Column('financial_year', sa.Integer(), nullable=True), + sa.Column('utc_daytime_start', sa.DateTime(), nullable=False), + sa.Column('utc_daytime_end', sa.DateTime(), nullable=False), + sa.PrimaryKeyConstraint('bst_date') + ) + # Set indexes + op.create_index(op.f('ix_dm_datetime_yearmonth'), 'dm_datetime', ['year', 'month'], unique=False) + + # Insert data into table + op.execute( + """ + INSERT into dm_datetime ( + SELECT + datum AS bst_date, + EXTRACT(YEAR FROM datum) AS year, + EXTRACT(MONTH FROM datum) AS month, + -- Localized month name + to_char(datum, 'TMMonth') AS month_name, + EXTRACT(DAY FROM datum) AS day, + EXTRACT(DAY FROM datum) AS bst_day, + EXTRACT(DOY FROM datum) AS day_of_year, + -- Localized weekday + to_char(datum, 'TMDay') AS week_day_name, + -- ISO calendar week + EXTRACT(week FROM datum) AS calendar_week, + 'Q' || to_char(datum, 'Q') AS quartal, + to_char(datum, 'yyyy/"Q"Q') AS year_quartal, + to_char(datum, 'yyyy/mm') AS year_month, + -- ISO calendar year and week + to_char(datum, 'iyyy/IW') AS year_calendar_week, + (SELECT CASE WHEN (extract(month from datum) <= 3) THEN (extract(year FROM datum) -1) + ELSE (extract(year FROM datum)) end), + (datum + TIME '00:00:00') at TIME zone 'utc' as utc_daytime_start, -- convert bst time to utc time + (datum + TIME '23:59:59') at TIME zone 'utc' as utc_daytime_end + FROM ( + -- There are 5 leap years in this range, so calculate 365 * 20 + 5 records + SELECT '2015-01-01'::date + SEQUENCE.DAY AS datum + FROM generate_series(0,365*20+5) AS SEQUENCE(DAY) + GROUP BY SEQUENCE.day + ) DQ + ORDER BY bst_date + ); + """ + ) + + +def downgrade(): + op.drop_table('ft_billing') + op.drop_table('dm_datetime') \ No newline at end of file From 5082d9d83c10a040376dc9a26023dec19bfea013 Mon Sep 17 00:00:00 2001 From: venusbb Date: Wed, 7 Mar 2018 12:55:56 +0000 Subject: [PATCH 2/4] Increase datetime number of years from 20 to 50 --- migrations/versions/0174_add_billing_facts.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/migrations/versions/0174_add_billing_facts.py b/migrations/versions/0174_add_billing_facts.py index 5d43d07f4..a2a504c46 100644 --- a/migrations/versions/0174_add_billing_facts.py +++ b/migrations/versions/0174_add_billing_facts.py @@ -81,9 +81,9 @@ def upgrade(): (datum + TIME '00:00:00') at TIME zone 'utc' as utc_daytime_start, -- convert bst time to utc time (datum + TIME '23:59:59') at TIME zone 'utc' as utc_daytime_end FROM ( - -- There are 5 leap years in this range, so calculate 365 * 20 + 5 records + -- There are 10 leap years in this range, so calculate 365 * 50 + 5 records SELECT '2015-01-01'::date + SEQUENCE.DAY AS datum - FROM generate_series(0,365*20+5) AS SEQUENCE(DAY) + FROM generate_series(0,365*50+10) AS SEQUENCE(DAY) GROUP BY SEQUENCE.day ) DQ ORDER BY bst_date @@ -94,4 +94,4 @@ def upgrade(): def downgrade(): op.drop_table('ft_billing') - op.drop_table('dm_datetime') \ No newline at end of file + op.drop_table('dm_datetime') From a4c054124fd6c43571e41f9e3ea28dc216608dfe Mon Sep 17 00:00:00 2001 From: venusbb Date: Wed, 7 Mar 2018 17:23:52 +0000 Subject: [PATCH 3/4] work in progress --- migrations/versions/0174_add_billing_facts.py | 1 + scripts/etl.sql | 73 +++++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 scripts/etl.sql diff --git a/migrations/versions/0174_add_billing_facts.py b/migrations/versions/0174_add_billing_facts.py index a2a504c46..a9d0b0383 100644 --- a/migrations/versions/0174_add_billing_facts.py +++ b/migrations/versions/0174_add_billing_facts.py @@ -19,6 +19,7 @@ def upgrade(): sa.Column('dm_datetime', sa.Date(), nullable=True), sa.Column('template', postgresql.UUID(as_uuid=True), nullable=True), sa.Column('service', postgresql.UUID(as_uuid=True), nullable=True), + sa.Column('organisation', postgresql.UUID(as_uuid=True), nullable=True), sa.Column('annual_billing', postgresql.UUID(as_uuid=True), nullable=True), sa.Column('notification_type', sa.Text(), nullable=True), sa.Column('provider', sa.Text(), nullable=True), diff --git a/scripts/etl.sql b/scripts/etl.sql new file mode 100644 index 000000000..78de2c6da --- /dev/null +++ b/scripts/etl.sql @@ -0,0 +1,73 @@ +-- create a temp table, an intermediate step to transform data from notifications table to the format in ft_billing +-- Note: to run this script successfully, all templates need to be set. + +drop table if exists notifications_temp; + +--create type notification_type as enum('email', 'sms', 'letter'); + +create table notifications_temp ( + notification_id uuid, + dm_datetime date, + template uuid, + service uuid, + annual_billing uuid, + notification_type varchar, + provider varchar, + rate_multiplier numeric, + crown varchar, + rate numeric, + international bool, + billable_units numeric, + notifications_sent numeric +); + + +insert into notifications_temp (notification_id, dm_datetime, template, service, annual_billing, notification_type, +provider, rate_multiplier, crown, rate, international, billable_units, notifications_sent) +select +n.id, +da.bst_date, +n.template_id, +n.service_id, +a.id, +n.notification_type, +coalesce(n.rate_multiplier,1), +s.crown, +coalesce((select rates.rate from rates +where n.notification_type = rates.notification_type and n.sent_at > rates.valid_from order by rates.valid_from desc limit 1), 0), +n.sent_by, +coalesce(n.international, false), +n.billable_units, +1 +from public.notification_history n +left join dm_template t on t.template_id = n.template_id +left join dm_datetime da on n.created_at > da.utc_daytime_start and n.created_at < da.utc_daytime_end +left join service s on s.service_id = n.service_id +left join annual_billing a on a.service_id = n.service_id and a.financial_year_start = da.financial_year; + +update notifications_temp n set rate = (select rate from letter_rates l where n.rate_multiplier = l.sheet_count and n.crown = l.crown) +where notification_type = 'letter' + +-- ft_billing: Aggregate into billing fact table + +delete from ft_billing where 1=1; -- Note: delete this if we are already using ft_billing + +insert into ft_billing (dm_service_year, dm_template, dm_datetime, notification_type, crown, provider, rate_multiplier, +provider_rate, client_rate, international, notifications_sent, billable_units) +select billing.dm_service_year, template.template_id, date.bst_date, billing.notification_type, billing.crown, billing.provider, +avg(billing.rate_multiplier), avg(billing.provider_rate), avg(client_rate), international, +count(*) as notifications_sent, +sum(billing.billable_units) as billable_units +from notifications_temp as billing +left join dm_template template on billing.dm_template = template.template_id +left join dm_datetime date on billing.dm_datetime = date.bst_date +group by date.bst_date, template.template_id, billing.dm_service_year, billing.provider, billing.notification_type, billing.international, billing.crown +order by date.bst_date; + +-- update ft_billing set billing_total=billable_units*rate_multiplier*client_rate where 1=1; + +update ft_billing set provider='DVLA' where notification_type = 'letter'; + +update dm_service_year set organisation='Not set' where organisation = null; + +update dm_service_year set organisation_type='Not set' where organisation_type = NULL; \ No newline at end of file From 01ff06fc0956e9d9ae650463fca05a7239fe9ddb Mon Sep 17 00:00:00 2001 From: venusbb Date: Fri, 9 Mar 2018 07:54:38 +0000 Subject: [PATCH 4/4] Rename tables, column and added indexes --- migrations/versions/0174_add_billing_facts.py | 16 ++-- scripts/etl.sql | 73 ------------------- 2 files changed, 10 insertions(+), 79 deletions(-) delete mode 100644 scripts/etl.sql diff --git a/migrations/versions/0174_add_billing_facts.py b/migrations/versions/0174_add_billing_facts.py index a9d0b0383..ab108e3a8 100644 --- a/migrations/versions/0174_add_billing_facts.py +++ b/migrations/versions/0174_add_billing_facts.py @@ -16,11 +16,11 @@ down_revision = '0173_create_daily_sorted_letter' def upgrade(): # Create notifications_for_today table op.create_table('ft_billing', - sa.Column('dm_datetime', sa.Date(), nullable=True), - sa.Column('template', postgresql.UUID(as_uuid=True), nullable=True), - sa.Column('service', postgresql.UUID(as_uuid=True), nullable=True), - sa.Column('organisation', postgresql.UUID(as_uuid=True), nullable=True), - sa.Column('annual_billing', postgresql.UUID(as_uuid=True), nullable=True), + sa.Column('bst_date', sa.Date(), nullable=True), + sa.Column('template_id', postgresql.UUID(as_uuid=True), nullable=True), + sa.Column('service_id', postgresql.UUID(as_uuid=True), nullable=True), + sa.Column('organisation_id', postgresql.UUID(as_uuid=True), nullable=True), + sa.Column('annual_billing_id', postgresql.UUID(as_uuid=True), nullable=True), sa.Column('notification_type', sa.Text(), nullable=True), sa.Column('provider', sa.Text(), nullable=True), sa.Column('crown', sa.Text(), nullable=True), @@ -29,8 +29,11 @@ def upgrade(): sa.Column('rate', sa.Numeric(), nullable=True), sa.Column('billable_units', sa.Numeric(), nullable=True), sa.Column('notifications_sent', sa.Integer(), nullable=True), - sa.PrimaryKeyConstraint('dm_datetime', 'template') + sa.PrimaryKeyConstraint('bst_date', 'template_id') ) + # Set indexes + op.create_index(op.f('ix_ft_billing_bst_date'), 'ft_billing', ['bst_date'], unique=False) + op.create_index(op.f('ix_ft_billing_service_id'), 'ft_billing', ['service_id'], unique=False) # Create dm_datetime table op.create_table('dm_datetime', @@ -54,6 +57,7 @@ def upgrade(): ) # Set indexes op.create_index(op.f('ix_dm_datetime_yearmonth'), 'dm_datetime', ['year', 'month'], unique=False) + op.create_index(op.f('ix_dm_datetime_bst_date'), 'dm_datetime', ['bst_date'], unique=False) # Insert data into table op.execute( diff --git a/scripts/etl.sql b/scripts/etl.sql deleted file mode 100644 index 78de2c6da..000000000 --- a/scripts/etl.sql +++ /dev/null @@ -1,73 +0,0 @@ --- create a temp table, an intermediate step to transform data from notifications table to the format in ft_billing --- Note: to run this script successfully, all templates need to be set. - -drop table if exists notifications_temp; - ---create type notification_type as enum('email', 'sms', 'letter'); - -create table notifications_temp ( - notification_id uuid, - dm_datetime date, - template uuid, - service uuid, - annual_billing uuid, - notification_type varchar, - provider varchar, - rate_multiplier numeric, - crown varchar, - rate numeric, - international bool, - billable_units numeric, - notifications_sent numeric -); - - -insert into notifications_temp (notification_id, dm_datetime, template, service, annual_billing, notification_type, -provider, rate_multiplier, crown, rate, international, billable_units, notifications_sent) -select -n.id, -da.bst_date, -n.template_id, -n.service_id, -a.id, -n.notification_type, -coalesce(n.rate_multiplier,1), -s.crown, -coalesce((select rates.rate from rates -where n.notification_type = rates.notification_type and n.sent_at > rates.valid_from order by rates.valid_from desc limit 1), 0), -n.sent_by, -coalesce(n.international, false), -n.billable_units, -1 -from public.notification_history n -left join dm_template t on t.template_id = n.template_id -left join dm_datetime da on n.created_at > da.utc_daytime_start and n.created_at < da.utc_daytime_end -left join service s on s.service_id = n.service_id -left join annual_billing a on a.service_id = n.service_id and a.financial_year_start = da.financial_year; - -update notifications_temp n set rate = (select rate from letter_rates l where n.rate_multiplier = l.sheet_count and n.crown = l.crown) -where notification_type = 'letter' - --- ft_billing: Aggregate into billing fact table - -delete from ft_billing where 1=1; -- Note: delete this if we are already using ft_billing - -insert into ft_billing (dm_service_year, dm_template, dm_datetime, notification_type, crown, provider, rate_multiplier, -provider_rate, client_rate, international, notifications_sent, billable_units) -select billing.dm_service_year, template.template_id, date.bst_date, billing.notification_type, billing.crown, billing.provider, -avg(billing.rate_multiplier), avg(billing.provider_rate), avg(client_rate), international, -count(*) as notifications_sent, -sum(billing.billable_units) as billable_units -from notifications_temp as billing -left join dm_template template on billing.dm_template = template.template_id -left join dm_datetime date on billing.dm_datetime = date.bst_date -group by date.bst_date, template.template_id, billing.dm_service_year, billing.provider, billing.notification_type, billing.international, billing.crown -order by date.bst_date; - --- update ft_billing set billing_total=billable_units*rate_multiplier*client_rate where 1=1; - -update ft_billing set provider='DVLA' where notification_type = 'letter'; - -update dm_service_year set organisation='Not set' where organisation = null; - -update dm_service_year set organisation_type='Not set' where organisation_type = NULL; \ No newline at end of file