From 82cc6d6bef60a80d97be6e5a0e62e322f8f889f4 Mon Sep 17 00:00:00 2001 From: Rebecca Law Date: Wed, 14 Mar 2018 17:04:58 +0000 Subject: [PATCH 1/2] As it turns out the s3ftp used to mount the s3 bucket to our ftp server puts the file on s3 more than once. So the SNS topic is triggered more than once. We need to deal with this, it's ok when updating a notification status from delivered to delivered. But the DailySortedLetter counts are being doubled. Adding the file_name to the table as a unique key to get around this issue. It will mean we have multiple rows for each billing_day, but that's ok we can aggregate that. This will also give us a way to see which file created which count. --- app/celery/tasks.py | 7 +++-- app/dao/daily_sorted_letter_dao.py | 7 +++-- app/models.py | 6 +++- migrations/versions/0178_add_filename.py | 29 +++++++++++++++++++ tests/app/celery/test_ftp_update_tasks.py | 6 ++-- tests/app/dao/test_daily_sorted_letter_dao.py | 19 ++++++++---- tests/app/db.py | 6 +++- 7 files changed, 66 insertions(+), 14 deletions(-) create mode 100644 migrations/versions/0178_add_filename.py diff --git a/app/celery/tasks.py b/app/celery/tasks.py index 167bcf58b..d69e95ade 100644 --- a/app/celery/tasks.py +++ b/app/celery/tasks.py @@ -430,7 +430,9 @@ def update_letter_notifications_statuses(self, filename): raise DVLAException(message) billing_date = get_billing_date_in_bst_from_filename(filename) - persist_daily_sorted_letter_counts(billing_date, sorted_letter_counts) + persist_daily_sorted_letter_counts(day=billing_date, + file_name=filename, + sorted_letter_counts=sorted_letter_counts) finally: if temporary_failures: # This will alert Notify that DVLA was unable to deliver the letters, we need to investigate @@ -445,9 +447,10 @@ def get_billing_date_in_bst_from_filename(filename): return convert_utc_to_bst(datetime_obj).date() -def persist_daily_sorted_letter_counts(day, sorted_letter_counts): +def persist_daily_sorted_letter_counts(day, file_name, sorted_letter_counts): daily_letter_count = DailySortedLetter( billing_day=day, + file_name=file_name, unsorted_count=sorted_letter_counts['Unsorted'], sorted_count=sorted_letter_counts['Sorted'] ) diff --git a/app/dao/daily_sorted_letter_dao.py b/app/dao/daily_sorted_letter_dao.py index 3afad4b2a..eca7b3350 100644 --- a/app/dao/daily_sorted_letter_dao.py +++ b/app/dao/daily_sorted_letter_dao.py @@ -24,13 +24,14 @@ def dao_create_or_update_daily_sorted_letter(new_daily_sorted_letter): table = DailySortedLetter.__table__ stmt = insert(table).values( billing_day=new_daily_sorted_letter.billing_day, + file_name=new_daily_sorted_letter.file_name, unsorted_count=new_daily_sorted_letter.unsorted_count, sorted_count=new_daily_sorted_letter.sorted_count) stmt = stmt.on_conflict_do_update( - index_elements=[table.c.billing_day], + index_elements=[table.c.billing_day, table.c.file_name], set_={ - 'unsorted_count': table.c.unsorted_count + stmt.excluded.unsorted_count, - 'sorted_count': table.c.sorted_count + stmt.excluded.sorted_count, + 'unsorted_count': stmt.excluded.unsorted_count, + 'sorted_count': stmt.excluded.sorted_count, 'updated_at': datetime.utcnow() } ) diff --git a/app/models.py b/app/models.py index e78fd18ea..018eaca1b 100644 --- a/app/models.py +++ b/app/models.py @@ -1726,11 +1726,15 @@ class DailySortedLetter(db.Model): __tablename__ = "daily_sorted_letter" id = db.Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) - billing_day = db.Column(db.Date, nullable=False, index=True, unique=True) + billing_day = db.Column(db.Date, nullable=False, index=True) + file_name = db.Column(db.String, nullable=True, index=True) unsorted_count = db.Column(db.Integer, nullable=False, default=0) sorted_count = db.Column(db.Integer, nullable=False, default=0) updated_at = db.Column(db.DateTime, nullable=True, onupdate=datetime.datetime.utcnow) + __table_args__ = (UniqueConstraint('file_name', 'billing_day', name='uix_file_name_billing_day'), + ) + class FactBilling(db.Model): __tablename__ = "ft_billing" diff --git a/migrations/versions/0178_add_filename.py b/migrations/versions/0178_add_filename.py new file mode 100644 index 000000000..9855031ee --- /dev/null +++ b/migrations/versions/0178_add_filename.py @@ -0,0 +1,29 @@ +""" + +Revision ID: 0178_add_filename +Revises: 0177_add_virus_scan_statuses +Create Date: 2018-03-14 16:15:01.886998 + +""" +from alembic import op +import sqlalchemy as sa + + +revision = '0178_add_filename' +down_revision = '0177_add_virus_scan_statuses' + + +def upgrade(): + op.add_column('daily_sorted_letter', sa.Column('file_name', sa.String(), nullable=True)) + op.create_index(op.f('ix_daily_sorted_letter_file_name'), 'daily_sorted_letter', ['file_name'], unique=False) + op.create_unique_constraint('uix_file_name_billing_day', 'daily_sorted_letter', ['file_name', 'billing_day']) + op.drop_index('ix_daily_sorted_letter_billing_day', table_name='daily_sorted_letter') + op.create_index(op.f('ix_daily_sorted_letter_billing_day'), 'daily_sorted_letter', ['billing_day'], unique=False) + + +def downgrade(): + op.drop_index(op.f('ix_daily_sorted_letter_billing_day'), table_name='daily_sorted_letter') + op.create_index('ix_daily_sorted_letter_billing_day', 'daily_sorted_letter', ['billing_day'], unique=True) + op.drop_constraint('uix_file_name_billing_day', 'daily_sorted_letter', type_='unique') + op.drop_index(op.f('ix_daily_sorted_letter_file_name'), table_name='daily_sorted_letter') + op.drop_column('daily_sorted_letter', 'file_name') diff --git a/tests/app/celery/test_ftp_update_tasks.py b/tests/app/celery/test_ftp_update_tasks.py index cffb22e28..67c7d994e 100644 --- a/tests/app/celery/test_ftp_update_tasks.py +++ b/tests/app/celery/test_ftp_update_tasks.py @@ -208,7 +208,9 @@ def test_update_letter_notifications_statuses_persists_daily_sorted_letter_count update_letter_notifications_statuses(filename='NOTIFY-20170823160812-RSP.TXT') - persist_letter_count_mock.assert_called_once_with(date(2017, 8, 23), {'Unsorted': 1, 'Sorted': 1}) + persist_letter_count_mock.assert_called_once_with(day=date(2017, 8, 23), + file_name='NOTIFY-20170823160812-RSP.TXT', + sorted_letter_counts={'Unsorted': 1, 'Sorted': 1}) def test_update_letter_notifications_statuses_persists_daily_sorted_letter_count_with_no_sorted_values( @@ -329,7 +331,7 @@ def test_get_billing_date_in_bst_from_filename(filename_date, billing_date): @freeze_time("2018-01-11 09:00:00") def test_persist_daily_sorted_letter_counts_saves_sorted_and_unsorted_values(client, notify_db_session): letter_counts = defaultdict(int, **{'Unsorted': 5, 'Sorted': 1}) - persist_daily_sorted_letter_counts(date.today(), letter_counts) + persist_daily_sorted_letter_counts(date.today(), "test.txt", letter_counts) day = dao_get_daily_sorted_letter_by_billing_day(date.today()) assert day.unsorted_count == 5 diff --git a/tests/app/dao/test_daily_sorted_letter_dao.py b/tests/app/dao/test_daily_sorted_letter_dao.py index 2a4ceb318..8c1183b58 100644 --- a/tests/app/dao/test_daily_sorted_letter_dao.py +++ b/tests/app/dao/test_daily_sorted_letter_dao.py @@ -20,7 +20,10 @@ def test_dao_get_daily_sorted_letter_by_billing_day(notify_db, notify_db_session def test_dao_create_or_update_daily_sorted_letter_creates_a_new_entry(notify_db, notify_db_session): billing_day = date(2018, 2, 1) - dsl = DailySortedLetter(billing_day=billing_day, unsorted_count=2, sorted_count=0) + dsl = DailySortedLetter(billing_day=billing_day, + file_name="Notify-201802011234.rs.txt", + unsorted_count=2, + sorted_count=0) dao_create_or_update_daily_sorted_letter(dsl) daily_sorted_letter = dao_get_daily_sorted_letter_by_billing_day(billing_day) @@ -35,13 +38,19 @@ def test_dao_create_or_update_daily_sorted_letter_updates_an_existing_entry( notify_db, notify_db_session ): - create_daily_sorted_letter(unsorted_count=2, sorted_count=3) + create_daily_sorted_letter(billing_day=date(2018, 1, 18), + file_name="Notify-20180118123.rs.txt", + unsorted_count=2, + sorted_count=3) - dsl = DailySortedLetter(billing_day=date(2018, 1, 18), unsorted_count=5, sorted_count=17) + dsl = DailySortedLetter(billing_day=date(2018, 1, 18), + file_name="Notify-20180118123.rs.txt", + unsorted_count=5, + sorted_count=17) dao_create_or_update_daily_sorted_letter(dsl) daily_sorted_letter = dao_get_daily_sorted_letter_by_billing_day(dsl.billing_day) - assert daily_sorted_letter.unsorted_count == 7 - assert daily_sorted_letter.sorted_count == 20 + assert daily_sorted_letter.unsorted_count == 5 + assert daily_sorted_letter.sorted_count == 17 assert daily_sorted_letter.updated_at diff --git a/tests/app/db.py b/tests/app/db.py index 87c344d3e..553e3c188 100644 --- a/tests/app/db.py +++ b/tests/app/db.py @@ -509,9 +509,13 @@ def create_invited_org_user(organisation, invited_by, email_address='invite@exam return invited_org_user -def create_daily_sorted_letter(billing_day=date(2018, 1, 18), unsorted_count=0, sorted_count=0): +def create_daily_sorted_letter(billing_day=date(2018, 1, 18), + file_name="Notify-20180118123.rs.txt", + unsorted_count=0, + sorted_count=0): daily_sorted_letter = DailySortedLetter( billing_day=billing_day, + file_name=file_name, unsorted_count=unsorted_count, sorted_count=sorted_count ) From 8a11db7b2cf02741d0b33512bc049c048434c9c7 Mon Sep 17 00:00:00 2001 From: Rebecca Law Date: Thu, 15 Mar 2018 10:49:18 +0000 Subject: [PATCH 2/2] Handle duplicate calls to update_letter_notifications_statuses As it turns out the SNS topic is trigger more that once when a file is placed in S3, this is caused by a bug in the s3ftp software used to mount the S3 bucket to the FTP server. S3ftp performs the create file operation more than once. This is resulting in duplicate counts of DailySortedLetter (the counts of how many letters marked as sorted or unsorted, which affect how much the provider will charge Notify for the letter). This PR adds a new column to DailySortedLetter called file_name. A new unique constraint on billing_day + file_name is added. Each time we write a row to the table the counts are over written rather than aggregated. I am aware that this PR is not backwards compatiable. However, since the code is typically triggered once a day around 13:00 then it is very unlikely an exception will occur during the deploy. Also a complete migration of the data based on all our response files on S3 will be performed soon, meaning the data will be corrected. Also if an exception does occur it is after the updates to notification status has already occurred. --- migrations/versions/0178_add_filename.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/migrations/versions/0178_add_filename.py b/migrations/versions/0178_add_filename.py index 9855031ee..beb9516ce 100644 --- a/migrations/versions/0178_add_filename.py +++ b/migrations/versions/0178_add_filename.py @@ -14,6 +14,8 @@ down_revision = '0177_add_virus_scan_statuses' def upgrade(): + # Deleting the data here is ok because a full migration from the files on s3 is coming. + op.execute("DELETE FROM daily_sorted_letter") op.add_column('daily_sorted_letter', sa.Column('file_name', sa.String(), nullable=True)) op.create_index(op.f('ix_daily_sorted_letter_file_name'), 'daily_sorted_letter', ['file_name'], unique=False) op.create_unique_constraint('uix_file_name_billing_day', 'daily_sorted_letter', ['file_name', 'billing_day'])