From 87b26868750419af62b54d827970d3f1286a8e7e Mon Sep 17 00:00:00 2001 From: Chris Hill-Scott Date: Thu, 16 Jan 2020 16:58:26 +0000 Subject: [PATCH 1/6] =?UTF-8?q?Use=20time=20to=20determine=20why=20notific?= =?UTF-8?q?ations=20don=E2=80=99t=20exist?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Notifications won’t exist for a job if: - it’s just started - it started a long time ago (older than the retention period) We have a bug where: 1. Job starts processing, puts notifications on queue 2. Job finishes processing, sets status to `finished` 3. First notification gets picked up off the queue and put in the database In between 2. and 3. it’s possible for a job to be finished, but also to have no notifications. We’re saying this is because the notifications have been deleted, whereas really it’s because they haven’t been created yet. This commit fixes that bug by introducing the concept of recency for jobs. ‘Recent’ is defined as 1 day, which is: - a lot longer than it takes to create any notifications - a bit shorter than anyone’s retention time N.B. `processing_started` is defined here: https://github.com/alphagov/notifications-api/blob/879ba1d5f092dbf6e15bf9adadd53e4657660490/app/models.py#L1194 It can be `None` for scheduled jobs that haven’t started yet. --- app/models/job.py | 17 ++++++++ .../partials/jobs/notifications.html | 2 +- tests/__init__.py | 8 +++- tests/app/main/views/test_jobs.py | 41 ++++++++++++++++--- 4 files changed, 60 insertions(+), 8 deletions(-) diff --git a/app/models/job.py b/app/models/job.py index 855aa1ce8..76ca7d2f1 100644 --- a/app/models/job.py +++ b/app/models/job.py @@ -48,6 +48,12 @@ class Job(JSONModel): def scheduled_for(self): return self._dict.get('scheduled_for') + @property + def processing_started(self): + if not self._dict.get('processing_started'): + return None + return datetime.strptime(self._dict['processing_started'][:-6], '%Y-%m-%dT%H:%M:%S') + def _aggregate_statistics(self, *statuses): return sum( outcome['count'] for outcome in self._dict['statistics'] @@ -95,6 +101,17 @@ class Job(JSONModel): def finished_processing(self): return self.notification_count == self.notifications_sent + @property + def recently_created(self): + if not self.processing_started: + # Assume that if processing hasn’t started yet then the job + # must have been created recently enough to not have any + # notifications yet + return True + return ( + datetime.utcnow() - self.processing_started + ).days < 1 + @property def template_id(self): return self._dict['template'] diff --git a/app/templates/partials/jobs/notifications.html b/app/templates/partials/jobs/notifications.html index 16b6c449a..41d55c73d 100644 --- a/app/templates/partials/jobs/notifications.html +++ b/app/templates/partials/jobs/notifications.html @@ -47,7 +47,7 @@ notifications, caption=uploaded_file_name, caption_visible=False, - empty_message='These messages have been deleted because they were sent more than {} days ago'.format(service_data_retention_days) if job.job_status == 'finished' else 'No messages to show yet…', + empty_message='No messages to show yet…' if job.recently_created else 'These messages have been deleted because they were sent more than {} days ago'.format(service_data_retention_days), field_headings=[ 'Recipient', 'Status' diff --git a/tests/__init__.py b/tests/__init__.py index 4348d7c3a..5555b853e 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -361,7 +361,8 @@ def job_json( notifications_sent=1, notifications_requested=1, job_status='finished', - scheduled_for='' + scheduled_for='', + processing_started=None, ): if job_id is None: job_id = str(generate_uuid()) @@ -391,8 +392,11 @@ def job_json( created_by['name'], created_by['email_address'], ), - 'scheduled_for': scheduled_for } + if scheduled_for: + data.update(scheduled_for=scheduled_for) + if processing_started: + data.update(processing_started=processing_started) return data diff --git a/tests/app/main/views/test_jobs.py b/tests/app/main/views/test_jobs.py index f8723505a..b54e2431f 100644 --- a/tests/app/main/views/test_jobs.py +++ b/tests/app/main/views/test_jobs.py @@ -1,5 +1,6 @@ import json import uuid +from datetime import datetime, timezone import pytest from flask import url_for @@ -329,28 +330,58 @@ def test_should_show_job_without_notifications( assert page.select_one('tbody').text.strip() == 'No messages to show yet…' +@freeze_time("2020-01-10 0:0:0") +@pytest.mark.parametrize('created_at, processing_started, expected_message', ( + # Recently created, not yet started + (datetime(2020, 1, 10, 0, 0, 0), None, ( + 'No messages to show yet…' + )), + # Just started + (datetime(2020, 1, 10, 0, 0, 0), datetime(2020, 1, 10, 0, 0, 1), ( + 'No messages to show yet…' + )), + # Created a while ago, just started + (datetime(2020, 1, 1, 0, 0, 0), datetime(2020, 1, 10, 0, 0, 1), ( + 'No messages to show yet…' + )), + # Created a while ago, started a couple of days ago + (datetime(2020, 1, 1, 0, 0, 0), datetime(2020, 1, 8, 0, 0, 1), ( + 'These messages have been deleted because they were sent more than 7 days ago' + )), +)) def test_should_show_old_job( client_request, service_one, active_user_with_permissions, mock_get_service_template, - mock_get_job, mocker, mock_get_notifications_with_no_notifications, mock_get_service_data_retention, fake_uuid, + created_at, + processing_started, + expected_message, ): + mocker.patch('app.job_api_client.get_job', return_value={ + "data": job_json( + SERVICE_ONE_ID, + active_user_with_permissions, + created_at=created_at.replace(tzinfo=timezone.utc).isoformat(), + processing_started=( + processing_started.replace(tzinfo=timezone.utc).isoformat() + if processing_started else None + ), + ), + }) page = client_request.get( 'main.view_job', - service_id=service_one['id'], + service_id=SERVICE_ONE_ID, job_id=fake_uuid, ) assert not page.select('.pill a') assert not page.select('p.hint') assert not page.select('a[download]') - assert page.select_one('tbody').text.strip() == ( - 'These messages have been deleted because they were sent more than 7 days ago' - ) + assert page.select_one('tbody').text.strip() == expected_message @freeze_time("2016-01-01 11:09:00.061258") From d93866bc7e3e137614ad0d3f30c4ec4da02b07b3 Mon Sep 17 00:00:00 2001 From: Chris Hill-Scott Date: Tue, 21 Jan 2020 12:23:30 +0000 Subject: [PATCH 2/6] Use utils function to parse datetime strings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather than hard-coding a format string in a bunch of different places we can use the function we already have in utils. This commit also refactors some logic around password resets to put the date-parsing changes in the most sensible bit of the codebase, so it’s clearer what the intention of the view-layer code is. --- app/main/views/new_password.py | 4 +--- app/main/views/service_settings.py | 7 +++++-- app/models/job.py | 12 +++++++++--- app/models/user.py | 10 ++++++++++ 4 files changed, 25 insertions(+), 8 deletions(-) diff --git a/app/main/views/new_password.py b/app/main/views/new_password.py index ed45c3c1d..7b3c701f9 100644 --- a/app/main/views/new_password.py +++ b/app/main/views/new_password.py @@ -1,5 +1,4 @@ import json -from datetime import datetime from flask import ( current_app, @@ -29,8 +28,7 @@ def new_password(token): email_address = json.loads(token_data)['email'] user = User.from_email_address(email_address) - if user.password_changed_at and datetime.strptime(user.password_changed_at, '%Y-%m-%d %H:%M:%S.%f') > \ - datetime.strptime(json.loads(token_data)['created_at'], '%Y-%m-%d %H:%M:%S.%f'): + if user.password_changed_more_recently_than(json.loads(token_data)['created_at']): flash('The link in the email has already been used') return redirect(url_for('main.index')) diff --git a/app/main/views/service_settings.py b/app/main/views/service_settings.py index 79447ebe5..707d92a3b 100644 --- a/app/main/views/service_settings.py +++ b/app/main/views/service_settings.py @@ -14,6 +14,7 @@ from flask import ( ) from flask_login import current_user from notifications_python_client.errors import HTTPError +from notifications_utils.timezones import utc_string_to_aware_gmt_datetime from app import ( billing_api_client, @@ -475,8 +476,10 @@ def get_service_verify_reply_to_address_partials(service_id, notification_id): email_address=notification["to"], is_default=is_default ) - created_at_no_tz = notification["created_at"][:-6] - seconds_since_sending = (datetime.utcnow() - datetime.strptime(created_at_no_tz, '%Y-%m-%dT%H:%M:%S.%f')).seconds + seconds_since_sending = ( + utc_string_to_aware_gmt_datetime(datetime.utcnow().isoformat()) - + utc_string_to_aware_gmt_datetime(notification['created_at']) + ).seconds if notification["status"] in FAILURE_STATUSES or ( notification["status"] in SENDING_STATUSES and seconds_since_sending > current_app.config['REPLY_TO_EMAIL_ADDRESS_VALIDATION_TIMEOUT'] diff --git a/app/models/job.py b/app/models/job.py index 76ca7d2f1..ecb72dd95 100644 --- a/app/models/job.py +++ b/app/models/job.py @@ -5,6 +5,10 @@ from notifications_utils.letter_timings import ( get_letter_timings, letter_can_be_cancelled, ) +from notifications_utils.timezones import ( + local_timezone, + utc_string_to_aware_gmt_datetime, +) from werkzeug.utils import cached_property from app.models import JSONModel, ModelList @@ -23,6 +27,7 @@ class Job(JSONModel): 'template_version', 'original_file_name', 'created_at', + 'processing_started', 'notification_count', 'job_status', 'created_by', @@ -52,7 +57,7 @@ class Job(JSONModel): def processing_started(self): if not self._dict.get('processing_started'): return None - return datetime.strptime(self._dict['processing_started'][:-6], '%Y-%m-%dT%H:%M:%S') + return utc_string_to_aware_gmt_datetime(self._dict['processing_started']) def _aggregate_statistics(self, *statuses): return sum( @@ -109,7 +114,7 @@ class Job(JSONModel): # notifications yet return True return ( - datetime.utcnow() - self.processing_started + datetime.utcnow().astimezone(local_timezone) - self.processing_started ).days < 1 @property @@ -142,7 +147,8 @@ class Job(JSONModel): return False if not letter_can_be_cancelled( - 'created', datetime.strptime(self.created_at[:-6], '%Y-%m-%dT%H:%M:%S.%f') + 'created', + utc_string_to_aware_gmt_datetime(self.created_at).replace(tzinfo=None) ): return False diff --git a/app/models/user.py b/app/models/user.py index 61674ce30..ac465000a 100644 --- a/app/models/user.py +++ b/app/models/user.py @@ -1,6 +1,7 @@ from flask import abort, current_app, request, session from flask_login import AnonymousUserMixin, UserMixin, login_user from notifications_python_client.errors import HTTPError +from notifications_utils.timezones import utc_string_to_aware_gmt_datetime from werkzeug.utils import cached_property from app.models import JSONModel, ModelList @@ -108,6 +109,15 @@ class User(JSONModel, UserMixin): response = user_api_client.update_password(self.id, password) self.__init__(response) + def password_changed_more_recently_than(self, datetime_string): + if not self.password_changed_at: + return False + return utc_string_to_aware_gmt_datetime( + self.password_changed_at + ) > utc_string_to_aware_gmt_datetime( + datetime_string + ) + def set_permissions(self, service_id, permissions, folder_permissions): user_api_client.set_user_permissions( self.id, From f6a263a7b20732df713f0882821a39e726371dc7 Mon Sep 17 00:00:00 2001 From: Chris Hill-Scott Date: Tue, 21 Jan 2020 14:07:23 +0000 Subject: [PATCH 3/6] Rename property to more accurately describe purpose `recently_created` says it would just be looking at the `created_at` field to see if it's been created recently. Technically this method isn't doing that, whilst its behaviour would be similar, it's actually different and maybe therefore a bit misleading. --- app/models/job.py | 2 +- app/templates/partials/jobs/notifications.html | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/app/models/job.py b/app/models/job.py index ecb72dd95..3c86c075e 100644 --- a/app/models/job.py +++ b/app/models/job.py @@ -107,7 +107,7 @@ class Job(JSONModel): return self.notification_count == self.notifications_sent @property - def recently_created(self): + def awaiting_processing_or_recently_processed(self): if not self.processing_started: # Assume that if processing hasn’t started yet then the job # must have been created recently enough to not have any diff --git a/app/templates/partials/jobs/notifications.html b/app/templates/partials/jobs/notifications.html index 41d55c73d..869573da2 100644 --- a/app/templates/partials/jobs/notifications.html +++ b/app/templates/partials/jobs/notifications.html @@ -47,7 +47,7 @@ notifications, caption=uploaded_file_name, caption_visible=False, - empty_message='No messages to show yet…' if job.recently_created else 'These messages have been deleted because they were sent more than {} days ago'.format(service_data_retention_days), + empty_message='No messages to show yet…' if job.awaiting_processing_or_recently_processed else 'These messages have been deleted because they were sent more than {} days ago'.format(service_data_retention_days), field_headings=[ 'Recipient', 'Status' From f5ced76e63c9a61e1e27f3a937f8497f51e6ad70 Mon Sep 17 00:00:00 2001 From: Chris Hill-Scott Date: Tue, 21 Jan 2020 14:09:54 +0000 Subject: [PATCH 4/6] =?UTF-8?q?Don=E2=80=99t=20mock=20job=20processing=20i?= =?UTF-8?q?n=20the=20future?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It could hide bugs or make the tests harder to understand later on if what they’re testing is an impossible scenario. --- tests/app/main/views/test_jobs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/app/main/views/test_jobs.py b/tests/app/main/views/test_jobs.py index b54e2431f..1160a337c 100644 --- a/tests/app/main/views/test_jobs.py +++ b/tests/app/main/views/test_jobs.py @@ -330,7 +330,7 @@ def test_should_show_job_without_notifications( assert page.select_one('tbody').text.strip() == 'No messages to show yet…' -@freeze_time("2020-01-10 0:0:0") +@freeze_time("2020-01-10 1:0:0") @pytest.mark.parametrize('created_at, processing_started, expected_message', ( # Recently created, not yet started (datetime(2020, 1, 10, 0, 0, 0), None, ( From d27d400f530e9f780a7fd76d92456ddc811983a1 Mon Sep 17 00:00:00 2001 From: Chris Hill-Scott Date: Tue, 21 Jan 2020 14:15:48 +0000 Subject: [PATCH 5/6] Test the boundary conditions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since date math can be hard it’s good to test as close to the boundary as possible, which hopefully catches stuff like timezone related bugs. --- tests/app/main/views/test_jobs.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/app/main/views/test_jobs.py b/tests/app/main/views/test_jobs.py index 1160a337c..25a79afdf 100644 --- a/tests/app/main/views/test_jobs.py +++ b/tests/app/main/views/test_jobs.py @@ -344,8 +344,12 @@ def test_should_show_job_without_notifications( (datetime(2020, 1, 1, 0, 0, 0), datetime(2020, 1, 10, 0, 0, 1), ( 'No messages to show yet…' )), - # Created a while ago, started a couple of days ago - (datetime(2020, 1, 1, 0, 0, 0), datetime(2020, 1, 8, 0, 0, 1), ( + # Created a while ago, started just within the last 24h + (datetime(2020, 1, 1, 0, 0, 0), datetime(2020, 1, 9, 1, 0, 1), ( + 'No messages to show yet…' + )), + # Created a while ago, started exactly 24h ago + (datetime(2020, 1, 1, 0, 0, 0), datetime(2020, 1, 9, 1, 0, 0), ( 'These messages have been deleted because they were sent more than 7 days ago' )), )) From 3b108e8c934f4553e2f0c76ab59ec0dc114a3f45 Mon Sep 17 00:00:00 2001 From: Chris Hill-Scott Date: Thu, 23 Jan 2020 16:31:21 +0000 Subject: [PATCH 6/6] Add comment clarifying timings in test case --- tests/app/main/views/test_jobs.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/app/main/views/test_jobs.py b/tests/app/main/views/test_jobs.py index c82ead594..f6e9b0ea4 100644 --- a/tests/app/main/views/test_jobs.py +++ b/tests/app/main/views/test_jobs.py @@ -349,6 +349,12 @@ def test_should_show_job_without_notifications( 'No messages to show yet…' )), # Created a while ago, started exactly 24h ago + # --- + # It doesn’t matter that 24h (1 day) and 7 days (the service’s data + # retention) don’t match up. We’re testing the case of no + # notifications existing more than 1 day after the job started + # processing. In this case we assume it’s because the service’s + # data retention has kicked in. (datetime(2020, 1, 1, 0, 0, 0), datetime(2020, 1, 9, 1, 0, 0), ( 'These messages have been deleted because they were sent more than 7 days ago' )),