mirror of
https://github.com/GSA/notifications-api.git
synced 2026-01-30 06:21:50 -05:00
reduce number of services we try and delete notifications for
TLDR: Don't return as many services, and only return their IDs and not the whole service objects. Context: the delete notifications nightly task has been taking longer and longer, and to delete all three notification types in sequence it now takes up to 8 hours. This is because we were retrieving all services, loading them into memory on the worker, and then trying to delete notifications for each service in turn. While it does use a fair chunk of IOPS/CPU on our postgres db, we're not anywhere close to capacity on those (20% CPU, 4k IOPS out of 30k max)[1] The real issue appears to be that the task is CPU bound on the periodic worker - we see the worker spike up to 100% CPU regularly across the whole 3am-11am period. We also noticed that for each notification type the task first processes services with custom data retention (not many but some of the biggest users), then deals with all other services. We can see from looking at kibana that, for example, the task starts at 3am, and the custom data retention service email deletions are finished by 3:12am. The rest of the emails don't get deleted until 5am, so we knew that the problem is with how it handles the other services. There are currently 17000 services in the database. On a typical day, ~800 services will have notifications that are over 7 days old and need to be deleted. By only returning these services, we reduce the amount of data transfer and serialisation that needs to happen. It takes about two minutes to retrieve the distinct service ids from the notifications table for sms notifications, but that is only 5% the size of the full list so cuts down on a lot of processing Also, by only returning service_ids rather than the whole `Service` model we avoid sqlalchemy needing to do lots of data serialisation, when we were only using the `Service.id` field from that result anyway. [1] https://admin.cloud.service.gov.uk/organisations/55b1eb7d-e4c5-4359-9466-dd3ca5b0e457/spaces/80d769ff-7b01-49a4-9fa4-f87edd5328f9/services/6093d337-6918-4b97-9709-97529114eb90/metrics [2] https://grafana-paas.cloudapps.digital/d/_GlGBNbmk/notify-apps?orgId=2&refresh=5s&var-space=production&var-app=notify-delivery-worker-periodic&from=now-24h&to=now [3] https://kibana.logit.io/s/9423a789-282c-4113-908d-0be3b1bc9d1d/app/kibana#/discover?_g=(refreshInterval:(display:Off,pause:!f,value:0),time:(from:now-24h,mode:quick,to:now))&_a=(columns:!(message),index:'logstash-*',interval:auto,query:(query_string:(analyze_wildcard:!t,query:'%22Deleting%20email%20notifications%20for%20services%20without%20flexible%20data%20retention%22')),sort:!('@timestamp',desc))
This commit is contained in:
@@ -47,7 +47,6 @@ from app.models import (
|
||||
Notification,
|
||||
NotificationHistory,
|
||||
ProviderDetails,
|
||||
Service,
|
||||
ServiceDataRetention,
|
||||
)
|
||||
from app.utils import (
|
||||
@@ -323,16 +322,31 @@ def delete_notifications_older_than_retention_by_type(notification_type, qry_lim
|
||||
deleted += _move_notifications_to_notification_history(
|
||||
notification_type, f.service_id, day_to_delete_backwards_from, qry_limit)
|
||||
|
||||
current_app.logger.info(
|
||||
'Deleting {} notifications for services without flexible data retention'.format(notification_type))
|
||||
|
||||
seven_days_ago = get_london_midnight_in_utc(convert_utc_to_bst(datetime.utcnow()).date()) - timedelta(days=7)
|
||||
services_with_data_retention = [x.service_id for x in flexible_data_retention]
|
||||
service_ids_to_purge = Service.query.filter(Service.id.notin_(services_with_data_retention)).all()
|
||||
service_ids_with_data_retention = {x.service_id for x in flexible_data_retention}
|
||||
|
||||
for service in service_ids_to_purge:
|
||||
# get a list of all service ids that we'll need to delete for. Typically that might only be 5% of services.
|
||||
# This query takes a couple of mins to run.
|
||||
service_ids_that_have_sent_notifications_recently = {
|
||||
row.service_id
|
||||
for row in db.session.query(
|
||||
Notification.service_id
|
||||
).filter(
|
||||
Notification.notification_type == notification_type,
|
||||
Notification.created_at < seven_days_ago
|
||||
).distinct()
|
||||
}
|
||||
|
||||
service_ids_to_purge = service_ids_that_have_sent_notifications_recently - service_ids_with_data_retention
|
||||
|
||||
current_app.logger.info('Deleting {} notifications for {} services without flexible data retention'.format(
|
||||
notification_type,
|
||||
len(service_ids_to_purge)
|
||||
))
|
||||
|
||||
for service_id in service_ids_to_purge:
|
||||
deleted += _move_notifications_to_notification_history(
|
||||
notification_type, service.id, seven_days_ago, qry_limit)
|
||||
notification_type, service_id, seven_days_ago, qry_limit)
|
||||
|
||||
current_app.logger.info('Finished deleting {} notifications'.format(notification_type))
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
from datetime import date, datetime, timedelta
|
||||
from unittest.mock import ANY, call
|
||||
|
||||
import boto3
|
||||
import pytest
|
||||
@@ -446,3 +447,57 @@ def test_insert_notification_history_delete_notifications_insert_for_key_type(sa
|
||||
assert len(notifications) == 1
|
||||
assert with_test_key.id == notifications[0].id
|
||||
assert len(history_rows) == 2
|
||||
|
||||
|
||||
@freeze_time('2020-12-10')
|
||||
def test_delete_notifications_only_runs_move_notifications_for_services_that_have_things_to_delete(mocker):
|
||||
move_notifications_mock = mocker.patch(
|
||||
'app.dao.notifications_dao._move_notifications_to_notification_history',
|
||||
return_value=0
|
||||
)
|
||||
|
||||
service_retention_will_delete = create_service(service_name='a')
|
||||
service_retention_nothing_to_delete = create_service(service_name='b')
|
||||
service_will_delete = create_service(service_name='c')
|
||||
service_nothing_to_delete = create_service(service_name='d')
|
||||
|
||||
create_service_data_retention(
|
||||
service_retention_will_delete,
|
||||
notification_type='sms',
|
||||
days_of_retention=3
|
||||
)
|
||||
create_service_data_retention(
|
||||
service_retention_nothing_to_delete,
|
||||
notification_type='sms',
|
||||
days_of_retention=3
|
||||
)
|
||||
|
||||
create_template(service_retention_will_delete)
|
||||
create_template(service_retention_nothing_to_delete)
|
||||
create_template(service_will_delete)
|
||||
nothing_to_delete_sms_template = create_template(service_nothing_to_delete, template_type='sms')
|
||||
nothing_to_delete_email_template = create_template(service_nothing_to_delete, template_type='email')
|
||||
|
||||
# this notification will be deleted because it's past retention
|
||||
create_notification(service_retention_will_delete.templates[0], created_at=datetime.now() - timedelta(days=8))
|
||||
|
||||
# will be deleted as service has no custom retention, but past our default 7 days
|
||||
create_notification(service_will_delete.templates[0], created_at=datetime.now() - timedelta(days=8))
|
||||
|
||||
# will be kept as it's recent, but we'll still run _move_notifications_to_notification_history
|
||||
# as it's for a service with custom retention
|
||||
create_notification(service_retention_nothing_to_delete.templates[0], created_at=datetime.now() - timedelta(days=2))
|
||||
|
||||
# will be kept as it's recent, and we won't run _move_notifications_to_notification_history
|
||||
create_notification(nothing_to_delete_sms_template, created_at=datetime.now() - timedelta(days=2))
|
||||
# this is an old notification, but for email not sms, so we won't run _move_notifications_to_notification_history
|
||||
create_notification(nothing_to_delete_email_template, created_at=datetime.now() - timedelta(days=2))
|
||||
|
||||
delete_notifications_older_than_retention_by_type('sms')
|
||||
|
||||
# called for all services except for "service_nothing_to_delete"
|
||||
move_notifications_mock.assert_has_calls([
|
||||
call('sms', service_retention_will_delete.id, ANY, ANY),
|
||||
call('sms', service_retention_nothing_to_delete.id, ANY, ANY),
|
||||
call('sms', service_will_delete.id, ANY, ANY),
|
||||
], any_order=True)
|
||||
|
||||
Reference in New Issue
Block a user