add new redis template usage per day key

We've run into issues with redis expiring keys while we try and write
to them - short lived redis TTLs aren't really sustainable for keys
where we mutate the state. Template usage is a hash contained in redis
where we increment a count keyed by template_id each time a message is
sent for that template. But if the key expires, hincrby (redis command
for incrementing a value in a hash) will re-create an empty hash.

This is no good, as we need the hash to be populated with the last
seven days worth of data, which we then increment further. We can't
tell whether the hincrby created the key, so a different approach
entirely was needed:

* New redis key: <service_id>-template-usage-<YYYY-MM-DD>. Note: This
  YYYY-MM-DD is BTC time so it lines up nicely with ft_billing table
* Incremented to from process_notification - if it doesn't exist yet,
  it'll be created then.
* Expiry set to 8 days every time it's incremented to.

Then, at read time, we'll just read the last eight days of keys from
Redis, and sum them up. This works because we're only ever incrementing
from that one place - never setting wholesale, never recreating the
data from scratch. So we know that if the data is in redis, then it is
good and accurate data.

One thing we *don't* know and *cannot* reason about is what no key in
redis means. It could be either of:

* This is the first message that the service has sent today.
* The key was deleted from redis for some reason.

Since we set the TTL to so long, we'll never be writing to a key that
previously expired. But if there is a redis (or operator) error and the
key is deleted, then we'll have bad data - after any data loss we'll
have to rebuild the data.
This commit is contained in:
Leo Hemsted
2018-03-29 13:55:22 +01:00
parent e692e06b7f
commit 8e73961f65
6 changed files with 86 additions and 9 deletions

View File

@@ -95,7 +95,8 @@ class Config(object):
# URL of redis instance # URL of redis instance
REDIS_URL = os.getenv('REDIS_URL') REDIS_URL = os.getenv('REDIS_URL')
REDIS_ENABLED = os.getenv('REDIS_ENABLED') == '1' REDIS_ENABLED = os.getenv('REDIS_ENABLED') == '1'
EXPIRE_CACHE_IN_SECONDS = 600 EXPIRE_CACHE_TEN_MINUTES = 600
EXPIRE_CACHE_EIGHT_DAYS = 8 * 24 * 60 * 60
# Performance platform # Performance platform
PERFORMANCE_PLATFORM_ENABLED = False PERFORMANCE_PLATFORM_ENABLED = False

View File

@@ -29,7 +29,13 @@ from app.dao.notifications_dao import (
) )
from app.v2.errors import BadRequestError from app.v2.errors import BadRequestError
from app.utils import get_template_instance, cache_key_for_service_template_counter, convert_bst_to_utc from app.utils import (
cache_key_for_service_template_counter,
cache_key_for_service_template_usage_per_day,
convert_bst_to_utc,
convert_utc_to_bst,
get_template_instance,
)
def create_content_for_notification(template, personalisation): def create_content_for_notification(template, personalisation):
@@ -108,12 +114,24 @@ def persist_notification(
redis_store.incr(redis.daily_limit_cache_key(service.id)) redis_store.incr(redis.daily_limit_cache_key(service.id))
if redis_store.get_all_from_hash(cache_key_for_service_template_counter(service.id)): if redis_store.get_all_from_hash(cache_key_for_service_template_counter(service.id)):
redis_store.increment_hash_value(cache_key_for_service_template_counter(service.id), template_id) redis_store.increment_hash_value(cache_key_for_service_template_counter(service.id), template_id)
increment_template_usage_cache(service.id, template_id, notification_created_at)
current_app.logger.info( current_app.logger.info(
"{} {} created at {}".format(notification_type, notification_id, notification_created_at) "{} {} created at {}".format(notification_type, notification_id, notification_created_at)
) )
return notification return notification
def increment_template_usage_cache(service_id, template_id, created_at):
key = cache_key_for_service_template_usage_per_day(service_id, convert_utc_to_bst(created_at))
redis_store.increment_hash_value(key, template_id)
# set key to expire in eight days - we don't know if we've just created the key or not, so must assume that we
# have and reset the expiry. Eight days is longer than any notification is in the notifications table, so we'll
# always capture the full week's numbers
redis_store.expire(key, current_app.config['EXPIRE_CACHE_EIGHT_DAYS'])
def send_notification_to_queue(notification, research_mode, queue=None): def send_notification_to_queue(notification, research_mode, queue=None):
if research_mode or notification.key_type == KEY_TYPE_TEST: if research_mode or notification.key_type == KEY_TYPE_TEST:
queue = QueueNames.RESEARCH_MODE queue = QueueNames.RESEARCH_MODE

View File

@@ -79,7 +79,7 @@ def get_template_statistics_for_7_days(limit_days, service_id):
if cache_values: if cache_values:
redis_store.set_hash_and_expire(cache_key, redis_store.set_hash_and_expire(cache_key,
cache_values, cache_values,
current_app.config['EXPIRE_CACHE_IN_SECONDS']) current_app.config['EXPIRE_CACHE_TEN_MINUTES'])
else: else:
stats = dao_get_templates_for_cache(template_stats_by_id.items()) stats = dao_get_templates_for_cache(template_stats_by_id.items())
return stats return stats

View File

@@ -79,6 +79,10 @@ def cache_key_for_service_template_counter(service_id, limit_days=7):
return "{}-template-counter-limit-{}-days".format(service_id, limit_days) return "{}-template-counter-limit-{}-days".format(service_id, limit_days)
def cache_key_for_service_template_usage_per_day(service_id, datetime):
return "{}-template-usage-{}".format(service_id, datetime.date().isoformat())
def get_public_notify_type_text(notify_type, plural=False): def get_public_notify_type_text(notify_type, plural=False):
from app.models import SMS_TYPE from app.models import SMS_TYPE
notify_type_text = notify_type notify_type_text = notify_type

View File

@@ -23,6 +23,6 @@ notifications-python-client==4.8.1
# PaaS # PaaS
awscli-cwlogs>=1.4,<1.5 awscli-cwlogs>=1.4,<1.5
git+https://github.com/alphagov/notifications-utils.git@25.2.3#egg=notifications-utils==25.2.3 git+https://github.com/alphagov/notifications-utils.git@25.3.0#egg=notifications-utils==25.3.0
git+https://github.com/alphagov/boto.git@2.43.0-patch3#egg=boto==2.43.0-patch3 git+https://github.com/alphagov/boto.git@2.43.0-patch3#egg=boto==2.43.0-patch3

View File

@@ -1,11 +1,13 @@
import datetime import datetime
import uuid import uuid
import pytest from unittest.mock import call
import pytest
from boto3.exceptions import Boto3Error from boto3.exceptions import Boto3Error
from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.exc import SQLAlchemyError
from freezegun import freeze_time from freezegun import freeze_time
from collections import namedtuple from collections import namedtuple
from flask import current_app
from app.models import ( from app.models import (
Notification, Notification,
@@ -205,9 +207,11 @@ def test_persist_notification_with_optionals(sample_job, sample_api_key, mocker)
@freeze_time("2016-01-01 11:09:00.061258") @freeze_time("2016-01-01 11:09:00.061258")
def test_persist_notification_increments_cache_if_key_exists(sample_template, sample_api_key, mocker): def test_persist_notification_doesnt_touch_cache_for_old_keys_that_dont_exist(sample_template, sample_api_key, mocker):
mock_incr = mocker.patch('app.notifications.process_notifications.redis_store.incr') mock_incr = mocker.patch('app.notifications.process_notifications.redis_store.incr')
mock_incr_hash_value = mocker.patch('app.notifications.process_notifications.redis_store.increment_hash_value') mock_incr_hash_value = mocker.patch('app.notifications.process_notifications.redis_store.increment_hash_value')
mocker.patch('app.notifications.process_notifications.redis_store.get', return_value=None)
mocker.patch('app.notifications.process_notifications.redis_store.get_all_from_hash', return_value=None)
persist_notification( persist_notification(
template_id=sample_template.id, template_id=sample_template.id,
@@ -221,11 +225,20 @@ def test_persist_notification_increments_cache_if_key_exists(sample_template, sa
reference="ref" reference="ref"
) )
mock_incr.assert_not_called() mock_incr.assert_not_called()
mock_incr_hash_value.assert_not_called() mock_incr_hash_value.assert_called_once_with(
str(sample_template.service_id) + "-template-usage-2016-01-01",
sample_template.id
)
@freeze_time("2016-01-01 11:09:00.061258")
def test_persist_notification_increments_cache_if_key_exists(sample_template, sample_api_key, mocker):
mock_incr = mocker.patch('app.notifications.process_notifications.redis_store.incr')
mock_incr_hash_value = mocker.patch('app.notifications.process_notifications.redis_store.increment_hash_value')
mocker.patch('app.notifications.process_notifications.redis_store.get', return_value=1) mocker.patch('app.notifications.process_notifications.redis_store.get', return_value=1)
mocker.patch('app.notifications.process_notifications.redis_store.get_all_from_hash', mocker.patch('app.notifications.process_notifications.redis_store.get_all_from_hash',
return_value={sample_template.id, 1}) return_value={sample_template.id, 1})
persist_notification( persist_notification(
template_id=sample_template.id, template_id=sample_template.id,
template_version=sample_template.version, template_version=sample_template.version,
@@ -236,9 +249,12 @@ def test_persist_notification_increments_cache_if_key_exists(sample_template, sa
api_key_id=sample_api_key.id, api_key_id=sample_api_key.id,
key_type=sample_api_key.key_type, key_type=sample_api_key.key_type,
reference="ref2") reference="ref2")
mock_incr.assert_called_once_with(str(sample_template.service_id) + "-2016-01-01-count", ) mock_incr.assert_called_once_with(str(sample_template.service_id) + "-2016-01-01-count", )
mock_incr_hash_value.assert_called_once_with(cache_key_for_service_template_counter(sample_template.service_id), assert mock_incr_hash_value.mock_calls == [
sample_template.id) call(str(sample_template.service_id) + "-template-counter-limit-7-days", sample_template.id),
call(str(sample_template.service_id) + "-template-usage-2016-01-01", sample_template.id),
]
@pytest.mark.parametrize('research_mode, requested_queue, expected_queue, notification_type, key_type', @pytest.mark.parametrize('research_mode, requested_queue, expected_queue, notification_type, key_type',
@@ -446,3 +462,41 @@ def test_persist_email_notification_stores_normalised_email(
assert persisted_notification.to == recipient assert persisted_notification.to == recipient
assert persisted_notification.normalised_to == expected_recipient_normalised assert persisted_notification.normalised_to == expected_recipient_normalised
@pytest.mark.parametrize('utc_time, day_in_key', [
('2016-01-01 23:00:00', '2016-01-01'),
('2016-06-01 22:59:00', '2016-06-01'),
('2016-06-01 23:00:00', '2016-06-02'),
])
def test_persist_notification_increments_and_expires_redis_template_usage(
utc_time,
day_in_key,
sample_template,
sample_api_key,
mocker
):
mock_incr_hash_value = mocker.patch('app.notifications.process_notifications.redis_store.increment_hash_value')
mock_expire = mocker.patch('app.notifications.process_notifications.redis_store.expire')
mocker.patch('app.notifications.process_notifications.redis_store.get', return_value=None)
mocker.patch('app.notifications.process_notifications.redis_store.get_all_from_hash', return_value=None)
with freeze_time(utc_time):
persist_notification(
template_id=sample_template.id,
template_version=sample_template.version,
recipient='+447111111122',
service=sample_template.service,
personalisation={},
notification_type='sms',
api_key_id=sample_api_key.id,
key_type=sample_api_key.key_type,
)
mock_incr_hash_value.assert_called_once_with(
'{}-template-usage-{}'.format(str(sample_template.service_id), day_in_key),
sample_template.id
)
mock_expire.assert_called_once_with(
'{}-template-usage-{}'.format(str(sample_template.service_id), day_in_key),
current_app.config['EXPIRE_CACHE_EIGHT_DAYS']
)