Merge pull request #1833 from alphagov/read-redis

Read template usage stats from new redis keys
Authored by Leo Hemsted on 2018-04-30 15:15:16 +01:00, committed by GitHub
11 changed files with 604 additions and 739 deletions

View File

@@ -28,8 +28,3 @@ class DAOClass(object):
db.session.delete(inst)
if _commit:
db.session.commit()
def days_ago(number_of_days):
from datetime import date, timedelta
return date.today() - timedelta(days=number_of_days)
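
For context (not part of the diff): the removed days_ago helper returned a plain local date, while its replacement midnight_n_days_ago, added to app.utils further down, returns the UTC datetime of London midnight, so callers no longer have to cast timestamp columns to dates. A rough side-by-side sketch of the two, assuming pytz and a Europe/London timezone as used elsewhere in the codebase:

# sketch only - simplified versions of the removed and added helpers
from datetime import date, datetime, timedelta
import pytz

local_timezone = pytz.timezone("Europe/London")

def days_ago(number_of_days):
    # old behaviour: a naive local date
    return date.today() - timedelta(days=number_of_days)

def midnight_n_days_ago(number_of_days):
    # new behaviour: London midnight n days ago, as a naive UTC datetime
    day = datetime.utcnow() - timedelta(days=number_of_days)
    return local_timezone.localize(
        datetime.combine(day.date(), datetime.min.time())
    ).astimezone(pytz.UTC).replace(tzinfo=None)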

View File

@@ -4,15 +4,13 @@ from datetime import datetime, timedelta
from flask import current_app
from notifications_utils.statsd_decorators import statsd
from sqlalchemy import (
Date as sql_date,
asc,
cast,
desc,
func,
)
from app import db
from app.dao import days_ago
from app.utils import midnight_n_days_ago
from app.models import (
Job,
JOB_STATUS_PENDING,
@@ -53,7 +51,7 @@ def dao_get_jobs_by_service_id(service_id, limit_days=None, page=1, page_size=50
Job.original_file_name != current_app.config['ONE_OFF_MESSAGE_FILENAME'],
]
if limit_days is not None:
query_filter.append(cast(Job.created_at, sql_date) >= days_ago(limit_days))
query_filter.append(Job.created_at >= midnight_n_days_ago(limit_days))
if statuses is not None and statuses != ['']:
query_filter.append(
Job.job_status.in_(statuses)

View File

@@ -21,7 +21,7 @@ from sqlalchemy.sql import functions
from notifications_utils.international_billing_rates import INTERNATIONAL_BILLING_RATES
from app import db, create_uuid
from app.dao import days_ago
from app.utils import midnight_n_days_ago
from app.errors import InvalidRequest
from app.models import (
Notification,
@@ -46,51 +46,37 @@ from app.models import (
)
from app.dao.dao_utils import transactional
from app.utils import convert_utc_to_bst
from app.utils import convert_utc_to_bst, get_london_midnight_in_utc
@statsd(namespace="dao")
def dao_get_template_usage(service_id, limit_days=None):
query_filter = []
table = NotificationHistory
if limit_days is not None and limit_days <= 7:
table = Notification
# only limit days if it's not seven days, as 7 days == the whole of Notifications table.
if limit_days != 7:
query_filter.append(table.created_at >= days_ago(limit_days))
elif limit_days is not None:
# case where not under 7 days, so using NotificationsHistory so limit allowed
query_filter.append(table.created_at >= days_ago(limit_days))
query_filter.append(table.service_id == service_id)
query_filter.append(table.key_type != KEY_TYPE_TEST)
# only limit days if it's not seven days, as 7 days == the whole of Notifications table.
if limit_days is not None and limit_days != 7:
query_filter.append(table.created_at >= days_ago(limit_days))
def dao_get_template_usage(service_id, day):
start = get_london_midnight_in_utc(day)
end = get_london_midnight_in_utc(day + timedelta(days=1))
notifications_aggregate_query = db.session.query(
func.count().label('count'),
table.template_id
Notification.template_id
).filter(
*query_filter
Notification.created_at >= start,
Notification.created_at < end,
Notification.service_id == service_id,
Notification.key_type != KEY_TYPE_TEST,
).group_by(
table.template_id
Notification.template_id
).subquery()
query = db.session.query(
Template.id.label('template_id'),
Template.id,
Template.name,
Template.template_type,
Template.is_precompiled_letter,
notifications_aggregate_query.c.count
).join(
func.coalesce(notifications_aggregate_query.c.count, 0).label('count')
).outerjoin(
notifications_aggregate_query,
notifications_aggregate_query.c.template_id == Template.id
).filter(
Template.service_id == service_id
).order_by(Template.name)
return query.all()
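
A minimal usage sketch of the rewritten query, assuming a service_id is in scope and picking an arbitrary day: it now covers a single BST day and, thanks to the outer join and coalesce, returns a row for every template the service owns, with a count of 0 for templates that saw no use that day.

from datetime import date

rows = dao_get_template_usage(service_id, day=date(2018, 4, 30))
for row in rows:
    # each row carries id, name, template_type, is_precompiled_letter and count
    print(row.id, row.name, row.count)
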
@@ -262,8 +248,7 @@ def get_notifications_for_service(
filters = [Notification.service_id == service_id]
if limit_days is not None:
days_ago = date.today() - timedelta(days=limit_days)
filters.append(func.date(Notification.created_at) >= days_ago)
filters.append(Notification.created_at >= midnight_n_days_ago(limit_days))
if older_than is not None:
older_than_created_at = db.session.query(

View File

@@ -2,7 +2,6 @@ from datetime import datetime
import uuid
from sqlalchemy import asc, desc
from sqlalchemy.sql.expression import bindparam
from app import db
from app.models import (
@@ -124,25 +123,16 @@ def dao_get_template_versions(service_id, template_id):
).all()
def dao_get_templates_for_cache(cache):
if not cache or len(cache) == 0:
return []
# First create a subquery that is a union select of the cache values
# Then join templates to the subquery
cache_queries = [
db.session.query(bindparam("template_id" + str(i),
uuid.UUID(template_id.decode())).label('template_id'),
bindparam("count" + str(i), int(count.decode())).label('count'))
for i, (template_id, count) in enumerate(cache)]
cache_subq = cache_queries[0].union(*cache_queries[1:]).subquery()
query = db.session.query(Template.id.label('template_id'),
Template.template_type,
Template.name,
Template.is_precompiled_letter,
cache_subq.c.count.label('count')
).join(cache_subq,
Template.id == cache_subq.c.template_id
).order_by(Template.name)
def dao_get_multiple_template_details(template_ids):
query = db.session.query(
Template.id,
Template.template_type,
Template.name,
Template.is_precompiled_letter
).filter(
Template.id.in_(template_ids)
).order_by(
Template.name
)
return query.all()
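
An illustrative call (the id is made up): unlike the removed cache-union helper, this one just takes an iterable of template ids - in the template-statistics endpoint these are the keys of the aggregated redis counters - and returns name, type and is_precompiled_letter details ordered by template name.

# hypothetical id; in the endpoint the argument is template_stats_by_id.keys()
details = dao_get_multiple_template_details(
    ['0f9d2c1e-7d7e-4f2a-9b3c-1234567890ab']
)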

View File

@@ -2,7 +2,8 @@ from flask import (
Blueprint,
jsonify,
request,
current_app)
current_app
)
from app import redis_store
from app.dao.notifications_dao import (
@@ -10,15 +11,16 @@ from app.dao.notifications_dao import (
dao_get_last_template_usage
)
from app.dao.templates_dao import (
dao_get_templates_for_cache,
dao_get_multiple_template_details,
dao_get_template_by_id_and_service_id
)
from app.schemas import notification_with_template_schema
from app.utils import cache_key_for_service_template_counter
from app.utils import cache_key_for_service_template_usage_per_day, last_n_days
from app.errors import register_errors, InvalidRequest
from collections import Counter
template_statistics = Blueprint('template-statistics',
template_statistics = Blueprint('template_statistics',
__name__,
url_prefix='/service/<service_id>/template-statistics')
@@ -27,40 +29,22 @@ register_errors(template_statistics)
@template_statistics.route('')
def get_template_statistics_for_service_by_day(service_id):
if request.args.get('limit_days'):
try:
limit_days = int(request.args['limit_days'])
except ValueError as e:
error = '{} is not an integer'.format(request.args['limit_days'])
message = {'limit_days': [error]}
raise InvalidRequest(message, status_code=400)
else:
limit_days = None
try:
limit_days = int(request.args.get('limit_days', ''))
except ValueError:
error = '{} is not an integer'.format(request.args.get('limit_days'))
message = {'limit_days': [error]}
raise InvalidRequest(message, status_code=400)
if limit_days == 7:
stats = get_template_statistics_for_7_days(limit_days, service_id)
else:
stats = dao_get_template_usage(service_id, limit_days=limit_days)
if limit_days < 1 or limit_days > 7:
raise InvalidRequest({'limit_days': ['limit_days must be between 1 and 7']}, status_code=400)
def serialize(data):
return {
'count': data.count,
'template_id': str(data.template_id),
'template_name': data.name,
'template_type': data.template_type,
'is_precompiled_letter': data.is_precompiled_letter
}
return jsonify(data=[serialize(row) for row in stats])
return jsonify(data=_get_template_statistics_for_last_n_days(service_id, limit_days))
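
For illustration, and not part of the changed file: with a Flask test client (client and service_id assumed to be in scope, counts made up), the endpoint now behaves roughly like this:

# GET /service/<service_id>/template-statistics?limit_days=7
response = client.get(
    '/service/{}/template-statistics'.format(service_id),
    query_string={'limit_days': 7},
)
assert response.status_code == 200
# each item in the returned 'data' list looks like:
# {'count': 3, 'template_id': '...', 'template_name': 'Reminder',
#  'template_type': 'email', 'is_precompiled_letter': False}

# limit_days outside 1-7 (or a non-integer) is now rejected
assert client.get(
    '/service/{}/template-statistics'.format(service_id),
    query_string={'limit_days': 8},
).status_code == 400
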
@template_statistics.route('/<template_id>')
def get_template_statistics_for_template_id(service_id, template_id):
template = dao_get_template_by_id_and_service_id(template_id, service_id)
if not template:
message = 'No template found for id {}'.format(template_id)
errors = {'template_id': [message]}
raise InvalidRequest(errors, status_code=404)
data = None
notification = dao_get_last_template_usage(template_id, template.template_type)
@@ -70,31 +54,46 @@ def get_template_statistics_for_template_id(service_id, template_id):
return jsonify(data=data)
def get_template_statistics_for_7_days(limit_days, service_id):
cache_key = cache_key_for_service_template_counter(service_id)
template_stats_by_id = redis_store.get_all_from_hash(cache_key)
if not template_stats_by_id:
stats = dao_get_template_usage(service_id, limit_days=limit_days)
cache_values = dict([(x.template_id, x.count) for x in stats])
if cache_values:
redis_store.set_hash_and_expire(cache_key,
cache_values,
current_app.config['EXPIRE_CACHE_TEN_MINUTES'])
else:
stats = dao_get_templates_for_cache(template_stats_by_id.items())
return stats
def _get_template_statistics_for_last_n_days(service_id, limit_days):
template_stats_by_id = Counter()
# TODO: can only switch to this code when redis has been populated (either through time passing or a manual step)
# from collections import Counter
# from notifications_utils.redis_client import RedisException
# template_stats_by_id = Counter()
# for day in last_7_days:
# # "<SERVICE_ID>-template-usage-{YYYY-MM-DD}"
# key = cache_key_for_service_templates_used_per_day(service_id, limit_days)
# try:
# template_stats_by_id += Counter(redis_store.get_all_from_hash(key, raise_exception=True))
# except RedisException:
# # TODO: ????
#
# # TODO: streamline db query and avoid weird unions if possible.
# return dao_get_templates_for_cache(template_stats_by_id.items())
for day in last_n_days(limit_days):
# "{SERVICE_ID}-template-usage-{YYYY-MM-DD}"
key = cache_key_for_service_template_usage_per_day(service_id, day)
stats = redis_store.get_all_from_hash(key)
if stats:
stats = {
k.decode('utf-8'): int(v) for k, v in stats.items()
}
else:
# key didn't exist (or redis was down) - let's populate from the DB.
stats = {
str(row.id): row.count for row in dao_get_template_usage(service_id, day=day)
}
# if there is data in the db but not in redis, let's put it in redis so we don't have to do
# this calculation again next time. If there isn't any data, we can't put it in redis:
# zero-length hashes aren't a thing in redis. (There'll only be no data if the service has no templates.)
# Nothing is stored if redis is down.
if stats:
redis_store.set_hash_and_expire(
key,
stats,
current_app.config['EXPIRE_CACHE_EIGHT_DAYS']
)
template_stats_by_id += Counter(stats)
# attach count from stats to name/type/etc from database
template_details = dao_get_multiple_template_details(template_stats_by_id.keys())
return [
{
'count': template_stats_by_id[str(template.id)],
'template_id': str(template.id),
'template_name': template.name,
'template_type': template.template_type,
'is_precompiled_letter': template.is_precompiled_letter
}
for template in template_details
# we don't want to return templates with no count to the front-end,
# but they're returned from the DB and might be put in redis like that (if there was no data that day)
if template_stats_by_id[str(template.id)] != 0
]
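
To make the aggregation concrete (per-day values made up): each day contributes one decoded redis hash of template id to count, the hashes are summed with collections.Counter, and only then joined to template details, so a template that is absent from every hash keeps a count of 0 and gets filtered out of the response.

from collections import Counter

per_day_stats = [                    # decoded per-day hashes, hypothetical ids
    {'0f9d...': 12, '9b3c...': 3},
    {'0f9d...': 5},
    {},                              # a day with no usage contributes nothing
]
totals = Counter()
for stats in per_day_stats:
    totals += Counter(stats)
# totals == Counter({'0f9d...': 17, '9b3c...': 3})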

View File

@@ -39,7 +39,7 @@ def get_london_midnight_in_utc(date):
This function converts a date to its midnight in BST (British Summer Time) and expresses it in UTC,
the tzinfo is lastly removed from the datetime because the database stores the timestamps without timezone.
:param date: the day to calculate the London midnight in UTC for
:return: the datetime of London midnight in UTC, for example 2016-06-17 = 2016-06-17 23:00:00
:return: the datetime of London midnight in UTC, for example 2016-06-17 = 2016-06-16 23:00:00
"""
return local_timezone.localize(datetime.combine(date, datetime.min.time())).astimezone(
pytz.UTC).replace(
@@ -80,6 +80,9 @@ def cache_key_for_service_template_counter(service_id, limit_days=7):
def cache_key_for_service_template_usage_per_day(service_id, datetime):
"""
You should pass a BST datetime into this function
"""
return "service-{}-template-usage-{}".format(service_id, datetime.date().isoformat())
@@ -90,3 +93,25 @@ def get_public_notify_type_text(notify_type, plural=False):
notify_type_text = 'text message'
return '{}{}'.format(notify_type_text, 's' if plural else '')
def midnight_n_days_ago(number_of_days):
"""
Returns midnight a number of days ago. Takes care of daylight savings etc.
"""
return get_london_midnight_in_utc(datetime.utcnow() - timedelta(days=number_of_days))
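
A worked example of the daylight-saving handling (times picked for illustration): if utcnow() is the afternoon of 30 April 2018, which falls inside British Summer Time, London midnight is 23:00 UTC on the previous calendar day.

# with datetime.utcnow() == datetime(2018, 4, 30, 14, 15):
midnight_n_days_ago(0)   # -> datetime(2018, 4, 29, 23, 0)  London midnight of 30 April
midnight_n_days_ago(7)   # -> datetime(2018, 4, 22, 23, 0)  London midnight of 23 April
# in winter (GMT) the same calls would return 00:00 UTC instead
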
def last_n_days(limit_days):
"""
Returns the last n dates, oldest first. Takes care of daylight savings (but returns a date, be careful how you
manipulate it later! Don't directly use the date for comparing to UTC datetimes!). Includes today.
"""
return [
datetime.combine(
(convert_utc_to_bst(datetime.utcnow()) - timedelta(days=x)),
datetime.min.time()
)
# reverse the countdown, -1 from first two args to ensure it stays 0-indexed
for x in range(limit_days - 1, -1, -1)
]
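
For example, with the same assumed clock (afternoon of 30 April 2018 UTC), the helper returns naive BST midnights, oldest first and including today:

last_n_days(3)
# -> [datetime(2018, 4, 28, 0, 0),
#     datetime(2018, 4, 29, 0, 0),
#     datetime(2018, 4, 30, 0, 0)]
# these are BST wall-clock midnights, so (per the docstring) don't compare
# them directly with UTC timestamps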