Refactor statsd logging

Removed all existing statsd logging and replaced it with:

- A statsd decorator. It infers the stat name from the decorated function, delegates the statsd call to the statsd client, and calls incr and timing for each decorated method. It is applied to all tasks and to all DAO methods that touch the notifications/notification_history tables (a minimal sketch follows this list).

- The statsd client was changed to prefix all stats with "notification.api."

- Relies on https://github.com/alphagov/notifications-utils/pull/61 for request logging. Once that is integrated we pass the statsd client to the logger, letting us record statsd metrics for every API call. The request start time and the method to be called (NOT the url) are placed on the global flask object, and we then construct statsd counters and timers of the following form:

	notifications.api.POST.notifications.send_notification.200
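
As promised in the first bullet, here is a minimal sketch of what the decorator could look like. It is illustrative only: the real implementation lives in app/statsd_decorators.py (see the diff below), and the ".elapsed-time" suffix and the use of time.monotonic are assumptions, not taken from this commit.

	import functools
	from time import monotonic
	
	from app import statsd_client
	
	
	def statsd(namespace):
	    # Infer the stat name from the namespace and the decorated function,
	    # then delegate the incr and timing calls to the statsd client,
	    # which adds the global API prefix.
	    def time_function(func):
	        @functools.wraps(func)
	        def wrapper(*args, **kwargs):
	            start_time = monotonic()
	            result = func(*args, **kwargs)
	            stat = "{}.{}".format(namespace, func.__name__)
	            statsd_client.incr(stat)
	            statsd_client.timing("{}.elapsed-time".format(stat), monotonic() - start_time)
	            return result
	        return wrapper
	    return time_function

Applied as @statsd(namespace="tasks"), a task such as process_job would then report something like tasks.process_job (counter) and tasks.process_job.elapsed-time (timer), with the client's prefix prepended.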

These request stats should allow us to aggregate at the level of:

	- API or ADMIN
	- POST, GET, etc.
	- modules
	- methods
	- status codes
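
For illustration only (this is not the notifications-utils implementation): one way an after-request hook could assemble such a stat, assuming the request logger has put the start time and the endpoint to be called on the global flask object g. The function name init_request_stats is hypothetical.

	from time import monotonic
	
	from flask import g, request
	
	
	def init_request_stats(app, statsd_client):
	    @app.before_request
	    def before_request():
	        # store the start time and the view to be called (NOT the url)
	        g.stats_start = monotonic()
	        g.stats_endpoint = request.endpoint  # e.g. "notifications.send_notification"
	
	    @app.after_request
	    def after_request(response):
	        if getattr(g, "stats_endpoint", None):
	            # the statsd client prepends its prefix, giving stats like the example above
	            stat = "{}.{}.{}".format(request.method, g.stats_endpoint, response.status_code)
	            statsd_client.incr(stat)
	            statsd_client.timing(stat, monotonic() - g.stats_start)
	        return response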

Finally, we count the callbacks received from third parties, broken down by the status they map to.
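
A sketch of that idea only; the handler name, stat name, and status mapping below are hypothetical, not taken from this commit:

	from app import statsd_client
	
	# hypothetical provider-status -> notification-status mapping, for illustration
	PROVIDER_STATUS_MAP = {
	    "DELIVRD": "delivered",
	    "EXPIRED": "temporary-failure",
	    "UNDELIV": "permanent-failure",
	}
	
	
	def count_callback(provider_status):
	    mapped_status = PROVIDER_STATUS_MAP.get(provider_status, "technical-failure")
	    # the statsd client adds the global prefix before the stat is sent
	    statsd_client.incr("callback.{}".format(mapped_status))
	    return mapped_status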
Martyn Inglis
2016-08-05 10:44:43 +01:00
parent 3128e79e7c
commit f223446f73
18 changed files with 121 additions and 223 deletions


@@ -24,6 +24,7 @@ from notifications_utils.template import Template
 from notifications_utils.renderers import HTMLEmail, PlainTextEmail, SMSMessage
 from app.models import SMS_TYPE, EMAIL_TYPE, KEY_TYPE_TEST
+from app.statsd_decorators import statsd
 def retry_iteration_to_delay(retry=0):
@@ -50,9 +51,8 @@ def retry_iteration_to_delay(retry=0):
 @notify_celery.task(bind=True, name="send-sms-to-provider", max_retries=5, default_retry_delay=5)
+@statsd(namespace="tasks")
 def send_sms_to_provider(self, service_id, notification_id):
-    task_start = monotonic()
     service = dao_fetch_service_by_id(service_id)
     provider = provider_to_use(SMS_TYPE, notification_id)
     notification = get_notification_by_id(notification_id)
@@ -101,8 +101,6 @@ def send_sms_to_provider(self, service_id, notification_id):
     current_app.logger.info(
         "SMS {} sent to provider at {}".format(notification_id, notification.sent_at)
     )
-    statsd_client.incr("notifications.tasks.send-sms-to-provider")
-    statsd_client.timing("notifications.tasks.send-sms-to-provider.task-time", monotonic() - task_start)
     delta_milliseconds = (datetime.utcnow() - notification.created_at).total_seconds() * 1000
     statsd_client.timing("notifications.sms.total-time", delta_milliseconds)
@@ -122,8 +120,8 @@ def provider_to_use(notification_type, notification_id):
 @notify_celery.task(bind=True, name="send-email-to-provider", max_retries=5, default_retry_delay=5)
+@statsd(namespace="tasks")
 def send_email_to_provider(self, service_id, notification_id):
-    task_start = monotonic()
     service = dao_fetch_service_by_id(service_id)
     provider = provider_to_use(EMAIL_TYPE, notification_id)
     notification = get_notification_by_id(notification_id)
@@ -183,7 +181,5 @@ def send_email_to_provider(self, service_id, notification_id):
     current_app.logger.info(
         "Email {} sent to provider at {}".format(notification_id, notification.sent_at)
     )
-    statsd_client.incr("notifications.tasks.send-email-to-provider")
-    statsd_client.timing("notifications.tasks.send-email-to-provider.task-time", monotonic() - task_start)
     delta_milliseconds = (datetime.utcnow() - notification.created_at).total_seconds() * 1000
     statsd_client.timing("notifications.email.total-time", delta_milliseconds)


@@ -9,9 +9,11 @@ from app.dao.invited_user_dao import delete_invitations_created_more_than_two_da
 from app.dao.notifications_dao import delete_notifications_created_more_than_a_week_ago, get_notifications, \
     update_notification_status_by_id
 from app.dao.users_dao import delete_codes_older_created_more_than_a_day_ago
+from app.statsd_decorators import statsd
 @notify_celery.task(name="delete-verify-codes")
+@statsd(namespace="tasks")
 def delete_verify_codes():
     try:
         start = datetime.utcnow()
@@ -25,6 +27,7 @@ def delete_verify_codes():
 @notify_celery.task(name="delete-successful-notifications")
+@statsd(namespace="tasks")
 def delete_successful_notifications():
     try:
         start = datetime.utcnow()
@@ -42,6 +45,7 @@ def delete_successful_notifications():
 @notify_celery.task(name="delete-failed-notifications")
+@statsd(namespace="tasks")
 def delete_failed_notifications():
     try:
         start = datetime.utcnow()
@@ -62,6 +66,7 @@ def delete_failed_notifications():
 @notify_celery.task(name="delete-invitations")
+@statsd(namespace="tasks")
 def delete_invitations():
     try:
         start = datetime.utcnow()
@@ -75,6 +80,7 @@ def delete_invitations():
 @notify_celery.task(name='timeout-sending-notifications')
+@statsd(namespace="tasks")
 def timeout_notifications():
     # TODO: optimize the query by adding the date where clause to this query.
     notifications = get_notifications(filter_dict={'status': 'sending'})


@@ -10,7 +10,6 @@ from notifications_utils.recipients import (
 from notifications_utils.template import Template
 from sqlalchemy.exc import SQLAlchemyError
-from app import statsd_client
 from app import (
     create_uuid,
     DATETIME_FORMAT,
@@ -36,11 +35,12 @@ from app.models import (
     SMS_TYPE,
     KEY_TYPE_NORMAL
 )
+from app.statsd_decorators import statsd
 @notify_celery.task(name="process-job")
+@statsd(namespace="tasks")
 def process_job(job_id):
-    task_start = monotonic()
     start = datetime.utcnow()
     job = dao_get_job_by_id(job_id)
@@ -116,11 +116,10 @@ def process_job(job_id):
     current_app.logger.info(
         "Job {} created at {} started at {} finished at {}".format(job_id, job.created_at, start, finished)
     )
-    statsd_client.incr("notifications.tasks.process-job")
-    statsd_client.timing("notifications.tasks.process-job.task-time", monotonic() - task_start)
 @notify_celery.task(name="remove-job")
+@statsd(namespace="tasks")
 def remove_job(job_id):
     job = dao_get_job_by_id(job_id)
     s3.remove_job_from_s3(job.service.id, str(job_id))
@@ -128,6 +127,7 @@ def remove_job(job_id):
 @notify_celery.task(bind=True, name="send-sms", max_retries=5, default_retry_delay=5)
+@statsd(namespace="tasks")
 def send_sms(self,
              service_id,
              notification_id,
@@ -135,7 +135,6 @@ def send_sms(self,
              created_at,
              api_key_id=None,
              key_type=KEY_TYPE_NORMAL):
-    task_start = monotonic()
     notification = encryption.decrypt(encrypted_notification)
     service = dao_fetch_service_by_id(service_id)
@@ -154,21 +153,19 @@ def send_sms(self,
             "SMS {} created at {}".format(notification_id, created_at)
         )
-        statsd_client.incr("notifications.tasks.send-sms")
-        statsd_client.timing("notifications.tasks.send-sms.task-time", monotonic() - task_start)
     except SQLAlchemyError as e:
         current_app.logger.exception(e)
         raise self.retry(queue="retry", exc=e)
 @notify_celery.task(bind=True, name="send-email", max_retries=5, default_retry_delay=5)
+@statsd(namespace="tasks")
 def send_email(self, service_id,
                notification_id,
                encrypted_notification,
                created_at,
                api_key_id=None,
                key_type=KEY_TYPE_NORMAL):
-    task_start = monotonic()
     notification = encryption.decrypt(encrypted_notification)
     service = dao_fetch_service_by_id(service_id)
@@ -182,8 +179,5 @@ def send_email(self, service_id,
         send_email_to_provider.apply_async((service_id, notification_id), queue='email')
         current_app.logger.info("Email {} created at {}".format(notification_id, created_at))
-        statsd_client.incr("notifications.tasks.send-email")
-        statsd_client.timing("notifications.tasks.send-email.task-time", monotonic() - task_start)
     except SQLAlchemyError as e:
         current_app.logger.exception(e)
         raise self.retry(queue="retry", exc=e)