mirror of
https://github.com/GSA/notifications-api.git
synced 2025-12-22 08:21:13 -05:00
Include status in stats about delivery times
Previously these metrics weren't very useful because they could be
skewed by long timings for failed notifications, which can take up
to 72 hours to deliver. I'm intentionally not trying to have a dual
running period (with the old and new names) because:
- We don't use the current stats for anything (checking Grafana).
- The current stats get turned into a "bucket" metric in Prometheus
[1][2], which isn't very useful because it can only tell us the mean
time to deliver, but we're actually interested in percentiles.
Switching to a new naming is an opportunity to fix the raw data and
the way it's aggregated, using the same kind of "summary" metric that
we now use for stats about our Celery tasks [3].
[1]: c330a8ac8a/paas/statsd/statsd-mapping.yml (L82)
[2]: https://prometheus.io/docs/practices/histograms/#quantiles
[3]: https://github.com/alphagov/notifications-aws/pull/890
This commit is contained in:
@@ -70,7 +70,11 @@ def process_ses_results(self, response):
|
|||||||
statsd_client.incr('callback.ses.{}'.format(notification_status))
|
statsd_client.incr('callback.ses.{}'.format(notification_status))
|
||||||
|
|
||||||
if notification.sent_at:
|
if notification.sent_at:
|
||||||
statsd_client.timing_with_dates('callback.ses.elapsed-time', datetime.utcnow(), notification.sent_at)
|
statsd_client.timing_with_dates(
|
||||||
|
f'callback.ses.{notification_status}.elapsed-time',
|
||||||
|
datetime.utcnow(),
|
||||||
|
notification.sent_at
|
||||||
|
)
|
||||||
|
|
||||||
_check_and_queue_callback_task(notification)
|
_check_and_queue_callback_task(notification)
|
||||||
|
|
||||||
|
|||||||
@@ -75,7 +75,7 @@ def _process_for_status(notification_status, client_name, provider_reference, de
|
|||||||
|
|
||||||
if notification.sent_at:
|
if notification.sent_at:
|
||||||
statsd_client.timing_with_dates(
|
statsd_client.timing_with_dates(
|
||||||
'callback.{}.elapsed-time'.format(client_name.lower()),
|
f'callback.{client_name.lower()}.{notification_status}.elapsed-time',
|
||||||
datetime.utcnow(),
|
datetime.utcnow(),
|
||||||
notification.sent_at
|
notification.sent_at
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -86,7 +86,7 @@ def test_ses_callback_should_update_notification_status(
|
|||||||
assert process_ses_results(ses_notification_callback(reference='ref'))
|
assert process_ses_results(ses_notification_callback(reference='ref'))
|
||||||
assert get_notification_by_id(notification.id).status == 'delivered'
|
assert get_notification_by_id(notification.id).status == 'delivered'
|
||||||
statsd_client.timing_with_dates.assert_any_call(
|
statsd_client.timing_with_dates.assert_any_call(
|
||||||
"callback.ses.elapsed-time", datetime.utcnow(), notification.sent_at
|
"callback.ses.delivered.elapsed-time", datetime.utcnow(), notification.sent_at
|
||||||
)
|
)
|
||||||
statsd_client.incr.assert_any_call("callback.ses.delivered")
|
statsd_client.incr.assert_any_call("callback.ses.delivered")
|
||||||
updated_notification = Notification.query.get(notification.id)
|
updated_notification = Notification.query.get(notification.id)
|
||||||
|
|||||||
@@ -144,7 +144,7 @@ def test_process_sms_client_response_records_statsd_metrics(sample_notification,
|
|||||||
|
|
||||||
statsd_client.incr.assert_any_call("callback.firetext.delivered")
|
statsd_client.incr.assert_any_call("callback.firetext.delivered")
|
||||||
statsd_client.timing_with_dates.assert_any_call(
|
statsd_client.timing_with_dates.assert_any_call(
|
||||||
"callback.firetext.elapsed-time", datetime.utcnow(), sample_notification.sent_at
|
"callback.firetext.delivered.elapsed-time", datetime.utcnow(), sample_notification.sent_at
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user