From f2f2509c9b38d960231e08c863c7591f44206a55 Mon Sep 17 00:00:00 2001 From: Leo Hemsted Date: Fri, 29 May 2020 13:56:29 +0100 Subject: [PATCH] add raw request timings to provider send functions we're using statsd to monitor how long provider requests are taking. However, there's lots of busy work that happens inside our statsd metrics timing window. Things like json dumping and loading, building headers, exception handling, etc. for firetext/mmg, the response object from requests has an elapsed property [1], which captures from sending raw data to parsing the response headers. for ses, it's a bit trickier, but boto3 exposes a few event hooks [2]. it's hard to find them without stepping through the code, but the interesting ones are before-call, after-call, after-call-error, request-created, and response-received. The before-call and after-call involve some marshalling, built-in retrying, etc, while request-created and response-received are much lower level. They might be called more than once per ses request, if boto3 itself retries the request on 5xx, 429 and low level socket errors [3]. Add these as new `raw-request-time` metrics rather than overwriting to avoid changing the meaning of an existing metric, and to let us compare the metrics to see if there's a noticeable difference at all [1] https://requests.readthedocs.io/en/master/api/#requests.Response.elapsed [2] https://boto3.amazonaws.com/v1/documentation/api/latest/guide/events.html [3] https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html#legacy-retry-mode --- app/clients/email/aws_ses.py | 20 ++++++++++++++++++++ app/clients/sms/firetext.py | 3 +++ app/clients/sms/mmg.py | 4 ++++ 3 files changed, 27 insertions(+) diff --git a/app/clients/email/aws_ses.py b/app/clients/email/aws_ses.py index 999315588..3b4289a97 100644 --- a/app/clients/email/aws_ses.py +++ b/app/clients/email/aws_ses.py @@ -54,6 +54,22 @@ class AwsSesClient(EmailClient): self.name = 'ses' self.statsd_client = statsd_client + # events are generally undocumented, but some that might be of interest are: + # before-call, after-call, after-call-error, request-created, response-received + self._client.meta.events.register(f'request-created.ses.SendEmail', self.ses_request_created_hook) + self._client.meta.events.register(f'response-received.ses.SendEmail', self.ses_response_received_hook) + + def ses_request_created_hook(self, **kwargs): + # request created may be called multiple times if the request auto-retries. We want to count all these as the + # same request for timing purposes, so only reset the start time if it was cleared completely + if self.ses_start_time == 0: + self.ses_start_time = monotonic() + + def ses_response_received_hook(self, **kwargs): + # response received may be called multiple times if the request auto-retries, however, we want to count the last + # time it triggers for timing purposes, so always reset the elapsed time + self.ses_elapsed_time = monotonic() - self.ses_start_time + def get_name(self): return self.name @@ -64,6 +80,8 @@ class AwsSesClient(EmailClient): body, html_body='', reply_to_address=None): + self.ses_elapsed_time = 0 + self.ses_start_time = 0 try: if isinstance(to_addresses, str): to_addresses = [to_addresses] @@ -114,6 +132,8 @@ class AwsSesClient(EmailClient): elapsed_time = monotonic() - start_time current_app.logger.info("AWS SES request finished in {}".format(elapsed_time)) self.statsd_client.timing("clients.ses.request-time", elapsed_time) + if self.ses_elapsed_time != 0: + self.statsd_client.timing("clients.ses.raw-request-time", self.ses_elapsed_time) self.statsd_client.incr("clients.ses.success") return response['MessageId'] diff --git a/app/clients/sms/firetext.py b/app/clients/sms/firetext.py index 36692d29c..25848691b 100644 --- a/app/clients/sms/firetext.py +++ b/app/clients/sms/firetext.py @@ -97,6 +97,7 @@ class FiretextClient(SmsClient): "reference": reference } + response = None start_time = monotonic() try: response = request( @@ -121,4 +122,6 @@ class FiretextClient(SmsClient): elapsed_time = monotonic() - start_time self.current_app.logger.info("Firetext request for {} finished in {}".format(reference, elapsed_time)) self.statsd_client.timing("clients.firetext.request-time", elapsed_time) + if response and hasattr(response, 'elapsed'): + self.statsd_client.timing("clients.firetext.raw-request-time", response.elapsed.total_seconds()) return response diff --git a/app/clients/sms/mmg.py b/app/clients/sms/mmg.py index b122d45dc..e1c45a45f 100644 --- a/app/clients/sms/mmg.py +++ b/app/clients/sms/mmg.py @@ -71,6 +71,7 @@ class MMGClient(SmsClient): "multi": multi } + response = None start_time = monotonic() try: response = request( @@ -97,6 +98,9 @@ class MMGClient(SmsClient): finally: elapsed_time = monotonic() - start_time self.statsd_client.timing("clients.mmg.request-time", elapsed_time) + if response and hasattr(response, 'elapsed'): + self.statsd_client.timing("clients.mmg.raw-request-time", response.elapsed.total_seconds()) + self.current_app.logger.info("MMG request for {} finished in {}".format(reference, elapsed_time)) return response