From 12f460adc5921288f91fcb59269f16d69f53d80e Mon Sep 17 00:00:00 2001 From: David McDonald Date: Thu, 25 Jun 2020 17:22:08 +0100 Subject: [PATCH] Turn off statsd wrapper for synchronous statsd calls during POSTs This commit turns off StatsD metrics for the following - the `dao_create_notification` function - the `user-agent` metric - the response times and response codes per flask endpoint This has been done with the purpose of not having the creation of text messages or emails call out to StatsD during the request process. These are the three current metrics that are currently called during the processing of one of those requests and so have been removed from the API. The reason for removing the calls out to StatsD when processing a request to send a notification is that we have seen two incidents recently affected by DNS resolution for StatsD (either by a slow down in resolution time or a failure to resolve). These POST requests are our most critical code path and we don't want them to be affected by any potential unforeseen trouble with StatsD DNS resolution. We are not going to miss the removal of these metrics. - the `dao_create_notification` metric is rarely/never looked at - the `user-agent` metric is rarely/never looked at and can be got from our logs if we want it - the response times and response codes per flask endpoint are already exposed using the gds metrics python library I did not remove the statsd metrics from any other parts of the API because - As the POST notification endpoints are the main source of web traffic, we should have already removed most calls to StatsD which should greatly reduce the chance of their being any further issues with DNS resolution - Some of the other metrics still provide value so no point deleting them if we don't need to - The metrics on celery tasks will not affect any HTTP requests from users as they are async and also we do not currently have the infrastructure set up to replace them with a prometheus HTTP endpoint that can be scraped so this would require more work --- app/__init__.py | 16 ---------------- app/dao/notifications_dao.py | 1 - requirements-app.txt | 2 +- requirements.txt | 10 +++++----- tests/app/test_user_agent_processing.py | 13 ------------- 5 files changed, 6 insertions(+), 36 deletions(-) delete mode 100644 tests/app/test_user_agent_processing.py diff --git a/app/__init__.py b/app/__init__.py index 4563cd473..52a527021 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -269,10 +269,6 @@ def register_v2_blueprints(application): def init_app(app): - @app.before_request - def record_user_agent(): - statsd_client.incr("user-agent.{}".format(process_user_agent(request.headers.get('User-Agent', None)))) - @app.before_request def record_request_details(): CONCURRENT_REQUESTS.inc() @@ -319,18 +315,6 @@ def create_random_identifier(): return ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(16)) -def process_user_agent(user_agent_string): - if user_agent_string and user_agent_string.lower().startswith("notify"): - components = user_agent_string.split("/") - client_name = components[0].lower() - client_version = components[1].replace(".", "-") - return "{}.{}".format(client_name, client_version) - elif user_agent_string and not user_agent_string.lower().startswith("notify"): - return "non-notify-user-agent" - else: - return "unknown" - - def setup_sqlalchemy_events(app): TOTAL_DB_CONNECTIONS = Gauge( diff --git a/app/dao/notifications_dao.py b/app/dao/notifications_dao.py index 8b0b67568..8bc62657e 100644 --- a/app/dao/notifications_dao.py +++ b/app/dao/notifications_dao.py @@ -77,7 +77,6 @@ def dao_get_last_date_template_was_used(template_id, service_id): return last_date -@statsd(namespace="dao") @transactional def dao_create_notification(notification): if not notification.id: diff --git a/requirements-app.txt b/requirements-app.txt index 444164308..58add7c6c 100644 --- a/requirements-app.txt +++ b/requirements-app.txt @@ -27,7 +27,7 @@ notifications-python-client==5.5.1 # PaaS awscli-cwlogs>=1.4,<1.5 -git+https://github.com/alphagov/notifications-utils.git@39.7.1#egg=notifications-utils==39.7.1 +git+https://github.com/alphagov/notifications-utils.git@40.0.0#egg=notifications-utils==40.0.0 # gds-metrics requires prometheseus 0.2.0, override that requirement as 0.7.1 brings significant performance gains prometheus-client==0.7.1 diff --git a/requirements.txt b/requirements.txt index c70959d8c..3d1087c14 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,7 +29,7 @@ notifications-python-client==5.5.1 # PaaS awscli-cwlogs>=1.4,<1.5 -git+https://github.com/alphagov/notifications-utils.git@39.7.1#egg=notifications-utils==39.7.1 +git+https://github.com/alphagov/notifications-utils.git@40.0.0#egg=notifications-utils==40.0.0 # gds-metrics requires prometheseus 0.2.0, override that requirement as 0.7.1 brings significant performance gains prometheus-client==0.7.1 @@ -40,14 +40,14 @@ alembic==1.4.2 amqp==1.4.9 anyjson==0.3.3 attrs==19.3.0 -awscli==1.18.85 +awscli==1.18.89 bcrypt==3.1.7 billiard==3.3.0.23 bleach==3.1.4 blinker==1.4 boto==2.49.0 boto3==1.10.38 -botocore==1.17.8 +botocore==1.17.12 certifi==2020.6.20 chardet==3.0.4 click==7.1.2 @@ -58,8 +58,8 @@ flask-redis==0.4.0 future==0.18.2 govuk-bank-holidays==0.6 greenlet==0.4.16 -idna==2.9 -importlib-metadata==1.6.1 +idna==2.10 +importlib-metadata==1.7.0 Jinja2==2.11.2 jmespath==0.10.0 kombu==3.0.37 diff --git a/tests/app/test_user_agent_processing.py b/tests/app/test_user_agent_processing.py deleted file mode 100644 index 161b0e475..000000000 --- a/tests/app/test_user_agent_processing.py +++ /dev/null @@ -1,13 +0,0 @@ -from app import process_user_agent - - -def test_can_process_notify_api_user_agent(): - assert "notify-api-python-client.3-0-0" == process_user_agent("NOTIFY-API-PYTHON-CLIENT/3.0.0") - - -def test_can_handle_non_notify_api_user_agent(): - assert "non-notify-user-agent" == process_user_agent("Mozilla/5.0 (iPad; U; CPU OS 3_2_1 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Mobile/7B405") # noqa - - -def test_handles_null_user_agent(): - assert "unknown" == process_user_agent(None)