diff --git a/Pipfile b/Pipfile index 0e3f9608a..8904227af 100644 --- a/Pipfile +++ b/Pipfile @@ -56,13 +56,11 @@ python-dotenv = "==1.0.0" radon = "==6.0.1" sqlalchemy = "==1.4.40" werkzeug = "~=2.3" -# gds metrics packages -prometheus-client = "==0.17.1" -gds-metrics = {version = "==0.2.4", ref = "6f1840a57b6fb1ee40b7e84f2f18ec229de8aa72", git = "https://github.com/alphagov/gds_metrics_python.git"} +vulture = "==2.8" + packaging = "==23.1" notifications-utils = {editable = true, ref = "main", git = "https://github.com/GSA/notifications-utils.git"} newrelic = "*" -vulture = "==2.8" [dev-packages] exceptiongroup = "==1.1.2" diff --git a/app/__init__.py b/app/__init__.py index 5d57c8d76..f565bf731 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -17,8 +17,6 @@ from flask import ( from flask_marshmallow import Marshmallow from flask_migrate import Migrate from flask_sqlalchemy import SQLAlchemy as _SQLAlchemy -from gds_metrics import GDSMetrics -from gds_metrics.metrics import Gauge, Histogram from notifications_utils import logging, request_helper from notifications_utils.celery import NotifyCelery from notifications_utils.clients.encryption.encryption_client import Encryption @@ -61,18 +59,13 @@ encryption = Encryption() zendesk_client = ZendeskClient() redis_store = RedisClient() document_download_client = DocumentDownloadClient() -metrics = GDSMetrics() + notification_provider_clients = NotificationProviderClients() api_user = LocalProxy(lambda: g.api_user) authenticated_service = LocalProxy(lambda: g.authenticated_service) -CONCURRENT_REQUESTS = Gauge( - 'concurrent_web_request_count', - 'How many concurrent requests are currently being served', -) - def create_app(application): from app.config import configs @@ -84,8 +77,6 @@ def create_app(application): application.config['NOTIFY_APP_NAME'] = application.name init_app(application) - # Metrics intentionally high up to give the most accurate timing and reliability that the metric is recorded - metrics.init_app(application) request_helper.init_app(application) db.init_app(application) migrate.init_app(application, db=db) @@ -280,14 +271,12 @@ def init_app(app): @app.before_request def record_request_details(): - CONCURRENT_REQUESTS.inc() g.start = monotonic() g.endpoint = request.endpoint @app.after_request def after_request(response): - CONCURRENT_REQUESTS.dec() response.headers.add('X-Content-Type-Options', 'nosniff') return response @@ -339,39 +328,19 @@ def create_random_identifier(): # TODO maintainability what is the purpose of this? Debugging? def setup_sqlalchemy_events(app): - TOTAL_DB_CONNECTIONS = Gauge( - 'db_connection_total_connected', - 'How many db connections are currently held (potentially idle) by the server', - ) - - TOTAL_CHECKED_OUT_DB_CONNECTIONS = Gauge( - 'db_connection_total_checked_out', - 'How many db connections are currently checked out by web requests', - ) - - DB_CONNECTION_OPEN_DURATION_SECONDS = Histogram( - 'db_connection_open_duration_seconds', - 'How long db connections are held open for in seconds', - ['method', 'host', 'path'] - ) - # need this or db.engine isn't accessible with app.app_context(): @event.listens_for(db.engine, 'connect') def connect(dbapi_connection, connection_record): # noqa - # connection first opened with db - TOTAL_DB_CONNECTIONS.inc() + pass @event.listens_for(db.engine, 'close') def close(dbapi_connection, connection_record): # noqa - # connection closed (probably only happens with overflow connections) - TOTAL_DB_CONNECTIONS.dec() + pass @event.listens_for(db.engine, 'checkout') def checkout(dbapi_connection, connection_record, connection_proxy): # noqa try: - # connection given to a web worker - TOTAL_CHECKED_OUT_DB_CONNECTIONS.inc() # this will overwrite any previous checkout_at timestamp connection_record.info['checkout_at'] = time.monotonic() @@ -407,17 +376,4 @@ def setup_sqlalchemy_events(app): @event.listens_for(db.engine, 'checkin') def checkin(dbapi_connection, connection_record): # noqa - try: - # connection returned by a web worker - TOTAL_CHECKED_OUT_DB_CONNECTIONS.dec() - - # duration that connection was held by a single web request - duration = time.monotonic() - connection_record.info['checkout_at'] - - DB_CONNECTION_OPEN_DURATION_SECONDS.labels( - connection_record.info['request_data']['method'], - connection_record.info['request_data']['host'], - connection_record.info['request_data']['url_rule'] - ).observe(duration) - except Exception: - current_app.logger.exception("Exception caught for checkin event.") + pass diff --git a/app/authentication/auth.py b/app/authentication/auth.py index f594d00ad..6a1ed6091 100644 --- a/app/authentication/auth.py +++ b/app/authentication/auth.py @@ -1,7 +1,6 @@ import uuid from flask import current_app, g, request -from gds_metrics import Histogram from notifications_python_client.authentication import ( decode_jwt_token, get_token_issuer, @@ -24,11 +23,6 @@ TOKEN_MESSAGE_ONE = "Invalid token: make sure your API token matches the example TOKEN_MESSAGE_TWO = "at https://docs.notifications.service.gov.uk/rest-api.html#authorisation-header" # nosec B105 GENERAL_TOKEN_ERROR_MESSAGE = TOKEN_MESSAGE_ONE + TOKEN_MESSAGE_TWO -AUTH_DB_CONNECTION_DURATION_SECONDS = Histogram( - 'auth_db_connection_duration_seconds', - 'Time taken to get DB connection and fetch service from database', -) - class AuthError(Exception): def __init__(self, message, code, service_id=None, api_key_id=None): @@ -102,8 +96,7 @@ def requires_auth(): raise AuthError("Invalid token: service id is not the right data type", 403) try: - with AUTH_DB_CONNECTION_DURATION_SECONDS.time(): - service = SerialisedService.from_id(service_id) + service = SerialisedService.from_id(service_id) except NoResultFound: raise AuthError("Invalid token: service not found", 403) diff --git a/app/notifications/process_notifications.py b/app/notifications/process_notifications.py index 317deb607..e2be54bde 100644 --- a/app/notifications/process_notifications.py +++ b/app/notifications/process_notifications.py @@ -2,7 +2,6 @@ import uuid from datetime import datetime from flask import current_app -from gds_metrics import Histogram from notifications_utils.clients import redis from notifications_utils.recipients import ( format_email_address, @@ -30,11 +29,6 @@ from app.models import ( ) from app.v2.errors import BadRequestError -REDIS_GET_AND_INCR_DAILY_LIMIT_DURATION_SECONDS = Histogram( - 'redis_get_and_incr_daily_limit_duration_seconds', - 'Time taken to get and possibly incremement the daily limit cache key', -) - def create_content_for_notification(template, personalisation): if template.template_type == EMAIL_TYPE: diff --git a/app/notifications/receive_notifications.py b/app/notifications/receive_notifications.py index ae0997217..a7e852349 100644 --- a/app/notifications/receive_notifications.py +++ b/app/notifications/receive_notifications.py @@ -1,5 +1,4 @@ from flask import Blueprint, current_app, json, jsonify, request -from gds_metrics.metrics import Counter from notifications_utils.recipients import try_validate_and_format_phone_number from app.celery import tasks @@ -14,13 +13,6 @@ receive_notifications_blueprint = Blueprint('receive_notifications', __name__) register_errors(receive_notifications_blueprint) -INBOUND_SMS_COUNTER = Counter( - 'inbound_sms', - 'Total number of inbound SMS received', - ['provider'] -) - - @receive_notifications_blueprint.route('/notifications/sms/receive/sns', methods=['POST']) def receive_sns_sms(): """ @@ -64,8 +56,6 @@ def receive_sns_sms(): result="success", message="SMS-SNS callback succeeded" ), 200 - INBOUND_SMS_COUNTER.labels("sns").inc() - content = message.get("messageBody") from_number = message.get('originationNumber') provider_ref = message.get('inboundMessageId') diff --git a/app/notifications/validators.py b/app/notifications/validators.py index c3e2b0b00..4025d916a 100644 --- a/app/notifications/validators.py +++ b/app/notifications/validators.py @@ -1,5 +1,4 @@ from flask import current_app -from gds_metrics.metrics import Histogram from notifications_utils import SMS_CHAR_COUNT_LIMIT from notifications_utils.clients.redis import ( daily_total_cache_key, @@ -31,21 +30,15 @@ from app.service.utils import service_allowed_to_send_to from app.utils import get_public_notify_type_text from app.v2.errors import BadRequestError, RateLimitError, TotalRequestsError -REDIS_EXCEEDED_RATE_LIMIT_DURATION_SECONDS = Histogram( - 'redis_exceeded_rate_limit_duration_seconds', - 'Time taken to check rate limit', -) - def check_service_over_api_rate_limit(service, api_key): if current_app.config['API_RATE_LIMIT_ENABLED'] and current_app.config['REDIS_ENABLED']: cache_key = rate_limit_cache_key(service.id, api_key.key_type) rate_limit = service.rate_limit interval = 60 - with REDIS_EXCEEDED_RATE_LIMIT_DURATION_SECONDS.time(): - if redis_store.exceeded_rate_limit(cache_key, rate_limit, interval): - current_app.logger.info("service {} has been rate limited for throughput".format(service.id)) - raise RateLimitError(rate_limit, interval, api_key.key_type) + if redis_store.exceeded_rate_limit(cache_key, rate_limit, interval): + current_app.logger.info("service {} has been rate limited for throughput".format(service.id)) + raise RateLimitError(rate_limit, interval, api_key.key_type) def check_application_over_retention_limit(key_type, service): diff --git a/app/v2/notifications/post_notifications.py b/app/v2/notifications/post_notifications.py index 55a3d7cc2..8780b682c 100644 --- a/app/v2/notifications/post_notifications.py +++ b/app/v2/notifications/post_notifications.py @@ -4,7 +4,6 @@ from datetime import datetime import botocore from flask import abort, current_app, jsonify, request -from gds_metrics import Histogram from notifications_utils.recipients import try_validate_and_format_phone_number from app import ( @@ -53,23 +52,17 @@ from app.v2.notifications.notification_schemas import ( ) from app.v2.utils import get_valid_json -POST_NOTIFICATION_JSON_PARSE_DURATION_SECONDS = Histogram( - 'post_notification_json_parse_duration_seconds', - 'Time taken to parse and validate post request json', -) - @v2_notification_blueprint.route('/', methods=['POST']) def post_notification(notification_type): - with POST_NOTIFICATION_JSON_PARSE_DURATION_SECONDS.time(): - request_json = get_valid_json() + request_json = get_valid_json() - if notification_type == EMAIL_TYPE: - form = validate(request_json, post_email_request) - elif notification_type == SMS_TYPE: - form = validate(request_json, post_sms_request) - else: - abort(404) + if notification_type == EMAIL_TYPE: + form = validate(request_json, post_email_request) + elif notification_type == SMS_TYPE: + form = validate(request_json, post_sms_request) + else: + abort(404) check_service_has_permission(notification_type, authenticated_service.permissions) diff --git a/gunicorn_config.py b/gunicorn_config.py index 06da4f712..ff42fee05 100644 --- a/gunicorn_config.py +++ b/gunicorn_config.py @@ -5,7 +5,6 @@ import gunicorn import eventlet import socket -from gds_metrics.gunicorn import child_exit # noqa workers = 4 worker_class = "eventlet" diff --git a/run_celery.py b/run_celery.py index 6c7f4a2b9..e21533c86 100644 --- a/run_celery.py +++ b/run_celery.py @@ -2,11 +2,6 @@ from flask import Flask -# import prometheus before any other code. If gds_metrics is imported first it will write a prometheus file to disk -# that will never be read from (since we don't have prometheus celery stats). If prometheus is imported first, -# prometheus will simply store the metrics in memory -import prometheus_client # noqa - # notify_celery is referenced from manifest_delivery_base.yml, and cannot be removed from app import notify_celery, create_app # noqa diff --git a/tests/app/test_route_authentication.py b/tests/app/test_route_authentication.py index e8ed40583..9eb6891e9 100644 --- a/tests/app/test_route_authentication.py +++ b/tests/app/test_route_authentication.py @@ -11,6 +11,4 @@ def test_all_routes_have_authentication(client): # The static route is always available by default for a Flask app to serve anything in the static folder. routes_blueprint_names.remove('static') - # The metrics route is not protected by auth as it's available to be scraped by Prometheus - routes_blueprint_names.remove('metrics') assert sorted(blueprint_names) == sorted(routes_blueprint_names)