add prometheus metrics for connection pools (both web and sql)

add the following prometheus metrics to keep track of general app
instance health.

 # sqs_apply_async_duration

how long the actual SQS call (a standard web request to AWS) takes.
a histogram with default bucket sizes, split up per task that was
created.
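The pattern is a labelled histogram timing the call. A minimal sketch, using prometheus_client directly (the commit goes through gds_metrics, a thin wrapper around it), with `put_task_on_queue` as a hypothetical stand-in for the real `apply_async`:

```python
import time

from prometheus_client import Histogram

SQS_APPLY_ASYNC_DURATION = Histogram(
    'sqs_apply_async_duration',
    'Time taken to put task on queue',
    ['task_name'],  # one time series per task name
)

def put_task_on_queue(task_name):
    # .time() records one observation (the duration of the with-block)
    # into the default buckets
    with SQS_APPLY_ASYNC_DURATION.labels(task_name).time():
        time.sleep(0.01)  # stands in for the real call to AWS

put_task_on_queue('send-sms')
```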

 # concurrent_web_request_count

how many web requests this app is currently serving. this is split up
per process, so we'd expect multiple values per app instance.
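This is a gauge that goes up when a request starts and down when it finishes. A sketch with prometheus_client; the hook functions are hypothetical — in a Flask app they'd be `before_request` / `teardown_request` handlers:

```python
from prometheus_client import Gauge

CONCURRENT_WEB_REQUEST_COUNT = Gauge(
    'concurrent_web_request_count',
    'Number of requests this process is currently serving',
)

# hypothetical lifecycle hooks; in a Flask app these would be
# before_request / teardown_request handlers
def on_request_start():
    CONCURRENT_WEB_REQUEST_COUNT.inc()

def on_request_finished():
    CONCURRENT_WEB_REQUEST_COUNT.dec()

# two requests arrive, one finishes: the gauge now reads 1
on_request_start()
on_request_start()
on_request_finished()
```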

 # db_connection_total_connected

how many connections this app (process) has open to the database.
They might be idle.

 # db_connection_total_checked_out

how many connections this app (process) has open that are
currently in use by a web worker.
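Both gauges above can be read straight off the connection pool at scrape time. A sketch assuming a SQLAlchemy-style pool (`QueuePool` exposes `checkedin()` and `checkedout()`); `FakePool` here is a stand-in for `engine.pool`:

```python
from prometheus_client import Gauge

DB_CONNECTION_TOTAL_CONNECTED = Gauge(
    'db_connection_total_connected',
    'Total connections this process holds to the database',
)
DB_CONNECTION_TOTAL_CHECKED_OUT = Gauge(
    'db_connection_total_checked_out',
    'Connections currently in use by a web worker',
)

class FakePool:
    """Stand-in for SQLAlchemy's engine.pool (a QueuePool)."""
    def checkedin(self):
        return 3   # idle connections sitting in the pool
    def checkedout(self):
        return 2   # connections currently in use

pool = FakePool()

# set_function makes the gauge call back into the pool at scrape time,
# so the exported values are always current
DB_CONNECTION_TOTAL_CONNECTED.set_function(
    lambda: pool.checkedin() + pool.checkedout())
DB_CONNECTION_TOTAL_CHECKED_OUT.set_function(
    lambda: pool.checkedout())
```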

 # db_connection_open_duration_seconds

a histogram, per endpoint, of how long the db connection was checked
out of the pool. won't have any data if a connection was never opened.
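The mechanics are: stamp the time when a connection is checked out, observe the elapsed time when it goes back. A sketch with hypothetical hook functions — in SQLAlchemy these would be `checkout` / `checkin` pool event listeners, with the endpoint label taken from the current request:

```python
import time

from prometheus_client import Histogram

DB_CONNECTION_OPEN_DURATION_SECONDS = Histogram(
    'db_connection_open_duration_seconds',
    'How long a db connection was checked out of the pool',
    ['endpoint'],
)

# hypothetical hooks; in SQLAlchemy these would be 'checkout'/'checkin'
# pool event listeners
_checked_out_at = None

def on_checkout():
    global _checked_out_at
    _checked_out_at = time.monotonic()

def on_checkin(endpoint):
    global _checked_out_at
    if _checked_out_at is not None:
        DB_CONNECTION_OPEN_DURATION_SECONDS.labels(endpoint).observe(
            time.monotonic() - _checked_out_at)
        _checked_out_at = None

on_checkout()
on_checkin('main.index')
```

If no connection is ever checked out, `on_checkin` observes nothing, which matches the "won't have any data" behaviour above.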
Leo Hemsted, 2020-04-24 14:36:21 +01:00 (committed by David McDonald)
parent 4bb37a05ec
commit 6e32ca5996
2 changed files with 85 additions and 1 deletion

@@ -1,5 +1,6 @@
 import time
+from gds_metrics.metrics import Histogram
 from celery import Celery, Task
 from celery.signals import worker_process_shutdown
 from flask import g, request
@@ -19,6 +20,12 @@ def log_on_worker_shutdown(sender, signal, pid, exitcode, **kwargs):
 def make_task(app):
+    SQS_APPLY_ASYNC_DURATION = Histogram(
+        'sqs_apply_async_duration',
+        'Time taken to put task on queue',
+        ['task_name']
+    )
+
     class NotifyTask(Task):
         abstract = True
         start = None
@@ -52,7 +59,8 @@ def make_task(app):
             if has_request_context() and hasattr(request, 'request_id'):
                 kwargs['request_id'] = request.request_id
-            return super().apply_async(args, kwargs, task_id, producer, link, link_error, **options)
+            with SQS_APPLY_ASYNC_DURATION.labels(self.name).time():
+                return super().apply_async(args, kwargs, task_id, producer, link, link_error, **options)
     return NotifyTask