From 1425d6e22560399d9da96a241c5d8c3bd0ac5546 Mon Sep 17 00:00:00 2001 From: Alexey Bezhan Date: Thu, 28 Mar 2019 14:55:33 +0000 Subject: [PATCH] Increase DB connection pool size for API instances We've seen the SQLAlchemy "could not acquire connection" error in production during heavy traffic. Since we have more gunicorn eventlet workers than we have DB connections available, some workers need to wait for a DB connection to become available before they can proceed with the request. There's a timeout set on how long a worker would wait, and if that timeout is exceeded, the above exception is raised. Currently, we're using at most 1000 out of 5000 max DB connections, with 40% peak CPU usage on the DB instance and an average of 60% CPU on API instances during heavy load. The number of DB connections is proportionally similar in preview and staging. This change slightly increases the maximum number of DB connections per API instance. This should improve our utilization of API instances by increasing the number of workers that can communicate with the DB concurrently while staying well within the max DB connections limit. --- manifest-api-base.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manifest-api-base.yml b/manifest-api-base.yml index 9b7d6a055..5901d45dc 100644 --- a/manifest-api-base.yml +++ b/manifest-api-base.yml @@ -12,7 +12,7 @@ env: CW_APP_NAME: api # required by cf run-task FLASK_APP: application.py - SQLALCHEMY_POOL_SIZE: 15 + SQLALCHEMY_POOL_SIZE: 20 # Credentials variables ADMIN_BASE_URL: null