mirror of
https://github.com/GSA/notifications-api.git
synced 2025-12-20 15:31:15 -05:00
This was introduced in #1811 as a way to avoid sending traffic to newly created apps where gunicorn had not started yet, as can happen during a scaling event. These days we depend mostly on scheduled scaling, and we rarely need to scale above the scheduled values. Yesterday we had an event where (during a traffic spike) the healthcheck failed, causing the instance to be killed and a 5XX response code to be sent to all the connections that this instance was handling at the time. However, this instance was not unhealthy and was serving traffic. The problem stems from a combination of using async workers, having to limit the number of database connections, and a thread holding onto a db connection for the entire duration of the request. Specifically, we end up with requests queued up in gunicorn, waiting for other requests to finish and release the db connection. Some pages, such as the dashboard, generate queries that can take >5s. If a healthcheck request is sent during a traffic spike and the instance in question was "unfortunate" enough to be handed a few of these long-running queries, the healthcheck request will be queued up behind these slow requests and will fail to receive a response within 1s (see docs). Ideally we should be able to configure the healthcheck timeout to a value of our choosing, since we can end up in this situation again in the future. Docs: https://docs.cloudfoundry.org/devguide/deploy-apps/healthchecks.html#types
66 lines
1.3 KiB
YAML
66 lines
1.3 KiB
YAML
---

# Cloud Foundry application manifest.
# The top-level attributes below sit outside `applications`, so they are
# written once for every entry in that list; the second application
# overrides `command`, `instances` and `memory` with its own values.

buildpack: python_buildpack
command: unset GUNICORN_CMD_ARGS; scripts/run_app_paas.sh gunicorn -c /home/vcap/app/gunicorn_config.py application

# Service instances bound to the applications.
services:
  - notify-db
  - logit-ssl-syslog-drain

env:
  NOTIFY_APP_NAME: public-api
  CW_APP_NAME: api
  # required by cf run-task
  FLASK_APP: application.py
  SQLALCHEMY_POOL_SIZE: 15

  # Credentials variables
  # NOTE(review): every value below is null in this file; presumably the real
  # values are injected at deploy time - confirm against the deploy tooling.
  ADMIN_BASE_URL: null
  ADMIN_CLIENT_SECRET: null
  API_HOST_NAME: null
  DANGEROUS_SALT: null
  SECRET_KEY: null
  ROUTE_SECRET_KEY_1: null
  ROUTE_SECRET_KEY_2: null

  PERFORMANCE_PLATFORM_ENDPOINTS: null

  NOTIFICATION_QUEUE_PREFIX: null
  AWS_ACCESS_KEY_ID: null
  AWS_SECRET_ACCESS_KEY: null

  STATSD_PREFIX: null

  ZENDESK_API_KEY: null

  MMG_URL: null
  MMG_API_KEY: null
  MMG_INBOUND_SMS_AUTH: null
  MMG_INBOUND_SMS_USERNAME: null

  FIRETEXT_API_KEY: null
  LOADTESTING_API_KEY: null
  FIRETEXT_INBOUND_SMS_AUTH: null

  REDIS_ENABLED: null
  REDIS_URL: null

  TEMPLATE_PREVIEW_API_HOST: null
  TEMPLATE_PREVIEW_API_KEY: null

  DOCUMENT_DOWNLOAD_API_HOST: null
  DOCUMENT_DOWNLOAD_API_KEY: null

instances: 1
memory: 1G

applications:
  # Sets only a name; all other attributes come from the top level.
  - name: notify-api

  # Idle app: its command only sleeps, it has no route and no health check.
  # NOTE(review): presumably exists as a target for `cf run-task` database
  # migrations - confirm.
  - name: notify-api-db-migration
    command: sleep infinity
    no-route: true
    health-check-type: none
    instances: 1
    memory: 128M