mirror of
https://github.com/GSA/notifications-admin.git
synced 2025-12-10 23:23:27 -05:00
We have seen multiple issues in production where health checks have failed for our applications because responses have taken longer than 1 second (the default health check invocation timeout), which has marked the instance as unhealthy and restarted it. This restarting has dropped in-flight requests and caused 502s for our users. We are not entirely sure why the health checks sometimes take longer than expected. One hypothesis is that large amounts of traffic slow the response times of the apps; however, we have also seen contradictory evidence where health checks can still fail even when apps are getting very low levels of traffic. There could also be an issue with the actual health check process itself. Regardless of the cause, we think that changing the timeout to 10 seconds might stop our apps being restarted when they are in fact still healthy enough to serve requests to users. Further investigation will also be done by the PaaS team into the health check process itself to see if this throws any more light on the situation. 10 seconds was a fairly arbitrary choice that was significantly longer than 1 second.
63 lines
1.7 KiB
Django/Jinja
63 lines
1.7 KiB
Django/Jinja
{#-
  Per-app deployment settings, keyed by Cloud Foundry app name (CF_APP).
  'routes' maps an environment name to the extra hostnames bound to the app
  in that environment. Apps with an empty entry get no extra routes.
-#}
{%- set apps = {
  'notify-admin': {
    'routes': {
      'preview': ['www.notify.works'],
      'staging': ['www.staging-notify.works'],
      'production': ['www.notifications.service.gov.uk'],
    }
  },
  'notify-admin-prototype': {},
  'notify-admin-prototype-2': {}
} -%}

{#- Settings for the app currently being rendered, selected by CF_APP. -#}
{%- set app = apps[CF_APP] -%}
|
|
|
|
---
# Cloud Foundry manifest for the app named by CF_APP.
applications:
  - name: '{{ CF_APP }}'
    buildpack: python_buildpack

    memory: 1G

    routes:
      # Default route built from the app name and environment, followed by
      # any extra routes configured for this app in this environment.
      - route: '{{ CF_APP }}-{{ environment }}.cloudapps.digital'
      {%- for route in app.get('routes', {}).get(environment, []) %}
      - route: '{{ route }}'
      {%- endfor %}

    health-check-type: http
    health-check-http-endpoint: '/_status?simple=true'
    # Raised from the 1 second default: slow health check responses were
    # getting healthy instances restarted, dropping in-flight requests.
    health-check-invocation-timeout: 10

    services:
      - logit-ssl-syslog-drain

    env:
      NOTIFY_APP_NAME: admin

      # Credentials variables
      ADMIN_CLIENT_SECRET: '{{ ADMIN_CLIENT_SECRET }}'
      ADMIN_BASE_URL: '{{ ADMIN_BASE_URL }}'
      API_HOST_NAME: '{{ API_HOST_NAME }}'
      DANGEROUS_SALT: '{{ DANGEROUS_SALT }}'
      SECRET_KEY: '{{ SECRET_KEY }}'
      ROUTE_SECRET_KEY_1: '{{ ROUTE_SECRET_KEY_1 }}'
      ROUTE_SECRET_KEY_2: '{{ ROUTE_SECRET_KEY_2 }}'

      AWS_ACCESS_KEY_ID: '{{ AWS_ACCESS_KEY_ID }}'
      AWS_SECRET_ACCESS_KEY: '{{ AWS_SECRET_ACCESS_KEY }}'

      ANTIVIRUS_API_HOST: '{{ ANTIVIRUS_API_HOST }}'
      ANTIVIRUS_API_KEY: '{{ ANTIVIRUS_API_KEY }}'

      STATSD_HOST: 'notify-statsd-exporter-{{ environment }}.apps.internal'
      STATSD_PREFIX: '{{ STATSD_PREFIX }}'

      ZENDESK_API_KEY: '{{ ZENDESK_API_KEY }}'

      TEMPLATE_PREVIEW_API_HOST: '{{ TEMPLATE_PREVIEW_API_HOST }}'
      TEMPLATE_PREVIEW_API_KEY: '{{ TEMPLATE_PREVIEW_API_KEY }}'

      REDIS_ENABLED: '{{ REDIS_ENABLED }}'
      REDIS_URL: '{{ REDIS_URL }}'
|