Files
notifications-api/scripts/run_multi_worker_app_paas.sh
Athanasios Voutsadakis 3528aab25b Kill the other processes started by the script
We use exec to start awslogs_agent and then a tail to print logs to
stdout. CF docs[1] recommend to use exec to start processes which seems
to imply that as long as there are commands running the container will
remain up and running.

This commit ensures that if there are no celery tasks running we will
kill any other processes that we have started, so that the container will
no longer be considered healthy by cloudfoundry and will be replaced.

1: https://docs.cloudfoundry.org/devguide/deploy-apps/manifest.html#start-commands
2019-01-23 16:23:58 +00:00

154 lines
3.6 KiB
Bash
Executable File

#!/bin/bash
set -e -o pipefail
TERMINATE_TIMEOUT=9
readonly LOGS_DIR="/home/vcap/logs"
function check_params {
if [ -z "${NOTIFY_APP_NAME}" ]; then
echo "You must set NOTIFY_APP_NAME"
exit 1
fi
if [ -z "${CW_APP_NAME}" ]; then
CW_APP_NAME=${NOTIFY_APP_NAME}
fi
}
function configure_aws_logs {
# create files so that aws logs agent doesn't complain
touch ${LOGS_DIR}/gunicorn_error.log
touch ${LOGS_DIR}/app.log.json
aws configure set plugins.cwlogs cwlogs
cat > /home/vcap/app/awslogs.conf << EOF
[general]
state_file = ${LOGS_DIR}/awslogs-state
[${LOGS_DIR}/app.log]
file = ${LOGS_DIR}/app.log.json
log_group_name = paas-${CW_APP_NAME}-application
log_stream_name = {hostname}
[${LOGS_DIR}/gunicorn_error.log]
file = ${LOGS_DIR}/gunicorn_error.log
log_group_name = paas-${CW_APP_NAME}-gunicorn
log_stream_name = {hostname}
EOF
}
# For every PID, check if it's still running. if it is, send the sigterm. then wait 9 seconds before sending sigkill
function on_exit {
echo "multi worker app exiting"
wait_time=0
send_signal_to_celery_processes TERM
# check if the apps are still running every second
while [[ "$wait_time" -le "$TERMINATE_TIMEOUT" ]]; do
get_celery_pids
# look here for explanation regarding this syntax:
# https://unix.stackexchange.com/a/298942/230401
PROCESS_COUNT="${#APP_PIDS[@]}"
if [[ "${PROCESS_COUNT}" -eq "0" ]]; then
echo "No celery process is running any more, exiting"
return 0
fi
let wait_time=wait_time+1
sleep 1
done
send_signal_to_celery_processes KILL
}
function get_celery_pids {
# get the PIDs of the process whose parent is the root process
# print only pid and their command, get the ones with "celery" in their name
# and keep only these PIDs
set +o pipefail # so grep returning no matches does not premature fail pipe
APP_PIDS=$(pgrep -P 1 | xargs ps -o pid=,command= -p | grep celery | cut -f1 -d/)
set -o pipefail # pipefail should be set everywhere else
}
function send_signal_to_celery_processes {
# refresh pids to account for the case that some workers may have terminated but others not
get_celery_pids
# send signal to all remaining apps
echo ${APP_PIDS} | tr -d '\n' | tr -s ' ' | xargs echo "Sending signal ${1} to processes with pids: "
echo ${APP_PIDS} | xargs kill -s ${1}
}
function start_application {
eval "$@"
get_celery_pids
echo "Application process pids: "${APP_PIDS}
}
function start_aws_logs_agent {
exec aws logs push --region eu-west-1 --config-file /home/vcap/app/awslogs.conf &
AWSLOGS_AGENT_PID=$!
echo "AWS logs agent pid: ${AWSLOGS_AGENT_PID}"
}
function start_logs_tail {
exec tail -n0 -f ${LOGS_DIR}/app.log.json &
LOGS_TAIL_PID=$!
echo "tail pid: ${LOGS_TAIL_PID}"
}
function ensure_celery_is_running {
if [ "${APP_PIDS}" = "" ]; then
echo "There are no celery processes running, this container is bad"
echo "Exporting CF information for diagnosis"
env | grep CF
echo "Sleeping 15 seconds for logs to get shipped"
sleep 15
echo "Killing awslogs_agent and tail"
kill -9 ${AWSLOGS_AGENT_PID}
kill -9 ${LOGS_TAIL_PID}
exit 1
fi
}
function run {
while true; do
get_celery_pids
ensure_celery_is_running
for APP_PID in ${APP_PIDS}; do
kill -0 ${APP_PID} 2&>/dev/null || return 1
done
kill -0 ${AWSLOGS_AGENT_PID} 2&>/dev/null || start_aws_logs_agent
kill -0 ${LOGS_TAIL_PID} 2&>/dev/null || start_logs_tail
sleep 1
done
}
echo "Run script pid: $$"
check_params
trap "on_exit" EXIT
configure_aws_logs
# The application has to start first!
start_application "$@"
start_aws_logs_agent
start_logs_tail
run