mirror of
https://github.com/GSA/notifications-api.git
synced 2026-03-03 17:02:24 -05:00
This proves that a task can be duplicated **even** when the prefetch multiplier [1] is set to 1. This is because prefetched tasks are still present on the SQS queue and subject to the visibility timeout, whereas tasks in progress are not (deleted, unless acks_late is set [2]). I get the same results with: - A concurrency of 2 processes; or - A concurrency of 1 and 2 workers This is consistent with [3], which reinforces that there's no way to prevent this behaviour for successive, long-running tasks. Arguably the current behaviour - without acks_late - is worse because the task that gets duplicated is not the task at "fault" (the long-running one). Suggested actions ----------------- Keep the worker as-is: the settings are appropriate for long-running tasks and do help minimise any duplication. Turning on acks_late has a risk of creating new kinds of duplication - may be worse than now. Continue other work to parallelise and optimise long-running tasks, so that the visibility timeout is never an issue in the first place. Demo of task duplication ------------------------ In order to do the demo we need: - 3 tasks and 2 proceses: the hypothesis is that process #1 will pick up tasks #1 and #2, with task #2 "timing out". - To poll more often than the timeout, so that process #2 will pick up (duplicate) task #2 after it finishes #3. Alternatively, use two workers: source environment.sh && celery -A run_celery.notify_celery worker --loglevel=INFO --concurrency=1 source environment.sh && celery -A run_celery.notify_celery worker --loglevel=INFO --concurrency=1 Command to schedule the tasks: from app.celery.reporting_tasks import long_task [long_task.apply_async(queue='reporting-tasks') for i in [1,2,3]] Logs that show the duplication: [2022-01-12 16:50:09,333: INFO/MainProcess] Task long[4c23eaf0-69fc-4951-b215-437d79e30462] received [2022-01-12 16:50:09,901: INFO/ForkPoolWorker-1] Running long_task [2022-01-12 16:50:10,208: INFO/MainProcess] Task long[7c22c30d-e61d-4d2d-834c-7dae56b20064] received [2022-01-12 16:50:10,253: INFO/ForkPoolWorker-2] Running long_task [2022-01-12 16:50:11,373: INFO/MainProcess] Task long[67e5526d-4a82-430b-8eb5-b8022a20bf70] received [2022-01-12 16:50:21,410: INFO/MainProcess] Task long[67e5526d-4a82-430b-8eb5-b8022a20bf70] received [2022-01-12 16:50:30,499: INFO/ForkPoolWorker-3] Running long_task [2022-01-12 16:50:36,555: INFO/ForkPoolWorker-4] Running long_task Note that when the first two tasks are received varies: I've seen them received separately and at the same time. [1]: https://docs.celeryproject.org/en/stable/userguide/configuration.html#std-setting-worker_prefetch_multiplier [2]: https://docs.celeryproject.org/en/stable/userguide/tasks.html#Task.acks_late [3]: https://github.com/celery/celery/issues/2788#issuecomment-136273421
180 lines
5.9 KiB
Makefile
180 lines
5.9 KiB
Makefile
.DEFAULT_GOAL := help
|
|
SHELL := /bin/bash
|
|
DATE = $(shell date +%Y-%m-%d:%H:%M:%S)
|
|
|
|
APP_VERSION_FILE = app/version.py
|
|
|
|
GIT_BRANCH ?= $(shell git symbolic-ref --short HEAD 2> /dev/null || echo "detached")
|
|
GIT_COMMIT ?= $(shell git rev-parse HEAD)
|
|
|
|
CF_API ?= api.cloud.service.gov.uk
|
|
CF_ORG ?= govuk-notify
|
|
CF_SPACE ?= ${DEPLOY_ENV}
|
|
CF_HOME ?= ${HOME}
|
|
$(eval export CF_HOME)
|
|
|
|
CF_MANIFEST_PATH ?= /tmp/manifest.yml
|
|
|
|
|
|
NOTIFY_CREDENTIALS ?= ~/.notify-credentials
|
|
|
|
|
|
## DEVELOPMENT
|
|
|
|
.PHONY: bootstrap
|
|
bootstrap: generate-version-file ## Set up everything to run the app
|
|
pip3 install -r requirements_for_test.txt
|
|
createdb notification_api || true
|
|
(. environment.sh && flask db upgrade) || true
|
|
|
|
.PHONY: run-flask
|
|
run-flask: ## Run flask
|
|
. environment.sh && flask run -p 6011
|
|
|
|
.PHONY: run-celery
|
|
run-celery: ## Run celery
|
|
. environment.sh && celery \
|
|
-A run_celery.notify_celery worker \
|
|
--pidfile="/tmp/celery.pid" \
|
|
--loglevel=INFO \
|
|
--concurrency=2
|
|
|
|
.PHONY: run-celery-beat
|
|
run-celery-beat: ## Run celery beat
|
|
. environment.sh && celery \
|
|
-A run_celery.notify_celery beat \
|
|
--loglevel=INFO
|
|
|
|
.PHONY: help
|
|
help:
|
|
@cat $(MAKEFILE_LIST) | grep -E '^[a-zA-Z_-]+:.*?## .*$$' | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
|
|
|
|
.PHONY: generate-version-file
|
|
generate-version-file: ## Generates the app version file
|
|
@echo -e "__git_commit__ = \"${GIT_COMMIT}\"\n__time__ = \"${DATE}\"" > ${APP_VERSION_FILE}
|
|
|
|
.PHONY: test
|
|
test: ## Run tests
|
|
flake8 .
|
|
isort --check-only ./app ./tests
|
|
pytest -n4 --maxfail=10
|
|
|
|
.PHONY: freeze-requirements
|
|
freeze-requirements: ## Pin all requirements including sub dependencies into requirements.txt
|
|
pip install --upgrade pip-tools
|
|
pip-compile requirements.in
|
|
|
|
.PHONY: clean
|
|
clean:
|
|
rm -rf node_modules cache target venv .coverage build tests/.cache ${CF_MANIFEST_PATH}
|
|
|
|
|
|
## DEPLOYMENT
|
|
|
|
.PHONY: preview
|
|
preview: ## Set environment to preview
|
|
$(eval export DEPLOY_ENV=preview)
|
|
$(eval export DNS_NAME="notify.works")
|
|
@true
|
|
|
|
.PHONY: staging
|
|
staging: ## Set environment to staging
|
|
$(eval export DEPLOY_ENV=staging)
|
|
$(eval export DNS_NAME="staging-notify.works")
|
|
@true
|
|
|
|
.PHONY: production
|
|
production: ## Set environment to production
|
|
$(eval export DEPLOY_ENV=production)
|
|
$(eval export DNS_NAME="notifications.service.gov.uk")
|
|
@true
|
|
|
|
.PHONY: cf-login
|
|
cf-login: ## Log in to Cloud Foundry
|
|
$(if ${CF_USERNAME},,$(error Must specify CF_USERNAME))
|
|
$(if ${CF_PASSWORD},,$(error Must specify CF_PASSWORD))
|
|
$(if ${CF_SPACE},,$(error Must specify CF_SPACE))
|
|
@echo "Logging in to Cloud Foundry on ${CF_API}"
|
|
@cf login -a "${CF_API}" -u ${CF_USERNAME} -p "${CF_PASSWORD}" -o "${CF_ORG}" -s "${CF_SPACE}"
|
|
|
|
.PHONY: generate-manifest
|
|
generate-manifest:
|
|
$(if ${CF_APP},,$(error Must specify CF_APP))
|
|
$(if ${CF_SPACE},,$(error Must specify CF_SPACE))
|
|
$(if $(shell which gpg2), $(eval export GPG=gpg2), $(eval export GPG=gpg))
|
|
$(if ${GPG_PASSPHRASE_TXT}, $(eval export DECRYPT_CMD=echo -n $$$${GPG_PASSPHRASE_TXT} | ${GPG} --quiet --batch --passphrase-fd 0 --pinentry-mode loopback -d), $(eval export DECRYPT_CMD=${GPG} --quiet --batch -d))
|
|
|
|
@jinja2 --strict manifest.yml.j2 \
|
|
-D environment=${CF_SPACE} \
|
|
-D CF_APP=${CF_APP} \
|
|
--format=yaml \
|
|
<(${DECRYPT_CMD} ${NOTIFY_CREDENTIALS}/credentials/${CF_SPACE}/paas/environment-variables.gpg) 2>&1
|
|
|
|
.PHONY: cf-deploy
|
|
cf-deploy: ## Deploys the app to Cloud Foundry
|
|
$(if ${CF_SPACE},,$(error Must specify CF_SPACE))
|
|
$(if ${CF_APP},,$(error Must specify CF_APP))
|
|
cf target -o ${CF_ORG} -s ${CF_SPACE}
|
|
@cf app --guid ${CF_APP} || exit 1
|
|
|
|
# cancel any existing deploys to ensure we can apply manifest (if a deploy is in progress you'll see ScaleDisabledDuringDeployment)
|
|
cf cancel-deployment ${CF_APP} || true
|
|
|
|
# generate manifest (including secrets) and write it to CF_MANIFEST_PATH (in /tmp/)
|
|
make -s CF_APP=${CF_APP} generate-manifest > ${CF_MANIFEST_PATH}
|
|
|
|
# fails after 15 mins if deploy doesn't work
|
|
# reads manifest from CF_MANIFEST_PATH
|
|
CF_STARTUP_TIMEOUT=15 cf push ${CF_APP} --strategy=rolling -f ${CF_MANIFEST_PATH}
|
|
# delete old manifest file
|
|
rm ${CF_MANIFEST_PATH}
|
|
|
|
.PHONY: cf-deploy-api-db-migration
|
|
cf-deploy-api-db-migration:
|
|
$(if ${CF_SPACE},,$(error Must specify CF_SPACE))
|
|
cf target -o ${CF_ORG} -s ${CF_SPACE}
|
|
make -s CF_APP=notify-api-db-migration generate-manifest > ${CF_MANIFEST_PATH}
|
|
|
|
cf push notify-api-db-migration --no-route -f ${CF_MANIFEST_PATH}
|
|
rm ${CF_MANIFEST_PATH}
|
|
|
|
cf run-task notify-api-db-migration --command="flask db upgrade" --name api_db_migration
|
|
|
|
.PHONY: cf-check-api-db-migration-task
|
|
cf-check-api-db-migration-task: ## Get the status for the last notify-api-db-migration task
|
|
@cf curl /v3/apps/`cf app --guid notify-api-db-migration`/tasks?order_by=-created_at | jq -r ".resources[0].state"
|
|
|
|
.PHONY: cf-rollback
|
|
cf-rollback: ## Rollbacks the app to the previous release
|
|
$(if ${CF_APP},,$(error Must specify CF_APP))
|
|
rm ${CF_MANIFEST_PATH}
|
|
cf cancel-deployment ${CF_APP}
|
|
|
|
.PHONY: check-if-migrations-to-run
|
|
check-if-migrations-to-run:
|
|
@echo $(shell python3 scripts/check_if_new_migration.py)
|
|
|
|
.PHONY: cf-deploy-failwhale
|
|
cf-deploy-failwhale:
|
|
$(if ${CF_SPACE},,$(error Must target space, eg `make preview cf-deploy-failwhale`))
|
|
cd ./paas-failwhale; cf push notify-api-failwhale -f manifest.yml
|
|
|
|
.PHONY: enable-failwhale
|
|
enable-failwhale: ## Enable the failwhale app and disable api
|
|
$(if ${DNS_NAME},,$(error Must target space, eg `make preview enable-failwhale`))
|
|
# make sure failwhale is running first
|
|
cf start notify-api-failwhale
|
|
|
|
cf map-route notify-api-failwhale ${DNS_NAME} --hostname api
|
|
cf unmap-route notify-api ${DNS_NAME} --hostname api
|
|
@echo "Failwhale is enabled"
|
|
|
|
.PHONY: disable-failwhale
|
|
disable-failwhale: ## Disable the failwhale app and enable api
|
|
$(if ${DNS_NAME},,$(error Must target space, eg `make preview disable-failwhale`))
|
|
|
|
cf map-route notify-api ${DNS_NAME} --hostname api
|
|
cf unmap-route notify-api-failwhale ${DNS_NAME} --hostname api
|
|
cf stop notify-api-failwhale
|
|
@echo "Failwhale is disabled"
|