automatically retry letters stuck in pending-virus-scan

Since September 2019 we've had to log on to production around once every
twenty days to restart the virus scan task for a letter. Most of the
time this is just a case of making sure the file is in the scan bucket,
and then triggering the task. If the file isn't in the scan bucket we'd
need to do some more manual investigation to find out exactly where the
file got stuck, but I can only remember times when it's been in the scan
bucket.

So we can check in code whether the file is in the scan bucket and, if
it is, kick the scan task off automatically. If it isn't, we still raise
a ticket so someone can investigate manually.
This commit is contained in:
Leo Hemsted
2022-03-07 18:20:22 +00:00
parent 520d621893
commit 00259893f1
2 changed files with 75 additions and 13 deletions

View File

@@ -8,6 +8,7 @@ from sqlalchemy import between
from sqlalchemy.exc import SQLAlchemyError
from app import db, notify_celery, zendesk_client
from app.aws import s3
from app.celery.broadcast_message_tasks import trigger_link_test
from app.celery.letters_pdf_tasks import get_pdf_for_templated_letter
from app.celery.tasks import (
@@ -45,6 +46,7 @@ from app.dao.services_dao import (
dao_find_services_with_high_failure_rates,
)
from app.dao.users_dao import delete_codes_older_created_more_than_a_day_ago
from app.letters.utils import generate_letter_pdf_filename
from app.models import (
EMAIL_TYPE,
JOB_STATUS_ERROR,
@@ -207,14 +209,38 @@ def replay_created_notifications():
@notify_celery.task(name='check-if-letters-still-pending-virus-check')
def check_if_letters_still_pending_virus_check():
letters = dao_precompiled_letters_still_pending_virus_check()
letters = []
for letter in dao_precompiled_letters_still_pending_virus_check():
# find letter in the scan bucket
filename = generate_letter_pdf_filename(
letter.reference,
letter.created_at,
ignore_folder=True,
postage=letter.postage
)
if s3.file_exists(current_app.config['LETTERS_SCAN_BUCKET_NAME'], filename):
current_app.logger.warning(
f'Letter id {letter.id} got stuck in pending-virus-check. Sending off for scan again.'
)
notify_celery.send_task(
name=TaskNames.SCAN_FILE,
kwargs={'filename': filename},
queue=QueueNames.ANTIVIRUS,
)
else:
letters.append(letter)
if len(letters) > 0:
letter_ids = [(str(letter.id), letter.reference) for letter in letters]
msg = """{} precompiled letters have been pending-virus-check for over 90 minutes. Follow runbook to resolve:
msg = f"""{len(letters)} precompiled letters have been pending-virus-check for over 90 minutes.
We couldn't find them in the scan bucket.
Follow runbook to resolve:
https://github.com/alphagov/notifications-manuals/wiki/Support-Runbook#Deal-with-letter-pending-virus-scan-for-90-minutes.
Notifications: {}""".format(len(letters), sorted(letter_ids))
Notifications: {sorted(letter_ids)}"""
if current_app.config['NOTIFY_ENVIRONMENT'] in ['live', 'production', 'test']:
ticket = NotifySupportTicket(