Merge pull request #1366 from GSA/main

10/16/24 production deploy
This commit is contained in:
Cliff Hill
2024-10-16 12:27:36 -04:00
committed by GitHub
20 changed files with 564 additions and 1090 deletions

View File

@@ -209,7 +209,7 @@
"filename": "tests/app/aws/test_s3.py",
"hashed_secret": "67a74306b06d0c01624fe0d0249a570f4d093747",
"is_verified": false,
"line_number": 27,
"line_number": 29,
"is_secret": false
}
],
@@ -384,5 +384,5 @@
}
]
},
"generated_at": "2024-09-10T18:12:39Z"
"generated_at": "2024-09-27T16:42:53Z"
}

View File

@@ -1,6 +1,9 @@
name: Deploy egress proxy
description: Set egress space security groups and deploy proxy
inputs:
cf_org:
description: The org the target app exists in.
required: true
cf_space:
description: The space the target app exists in.
required: true
@@ -16,6 +19,19 @@ inputs:
runs:
using: composite
steps:
- name: Install cf-cli
shell: bash
run: |
curl -A "cg-deploy-action" -v -L -o cf-cli_amd64.deb 'https://packages.cloudfoundry.org/stable?release=debian64&version=v8&source=github'
sudo dpkg -i cf-cli_amd64.deb
- name: Login to cf-cli
shell: bash
run: |
cf api api.fr.cloud.gov
cf auth
- name: Target org and space
shell: bash
run: cf target -o ${{ inputs.cf_org }} -s ${{ inputs.cf_space }}
- name: Set restricted space egress
shell: bash
run: ./terraform/set_space_egress.sh -t -s ${{ inputs.cf_space }}

View File

@@ -54,7 +54,7 @@ jobs:
- name: Check for dead code
run: make dead-code
- name: Run tests with coverage
run: poetry run coverage run --omit=*/notifications_utils/*,*/migrations/* -m pytest --maxfail=10
run: poetry run coverage run --omit=*/migrations/*,*/tests/* -m pytest --maxfail=10
env:
SQLALCHEMY_DATABASE_TEST_URI: postgresql://user:password@localhost:5432/test_notification_api
NOTIFY_E2E_TEST_EMAIL: ${{ secrets.NOTIFY_E2E_TEST_EMAIL }}
@@ -63,7 +63,7 @@ jobs:
NOTIFY_E2E_TEST_PASSWORD: ${{ secrets.NOTIFY_E2E_TEST_PASSWORD }}
- name: Check coverage threshold
# TODO get this back up to 95
run: poetry run coverage report --fail-under=95
run: poetry run coverage report -m --fail-under=91
validate-new-relic-config:
runs-on: ubuntu-latest

View File

@@ -49,7 +49,7 @@ jobs:
run: poetry export --without-hashes --format=requirements.txt > requirements.txt
- name: Deploy to cloud.gov
uses: 18f/cg-deploy-action@main
uses: cloud-gov/cg-cli-tools@main
env:
DANGEROUS_SALT: ${{ secrets.DANGEROUS_SALT }}
SECRET_KEY: ${{ secrets.SECRET_KEY }}
@@ -64,7 +64,8 @@ jobs:
cf_password: ${{ secrets.CLOUDGOV_PASSWORD }}
cf_org: gsa-tts-benefits-studio
cf_space: notify-demo
push_arguments: >-
cf_command: >-
push -f manifest.yml
--vars-file deploy-config/demo.yml
--var DANGEROUS_SALT="$DANGEROUS_SALT"
--var SECRET_KEY="$SECRET_KEY"
@@ -73,6 +74,7 @@ jobs:
--var NOTIFY_E2E_TEST_EMAIL="$NOTIFY_E2E_TEST_EMAIL"
--var NOTIFY_E2E_TEST_PASSWORD="$NOTIFY_E2E_TEST_PASSWORD"
--var LOGIN_DOT_GOV_REGISTRATION_URL="$LOGIN_DOT_GOV_REGISTRATION_URL"
--strategy rolling
- name: Check for changes to templates.json
id: changed-templates
@@ -95,6 +97,10 @@ jobs:
- name: Deploy egress proxy
if: steps.changed-egress-config.outputs.any_changed == 'true'
uses: ./.github/actions/deploy-proxy
env:
CF_USERNAME: ${{ secrets.CLOUDGOV_USERNAME }}
CF_PASSWORD: ${{ secrets.CLOUDGOV_PASSWORD }}
with:
cf_space: notify-demo
app: notify-api-demo
cf_org: gsa-tts-benefits-studio
cf_space: notify-staging
app: notify-api-staging

View File

@@ -53,7 +53,7 @@ jobs:
run: poetry export --without-hashes --format=requirements.txt > requirements.txt
- name: Deploy to cloud.gov
uses: 18f/cg-deploy-action@main
uses: cloud-gov/cg-cli-tools@main
env:
DANGEROUS_SALT: ${{ secrets.DANGEROUS_SALT }}
SECRET_KEY: ${{ secrets.SECRET_KEY }}
@@ -68,7 +68,8 @@ jobs:
cf_password: ${{ secrets.CLOUDGOV_PASSWORD }}
cf_org: gsa-tts-benefits-studio
cf_space: notify-production
push_arguments: >-
cf_command: >-
push -f manifest.yml
--vars-file deploy-config/production.yml
--var DANGEROUS_SALT="$DANGEROUS_SALT"
--var SECRET_KEY="$SECRET_KEY"
@@ -77,6 +78,7 @@ jobs:
--var NOTIFY_E2E_TEST_EMAIL="$NOTIFY_E2E_TEST_EMAIL"
--var NOTIFY_E2E_TEST_PASSWORD="$NOTIFY_E2E_TEST_PASSWORD"
--var LOGIN_DOT_GOV_REGISTRATION_URL="$LOGIN_DOT_GOV_REGISTRATION_URL"
--strategy rolling
- name: Check for changes to templates.json
id: changed-templates
@@ -99,6 +101,10 @@ jobs:
- name: Deploy egress proxy
if: steps.changed-egress-config.outputs.any_changed == 'true'
uses: ./.github/actions/deploy-proxy
env:
CF_USERNAME: ${{ secrets.CLOUDGOV_USERNAME }}
CF_PASSWORD: ${{ secrets.CLOUDGOV_PASSWORD }}
with:
cf_space: notify-production
app: notify-api-production
cf_org: gsa-tts-benefits-studio
cf_space: notify-staging
app: notify-api-staging

View File

@@ -55,7 +55,7 @@ jobs:
run: poetry export --without-hashes --format=requirements.txt > requirements.txt
- name: Deploy to cloud.gov
uses: 18f/cg-deploy-action@main
uses: cloud-gov/cg-cli-tools@main
env:
DANGEROUS_SALT: ${{ secrets.DANGEROUS_SALT }}
SECRET_KEY: ${{ secrets.SECRET_KEY }}
@@ -70,7 +70,8 @@ jobs:
cf_password: ${{ secrets.CLOUDGOV_PASSWORD }}
cf_org: gsa-tts-benefits-studio
cf_space: notify-staging
push_arguments: >-
cf_command: >-
push -f manifest.yml
--vars-file deploy-config/staging.yml
--var DANGEROUS_SALT="$DANGEROUS_SALT"
--var SECRET_KEY="$SECRET_KEY"
@@ -79,6 +80,7 @@ jobs:
--var NOTIFY_E2E_TEST_EMAIL="$NOTIFY_E2E_TEST_EMAIL"
--var NOTIFY_E2E_TEST_PASSWORD="$NOTIFY_E2E_TEST_PASSWORD"
--var LOGIN_DOT_GOV_REGISTRATION_URL="$LOGIN_DOT_GOV_REGISTRATION_URL"
--strategy rolling
- name: Check for changes to templates.json
id: changed-templates
@@ -101,7 +103,11 @@ jobs:
- name: Deploy egress proxy
if: steps.changed-egress-config.outputs.any_changed == 'true'
uses: ./.github/actions/deploy-proxy
env:
CF_USERNAME: ${{ secrets.CLOUDGOV_USERNAME }}
CF_PASSWORD: ${{ secrets.CLOUDGOV_PASSWORD }}
with:
cf_org: gsa-tts-benefits-studio
cf_space: notify-staging
app: notify-api-staging

View File

@@ -81,9 +81,10 @@ test: ## Run tests and create coverage report
poetry run black .
poetry run flake8 .
poetry run isort --check-only ./app ./tests
poetry run coverage run --omit=*/notifications_utils/*,*/migrations/* -m pytest --maxfail=10
poetry run coverage run --omit=*/migrations/*,*/tests/* -m pytest --maxfail=10
poetry run coverage report -m --fail-under=95
## TODO set this back to 95 asap
poetry run coverage report -m --fail-under=91
poetry run coverage html -d .coverage_cache
.PHONY: py-lock

View File

@@ -1,31 +1,45 @@
import datetime
import re
import time
from concurrent.futures import ThreadPoolExecutor
from multiprocessing import Manager
import botocore
from boto3 import Session
from expiringdict import ExpiringDict
from flask import current_app
from app import redis_store
from app.clients import AWS_CLIENT_CONFIG
from notifications_utils import aware_utcnow
FILE_LOCATION_STRUCTURE = "service-{}-notify/{}.csv"
NEW_FILE_LOCATION_STRUCTURE = "{}-service-notify/{}.csv"
# Temporarily extend cache to 7 days
ttl = 60 * 60 * 24 * 7
JOBS = ExpiringDict(max_len=20000, max_age_seconds=ttl)
manager = Manager()
job_cache = manager.dict()
JOBS_CACHE_HITS = "JOBS_CACHE_HITS"
JOBS_CACHE_MISSES = "JOBS_CACHE_MISSES"
# Global variable
s3_client = None
s3_resource = None
def set_job_cache(job_cache, key, value):
job_cache[key] = (value, time.time() + 8 * 24 * 60 * 60)
def clean_cache():
current_time = time.time()
keys_to_delete = []
for key, (_, expiry_time) in job_cache.items():
if expiry_time < current_time:
keys_to_delete.append(key)
for key in keys_to_delete:
del job_cache[key]
def get_s3_client():
global s3_client
if s3_client is None:
@@ -87,7 +101,6 @@ def get_bucket_name():
def cleanup_old_s3_objects():
bucket_name = get_bucket_name()
s3_client = get_s3_client()
@@ -99,9 +112,15 @@ def cleanup_old_s3_objects():
while True:
for obj in response.get("Contents", []):
if obj["LastModified"] <= time_limit:
current_app.logger.info(
f"#delete-old-s3-objects Wanting to delete: {obj['LastModified']} {obj['Key']}"
)
try:
remove_csv_object(obj["Key"])
current_app.logger.info(
f"#delete-old-s3-objects Deleted: {obj['LastModified']} {obj['Key']}"
)
except botocore.exceptions.ClientError:
current_app.logger.exception(f"Couldn't delete {obj['Key']}")
if "NextContinuationToken" in response:
response = s3_client.list_objects_v2(
Bucket=bucket_name,
@@ -115,36 +134,70 @@ def cleanup_old_s3_objects():
)
def get_s3_files():
def get_job_id_from_s3_object_key(key):
object_arr = key.split("/")
job_id = object_arr[1] # get the job_id
job_id = job_id.replace(".csv", "") # we just want the job_id
return job_id
def read_s3_file(bucket_name, object_key, s3res):
"""
This method runs during the 'regenerate job cache' task.
Note that in addition to retrieving the jobs and putting them
into the cache, this method also does some pre-processing by
putting a list of all phone numbers into the cache as well.
This means that when the report needs to be regenerated, it
can easily find the phone numbers in the cache through job_cache[<job_id>_phones]
and the personalization through job_cache[<job_id>_personalisation], which
in theory should make report generation a lot faster.
We are moving processing from the front end where the user can see it
in wait time, to this back end process.
"""
try:
job_id = get_job_id_from_s3_object_key(object_key)
if job_cache.get(job_id) is None:
object = (
s3res.Object(bucket_name, object_key)
.get()["Body"]
.read()
.decode("utf-8")
)
set_job_cache(job_cache, job_id, object)
set_job_cache(job_cache, f"{job_id}_phones", extract_phones(object))
set_job_cache(
job_cache,
f"{job_id}_personalisation",
extract_personalisation(object),
)
except LookupError:
# perhaps our key is not formatted as we expected. If so skip it.
current_app.logger.exception("LookupError #notify-admin-1200")
def get_s3_files():
"""
We're using the ThreadPoolExecutor here to speed up the retrieval of S3
csv files for scaling needs.
"""
bucket_name = current_app.config["CSV_UPLOAD_BUCKET"]["bucket"]
objects = list_s3_objects()
object_keys = list_s3_objects()
s3res = get_s3_resource()
current_app.logger.info(
f"JOBS cache length before regen: {len(JOBS)} #notify-admin-1200"
f"job_cache length before regen: {len(job_cache)} #notify-admin-1200"
)
for object in objects:
# We put our csv files in the format "service-{service_id}-notify/{job_id}"
try:
object_arr = object.split("/")
job_id = object_arr[1] # get the job_id
job_id = job_id.replace(".csv", "") # we just want the job_id
if JOBS.get(job_id) is None:
object = (
s3res.Object(bucket_name, object)
.get()["Body"]
.read()
.decode("utf-8")
)
if "phone number" in object.lower():
JOBS[job_id] = object
except LookupError:
# perhaps our key is not formatted as we expected. If so skip it.
current_app.logger.exception("LookupError #notify-admin-1200")
try:
with ThreadPoolExecutor() as executor:
executor.map(lambda key: read_s3_file(bucket_name, key, s3res), object_keys)
except Exception:
current_app.logger.exception("Connection pool issue")
current_app.logger.info(
f"JOBS cache length after regen: {len(JOBS)} #notify-admin-1200"
f"job_cache length after regen: {len(job_cache)} #notify-admin-1200"
)
@@ -211,6 +264,27 @@ def file_exists(file_location):
def get_job_location(service_id, job_id):
current_app.logger.info(
f"#s3-partitioning NEW JOB_LOCATION: {NEW_FILE_LOCATION_STRUCTURE.format(service_id, job_id)}"
)
return (
current_app.config["CSV_UPLOAD_BUCKET"]["bucket"],
NEW_FILE_LOCATION_STRUCTURE.format(service_id, job_id),
current_app.config["CSV_UPLOAD_BUCKET"]["access_key_id"],
current_app.config["CSV_UPLOAD_BUCKET"]["secret_access_key"],
current_app.config["CSV_UPLOAD_BUCKET"]["region"],
)
def get_old_job_location(service_id, job_id):
"""
This is deprecated. We are transitioning to NEW_FILE_LOCATION_STRUCTURE,
but it will take a few days where we have to support both formats.
Remove this when everything works with the NEW_FILE_LOCATION_STRUCTURE.
"""
current_app.logger.info(
f"#s3-partitioning OLD JOB LOCATION: {FILE_LOCATION_STRUCTURE.format(service_id, job_id)}"
)
return (
current_app.config["CSV_UPLOAD_BUCKET"]["bucket"],
FILE_LOCATION_STRUCTURE.format(service_id, job_id),
@@ -239,17 +313,28 @@ def get_job_from_s3(service_id, job_id):
max_retries = 4
backoff_factor = 0.2
if not file_exists(FILE_LOCATION_STRUCTURE.format(service_id, job_id)):
if not file_exists(
FILE_LOCATION_STRUCTURE.format(service_id, job_id)
) and not file_exists(NEW_FILE_LOCATION_STRUCTURE.format(service_id, job_id)):
current_app.logger.error(
f"This file does not exist {FILE_LOCATION_STRUCTURE.format(service_id, job_id)}"
f"This file with service_id {service_id} and job_id {job_id} does not exist"
)
return None
while retries < max_retries:
try:
obj = get_s3_object(*get_job_location(service_id, job_id))
return obj.get()["Body"].read().decode("utf-8")
# TODO
# for transition on optimizing the s3 partition, we have
# to check for the file location using the new way and the
# old way. After this has been on production for a few weeks
# we should remove the check for the old way.
try:
obj = get_s3_object(*get_job_location(service_id, job_id))
return obj.get()["Body"].read().decode("utf-8")
except botocore.exceptions.ClientError:
obj = get_s3_object(*get_old_job_location(service_id, job_id))
return obj.get()["Body"].read().decode("utf-8")
except botocore.exceptions.ClientError as e:
if e.response["Error"]["Code"] in [
"Throttling",
@@ -257,7 +342,7 @@ def get_job_from_s3(service_id, job_id):
"SlowDown",
]:
current_app.logger.exception(
f"Retrying job fetch {FILE_LOCATION_STRUCTURE.format(service_id, job_id)} retry_count={retries}",
f"Retrying job fetch service_id {service_id} job_id {job_id} retry_count={retries}",
)
retries += 1
sleep_time = backoff_factor * (2**retries) # Exponential backoff
@@ -266,36 +351,22 @@ def get_job_from_s3(service_id, job_id):
else:
# Typically this is "NoSuchKey"
current_app.logger.exception(
f"Failed to get job {FILE_LOCATION_STRUCTURE.format(service_id, job_id)}",
f"Failed to get job with service_id {service_id} job_id {job_id}",
)
return None
except Exception:
current_app.logger.exception(
f"Failed to get job {FILE_LOCATION_STRUCTURE.format(service_id, job_id)} retry_count={retries}",
f"Failed to get job with service_id {service_id} job_id {job_id}retry_count={retries}",
)
return None
current_app.logger.error(
f"Never retrieved job {FILE_LOCATION_STRUCTURE.format(service_id, job_id)}",
f"Never retrieved job with service_id {service_id} job_id {job_id}",
)
return None
def incr_jobs_cache_misses():
if not redis_store.get(JOBS_CACHE_MISSES):
redis_store.set(JOBS_CACHE_MISSES, 1)
else:
redis_store.incr(JOBS_CACHE_MISSES)
def incr_jobs_cache_hits():
if not redis_store.get(JOBS_CACHE_HITS):
redis_store.set(JOBS_CACHE_HITS, 1)
else:
redis_store.incr(JOBS_CACHE_HITS)
def extract_phones(job):
job = job.split("\r\n")
first_row = job[0]
@@ -304,7 +375,7 @@ def extract_phones(job):
phone_index = 0
for item in first_row:
# Note: may contain a BOM and look like \ufeffphone number
if "phone number" in item.lower():
if item.lower() in ["phone number", "\\ufeffphone number"]:
break
phone_index = phone_index + 1
@@ -328,6 +399,8 @@ def extract_phones(job):
def extract_personalisation(job):
if isinstance(job, dict):
job = job[0]
job = job.split("\r\n")
first_row = job[0]
job.pop(0)
@@ -343,43 +416,32 @@ def extract_personalisation(job):
def get_phone_number_from_s3(service_id, job_id, job_row_number):
# We don't want to constantly pull down a job from s3 every time we need a phone number.
# At the same time we don't want to store it in redis or the db
# So this is a little recycling mechanism to reduce the number of downloads.
job = JOBS.get(job_id)
job = job_cache.get(job_id)
if job is None:
current_app.logger.info(f"job {job_id} was not in the cache")
job = get_job_from_s3(service_id, job_id)
# Even if it is None, put it here to avoid KeyErrors
JOBS[job_id] = job
incr_jobs_cache_misses()
set_job_cache(job_cache, job_id, job)
else:
incr_jobs_cache_hits()
# skip expiration date from cache, we don't need it here
job = job[0]
if job is None:
current_app.logger.error(
f"Couldnt find phone for job {FILE_LOCATION_STRUCTURE.format(service_id, job_id)} because job is missing"
f"Couldnt find phone for job with service_id {service_id} job_id {job_id} because job is missing"
)
return "Unavailable"
# If we look in the JOBS cache for the quick lookup dictionary of phones for a given job
# and that dictionary is not there, create it
if JOBS.get(f"{job_id}_phones") is None:
JOBS[f"{job_id}_phones"] = extract_phones(job)
phones = extract_phones(job)
set_job_cache(job_cache, f"{job_id}_phones", phones)
# If we can find the quick dictionary, use it
if JOBS.get(f"{job_id}_phones") is not None:
phone_to_return = JOBS.get(f"{job_id}_phones").get(job_row_number)
if phone_to_return:
return phone_to_return
else:
current_app.logger.warning(
f"Was unable to retrieve phone number from lookup dictionary for job {job_id}"
)
return "Unavailable"
phone_to_return = phones[job_row_number]
if phone_to_return:
return phone_to_return
else:
current_app.logger.error(
f"Was unable to construct lookup dictionary for job {job_id}"
current_app.logger.warning(
f"Was unable to retrieve phone number from lookup dictionary for job {job_id}"
)
return "Unavailable"
@@ -388,14 +450,15 @@ def get_personalisation_from_s3(service_id, job_id, job_row_number):
# We don't want to constantly pull down a job from s3 every time we need the personalisation.
# At the same time we don't want to store it in redis or the db
# So this is a little recycling mechanism to reduce the number of downloads.
job = JOBS.get(job_id)
job = job_cache.get(job_id)
if job is None:
current_app.logger.info(f"job {job_id} was not in the cache")
job = get_job_from_s3(service_id, job_id)
JOBS[job_id] = job
incr_jobs_cache_misses()
# Even if it is None, put it here to avoid KeyErrors
set_job_cache(job_cache, job_id, job)
else:
incr_jobs_cache_hits()
# skip expiration date from cache, we don't need it here
job = job[0]
# If the job is None after our attempt to retrieve it from s3, it
# probably means the job is old and has been deleted from s3, in
# which case there is nothing we can do. It's unlikely to run into
@@ -407,14 +470,11 @@ def get_personalisation_from_s3(service_id, job_id, job_row_number):
)
return {}
# If we look in the JOBS cache for the quick lookup dictionary of personalisations for a given job
# and that dictionary is not there, create it
if JOBS.get(f"{job_id}_personalisation") is None:
JOBS[f"{job_id}_personalisation"] = extract_personalisation(job)
set_job_cache(job_cache, f"{job_id}_personalisation", extract_personalisation(job))
# If we can find the quick dictionary, use it
if JOBS.get(f"{job_id}_personalisation") is not None:
personalisation_to_return = JOBS.get(f"{job_id}_personalisation").get(
if job_cache.get(f"{job_id}_personalisation") is not None:
personalisation_to_return = job_cache.get(f"{job_id}_personalisation")[0].get(
job_row_number
)
if personalisation_to_return:
@@ -432,6 +492,9 @@ def get_personalisation_from_s3(service_id, job_id, job_row_number):
def get_job_metadata_from_s3(service_id, job_id):
current_app.logger.info(
f"#s3-partitioning CALLING GET_JOB_METADATA with {service_id}, {job_id}"
)
obj = get_s3_object(*get_job_location(service_id, job_id))
return obj.get()["Metadata"]

View File

@@ -446,6 +446,11 @@ def regenerate_job_cache():
s3.get_s3_files()
@notify_celery.task(name="clean-job-cache")
def clean_job_cache():
s3.clean_cache()
@notify_celery.task(name="delete-old-s3-objects")
def delete_old_s3_objects():
s3.cleanup_old_s3_objects()

View File

@@ -13,6 +13,10 @@ AWS_CLIENT_CONFIG = Config(
"addressing_style": "virtual",
},
use_fips_endpoint=True,
# This is the default but just for doc sake
# there may come a time when increasing this helps
# with job cache management.
max_pool_connections=10,
)

View File

@@ -931,7 +931,7 @@ where possible to enable better maintainability.
# generate n number of test orgs into the dev DB
@notify_command(name="add-test-organizations-to-db")
@click.option("-g", "--generate", required=True, prompt=True, default=1)
def add_test_organizations_to_db(generate):
def add_test_organizations_to_db(generate): # pragma: no cover
if getenv("NOTIFY_ENVIRONMENT", "") not in ["development", "test"]:
current_app.logger.error("Can only be run in development")
return
@@ -993,7 +993,7 @@ def add_test_organizations_to_db(generate):
# generate n number of test services into the dev DB
@notify_command(name="add-test-services-to-db")
@click.option("-g", "--generate", required=True, prompt=True, default=1)
def add_test_services_to_db(generate):
def add_test_services_to_db(generate): # pragma: no cover
if getenv("NOTIFY_ENVIRONMENT", "") not in ["development", "test"]:
current_app.logger.error("Can only be run in development")
return
@@ -1007,7 +1007,7 @@ def add_test_services_to_db(generate):
# generate n number of test jobs into the dev DB
@notify_command(name="add-test-jobs-to-db")
@click.option("-g", "--generate", required=True, prompt=True, default=1)
def add_test_jobs_to_db(generate):
def add_test_jobs_to_db(generate): # pragma: no cover
if getenv("NOTIFY_ENVIRONMENT", "") not in ["development", "test"]:
current_app.logger.error("Can only be run in development")
return
@@ -1022,7 +1022,7 @@ def add_test_jobs_to_db(generate):
# generate n number of notifications into the dev DB
@notify_command(name="add-test-notifications-to-db")
@click.option("-g", "--generate", required=True, prompt=True, default=1)
def add_test_notifications_to_db(generate):
def add_test_notifications_to_db(generate): # pragma: no cover
if getenv("NOTIFY_ENVIRONMENT", "") not in ["development", "test"]:
current_app.logger.error("Can only be run in development")
return
@@ -1043,7 +1043,7 @@ def add_test_notifications_to_db(generate):
@click.option("-g", "--generate", required=True, prompt=True, default="1")
@click.option("-s", "--state", default="active")
@click.option("-d", "--admin", default=False, type=bool)
def add_test_users_to_db(generate, state, admin):
def add_test_users_to_db(generate, state, admin): # pragma: no cover
if getenv("NOTIFY_ENVIRONMENT", "") not in ["development", "test"]:
current_app.logger.error("Can only be run in development")
return

View File

@@ -251,7 +251,7 @@ class Config(object):
},
"delete_old_s3_objects": {
"task": "delete-old-s3-objects",
"schedule": crontab(minute="*/5"),
"schedule": crontab(hour=7, minute=10),
"options": {"queue": QueueNames.PERIODIC},
},
"regenerate-job-cache": {
@@ -269,6 +269,11 @@ class Config(object):
"expires": 60,
}, # Ensure it doesn't run if missed
},
"clean-job-cache": {
"task": "clean-job-cache",
"schedule": crontab(hour=2, minute=11),
"options": {"queue": QueueNames.PERIODIC},
},
"cleanup-unfinished-jobs": {
"task": "cleanup-unfinished-jobs",
"schedule": crontab(hour=4, minute=5),

View File

@@ -36,17 +36,14 @@ def send_sms_to_provider(notification):
Get data for recipient, template,
notification and send it to sns.
"""
# we no longer store the personalisation in the db,
# need to retrieve from s3 before generating content
# However, we are still sending the initial verify code through personalisation
# so if there is some value there, don't overwrite it
if not notification.personalisation:
personalisation = get_personalisation_from_s3(
notification.service_id,
notification.job_id,
notification.job_row_number,
)
notification.personalisation = personalisation
# Take this path for report generation, where we know
# everything is in the cache.
personalisation = get_personalisation_from_s3(
notification.service_id,
notification.job_id,
notification.job_row_number,
)
notification.personalisation = personalisation
service = SerialisedService.from_id(notification.service_id)
message_id = None

View File

@@ -175,6 +175,7 @@ def create_job(service_id):
original_file_name = data.get("original_file_name")
data.update({"service": service_id})
try:
current_app.logger.info(f"#s3-partitioning DATA IN CREATE_JOB: {data}")
data.update(**get_job_metadata_from_s3(service_id, data["id"]))
except KeyError:
raise InvalidRequest(

View File

@@ -86,7 +86,7 @@ def _create_service_invite(invited_user, invite_link_host):
redis_store.set(
f"email-personalisation-{saved_notification.id}",
json.dumps(personalisation),
ex=1800,
ex=2*24*60*60,
)
send_notification_to_queue(saved_notification, queue=QueueNames.NOTIFY)

View File

@@ -599,7 +599,6 @@ def fetch_user_by_email():
fetched_user = get_user_by_email(email["email"])
debug_not_production(hilite(f"fetched user is {fetched_user}"))
result = fetched_user.serialize()
debug_not_production(hilite(f"result is serialized to {result}"))
return jsonify(data=result)
except Exception as e:
debug_not_production(hilite(f"Failed with {e}!!"))

View File

@@ -1242,6 +1242,17 @@ Notify.gov DNS records are maintained within [the 18f/dns repository](https://gi
- Rename to `api_static_scan_DATE.zip` and add it to 🔒 https://drive.google.com/drive/folders/1dSe9H7Ag_hLfi5hmQDB2ktWaDwWSf4_R
- Repeat for https://github.com/GSA/notifications-admin/actions/workflows/daily_checks.yml
## Rotating the DANGEROUS_SALT
1. Start API locally `make run-procfile`
2. In a separate terminal tab, navigate to the API project and run `poetry run flask command generate-salt`
3. A random secret will appear in the tab
4. Go to github->settings->secrets and variables->actions in the admin project and find the DANGEROUS_SALT secret for the admin project for staging. Open it and paste the result of #3 into the secret and save. Repeat for the API project, for staging.
5. Repeat #3 and #4 but do it for demo
6. Repeat #3 and #4 but do it for production
The important thing is to use the same secret for Admin and API on each tier--i.e. you only generate three secrets.
## <a name="gotcha"></a> Known Gotchas

View File

@@ -39,6 +39,12 @@ def init_app(app):
for logger_instance, handler in product(warning_loggers, handlers):
logger_instance.addHandler(handler)
logger_instance.setLevel(logging.WARNING)
# Suppress specific loggers to prevent leaking sensitive info
logging.getLogger("boto3").setLevel(logging.ERROR)
logging.getLogger("botocore").setLevel(logging.ERROR)
logging.getLogger("urllib3").setLevel(logging.ERROR)
app.logger.info("Logging configured")

View File

@@ -1,4 +1,5 @@
import os
from datetime import timedelta
from os import getenv
import pytest
@@ -8,6 +9,7 @@ from app.aws.s3 import (
cleanup_old_s3_objects,
file_exists,
get_job_from_s3,
get_job_id_from_s3_object_key,
get_personalisation_from_s3,
get_phone_number_from_s3,
get_s3_file,
@@ -31,15 +33,30 @@ def single_s3_object_stub(key="foo", last_modified=None):
def test_cleanup_old_s3_objects(mocker):
"""
Currently we are going to delete s3 objects if they are more than 14 days old,
because we want to delete all jobs older than 7 days, and jobs can be scheduled
three days in advance, and on top of that we want to leave a little cushion for
the time being. This test shows that a 3 day old job ("B") is not deleted,
whereas a 30 day old job ("A") is.
"""
mocker.patch("app.aws.s3.get_bucket_name", return_value="Bucket")
mock_s3_client = mocker.Mock()
mocker.patch("app.aws.s3.get_s3_client", return_value=mock_s3_client)
mock_remove_csv_object = mocker.patch("app.aws.s3.remove_csv_object")
lastmod30 = aware_utcnow() - timedelta(days=30)
lastmod3 = aware_utcnow() - timedelta(days=3)
mock_s3_client.list_objects_v2.return_value = {
"Contents": [{"Key": "A", "LastModified": aware_utcnow()}]
"Contents": [
{"Key": "A", "LastModified": lastmod30},
{"Key": "B", "LastModified": lastmod3},
]
}
cleanup_old_s3_objects()
mock_s3_client.list_objects_v2.assert_called_with(Bucket="Bucket")
mock_remove_csv_object.assert_called_once_with("A")
def test_get_s3_file_makes_correct_call(notify_api, mocker):
@@ -95,13 +112,27 @@ def test_get_s3_file_makes_correct_call(notify_api, mocker):
def test_get_phone_number_from_s3(
mocker, job, job_id, job_row_number, expected_phone_number
):
mocker.patch("app.aws.s3.redis_store")
get_job_mock = mocker.patch("app.aws.s3.get_job_from_s3")
get_job_mock.return_value = job
phone_number = get_phone_number_from_s3("service_id", job_id, job_row_number)
assert phone_number == expected_phone_number
@pytest.mark.parametrize(
"key, expected_job_id",
[
("service-blahblahblah-notify/abcde.csv", "abcde"),
(
"service-x-notify/4c99f361-4ed7-49b1-bd6f-02fe0c807c53.csv",
"4c99f361-4ed7-49b1-bd6f-02fe0c807c53",
),
],
)
def test_get_job_id_from_s3_object_key(key, expected_job_id):
actual_job_id = get_job_id_from_s3_object_key(key)
assert actual_job_id == expected_job_id
def mock_s3_get_object_slowdown(*args, **kwargs):
error_response = {
"Error": {
@@ -120,7 +151,7 @@ def test_get_job_from_s3_exponential_backoff_on_throttling(mocker):
mocker.patch("app.aws.s3.file_exists", return_value=True)
job = get_job_from_s3("service_id", "job_id")
assert job is None
assert mock_get_object.call_count == 4
assert mock_get_object.call_count == 8
def test_get_job_from_s3_exponential_backoff_file_not_found(mocker):
@@ -160,7 +191,6 @@ def test_get_job_from_s3_exponential_backoff_file_not_found(mocker):
def test_get_personalisation_from_s3(
mocker, job, job_id, job_row_number, expected_personalisation
):
mocker.patch("app.aws.s3.redis_store")
get_job_mock = mocker.patch("app.aws.s3.get_job_from_s3")
get_job_mock.return_value = job
personalisation = get_personalisation_from_s3("service_id", job_id, job_row_number)

File diff suppressed because it is too large Load Diff