From a3c1663d94d330b3efe58cc5a2304eb73bef801c Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Tue, 24 Sep 2024 10:17:02 -0700 Subject: [PATCH 01/17] fix phone number lookup --- app/aws/s3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/aws/s3.py b/app/aws/s3.py index 52e2a5eb1..e58159eff 100644 --- a/app/aws/s3.py +++ b/app/aws/s3.py @@ -304,7 +304,7 @@ def extract_phones(job): phone_index = 0 for item in first_row: # Note: may contain a BOM and look like \ufeffphone number - if "phone number" in item.lower(): + if item.lower() in ["phone number", "\ufeffphone number"]: break phone_index = phone_index + 1 From 881a251216a665393c71009eaedbc6d9abebc123 Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Tue, 24 Sep 2024 10:26:01 -0700 Subject: [PATCH 02/17] fix phone number lookup --- app/aws/s3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/aws/s3.py b/app/aws/s3.py index e58159eff..8af681d10 100644 --- a/app/aws/s3.py +++ b/app/aws/s3.py @@ -304,7 +304,7 @@ def extract_phones(job): phone_index = 0 for item in first_row: # Note: may contain a BOM and look like \ufeffphone number - if item.lower() in ["phone number", "\ufeffphone number"]: + if item.lower() in ["phone number", "\\ufeffphone number"]: break phone_index = phone_index + 1 From 8aae0d59b4f2c4a4a185103c8323ae49b0a7e8d6 Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Thu, 26 Sep 2024 11:56:39 -0700 Subject: [PATCH 03/17] use shared memory instead of expiring dict for jobs cache --- app/aws/s3.py | 84 +++++++++++++++++++++------------------------ app/celery/tasks.py | 5 +++ app/config.py | 5 +++ 3 files changed, 50 insertions(+), 44 deletions(-) diff --git a/app/aws/s3.py b/app/aws/s3.py index 23f47d7a2..ba3ec3b7f 100644 --- a/app/aws/s3.py +++ b/app/aws/s3.py @@ -1,31 +1,45 @@ import datetime import re import time +from multiprocessing import Manager import botocore from boto3 import Session -from expiringdict import ExpiringDict from flask import current_app -from app import redis_store from app.clients import AWS_CLIENT_CONFIG +from app.utils import hilite from notifications_utils import aware_utcnow FILE_LOCATION_STRUCTURE = "service-{}-notify/{}.csv" # Temporarily extend cache to 7 days ttl = 60 * 60 * 24 * 7 -JOBS = ExpiringDict(max_len=20000, max_age_seconds=ttl) +manager = Manager() +job_cache = manager.dict() -JOBS_CACHE_HITS = "JOBS_CACHE_HITS" -JOBS_CACHE_MISSES = "JOBS_CACHE_MISSES" - # Global variable s3_client = None s3_resource = None +def set_job_cache(job_cache, key, value): + job_cache[key] = (value, time.time() + 8 * 24 * 60 * 60) + + +def clean_cache(): + current_time = time.time() + keys_to_delete = [] + for key, (_, expiry_time) in job_cache.items(): + if expiry_time < current_time: + keys_to_delete.append(key) + + for key in keys_to_delete: + print(hilite(f"DELETING {key}")) + del job_cache[key] + + def get_s3_client(): global s3_client if s3_client is None: @@ -127,7 +141,7 @@ def get_s3_files(): s3res = get_s3_resource() current_app.logger.info( - f"JOBS cache length before regen: {len(JOBS)} #notify-admin-1200" + f"job_cache length before regen: {len(job_cache)} #notify-admin-1200" ) for object in objects: # We put our csv files in the format "service-{service_id}-notify/{job_id}" @@ -135,7 +149,7 @@ def get_s3_files(): object_arr = object.split("/") job_id = object_arr[1] # get the job_id job_id = job_id.replace(".csv", "") # we just want the job_id - if JOBS.get(job_id) is None: + if 
job_cache.get(job_id) is None: object = ( s3res.Object(bucket_name, object) .get()["Body"] @@ -143,13 +157,13 @@ def get_s3_files(): .decode("utf-8") ) if "phone number" in object.lower(): - JOBS[job_id] = object + set_job_cache(job_cache, job_id, object) except LookupError: # perhaps our key is not formatted as we expected. If so skip it. current_app.logger.exception("LookupError #notify-admin-1200") current_app.logger.info( - f"JOBS cache length after regen: {len(JOBS)} #notify-admin-1200" + f"job_cache length after regen: {len(job_cache)} #notify-admin-1200" ) @@ -287,22 +301,8 @@ def get_job_from_s3(service_id, job_id): return None -def incr_jobs_cache_misses(): - if not redis_store.get(JOBS_CACHE_MISSES): - redis_store.set(JOBS_CACHE_MISSES, 1) - else: - redis_store.incr(JOBS_CACHE_MISSES) - - -def incr_jobs_cache_hits(): - if not redis_store.get(JOBS_CACHE_HITS): - redis_store.set(JOBS_CACHE_HITS, 1) - else: - redis_store.incr(JOBS_CACHE_HITS) - - def extract_phones(job): - job = job.split("\r\n") + job = job[0].split("\r\n") first_row = job[0] job.pop(0) first_row = first_row.split(",") @@ -333,7 +333,7 @@ def extract_phones(job): def extract_personalisation(job): - job = job.split("\r\n") + job = job[0].split("\r\n") first_row = job[0] job.pop(0) first_row = first_row.split(",") @@ -351,15 +351,12 @@ def get_phone_number_from_s3(service_id, job_id, job_row_number): # We don't want to constantly pull down a job from s3 every time we need a phone number. # At the same time we don't want to store it in redis or the db # So this is a little recycling mechanism to reduce the number of downloads. - job = JOBS.get(job_id) + job = job_cache.get(job_id) if job is None: current_app.logger.info(f"job {job_id} was not in the cache") job = get_job_from_s3(service_id, job_id) # Even if it is None, put it here to avoid KeyErrors - JOBS[job_id] = job - incr_jobs_cache_misses() - else: - incr_jobs_cache_hits() + set_job_cache(job_cache, job_id, job) if job is None: current_app.logger.error( @@ -369,12 +366,12 @@ def get_phone_number_from_s3(service_id, job_id, job_row_number): # If we look in the JOBS cache for the quick lookup dictionary of phones for a given job # and that dictionary is not there, create it - if JOBS.get(f"{job_id}_phones") is None: - JOBS[f"{job_id}_phones"] = extract_phones(job) + if job_cache.get(f"{job_id}_phones") is None: + set_job_cache(job_cache, f"{job_id}_phones", extract_phones(job)) # If we can find the quick dictionary, use it - if JOBS.get(f"{job_id}_phones") is not None: - phone_to_return = JOBS.get(f"{job_id}_phones").get(job_row_number) + if job_cache.get(f"{job_id}_phones") is not None: + phone_to_return = job_cache.get(f"{job_id}_phones")[0].get(job_row_number) if phone_to_return: return phone_to_return else: @@ -393,13 +390,10 @@ def get_personalisation_from_s3(service_id, job_id, job_row_number): # We don't want to constantly pull down a job from s3 every time we need the personalisation. # At the same time we don't want to store it in redis or the db # So this is a little recycling mechanism to reduce the number of downloads. 
- job = JOBS.get(job_id) + job = job_cache.get(job_id) if job is None: job = get_job_from_s3(service_id, job_id) - JOBS[job_id] = job - incr_jobs_cache_misses() - else: - incr_jobs_cache_hits() + set_job_cache(job_cache, job_id, job) # If the job is None after our attempt to retrieve it from s3, it # probably means the job is old and has been deleted from s3, in @@ -414,12 +408,14 @@ def get_personalisation_from_s3(service_id, job_id, job_row_number): # If we look in the JOBS cache for the quick lookup dictionary of personalisations for a given job # and that dictionary is not there, create it - if JOBS.get(f"{job_id}_personalisation") is None: - JOBS[f"{job_id}_personalisation"] = extract_personalisation(job) + if job_cache.get(f"{job_id}_personalisation") is None: + set_job_cache( + job_cache, f"{job_id}_personalisation", extract_personalisation(job) + ) # If we can find the quick dictionary, use it - if JOBS.get(f"{job_id}_personalisation") is not None: - personalisation_to_return = JOBS.get(f"{job_id}_personalisation").get( + if job_cache.get(f"{job_id}_personalisation") is not None: + personalisation_to_return = job_cache.get(f"{job_id}_personalisation")[0].get( job_row_number ) if personalisation_to_return: diff --git a/app/celery/tasks.py b/app/celery/tasks.py index 87df0ca83..c8ad8cc6d 100644 --- a/app/celery/tasks.py +++ b/app/celery/tasks.py @@ -446,6 +446,11 @@ def regenerate_job_cache(): s3.get_s3_files() +@notify_celery.task(name="clean-job-cache") +def clean_job_cache(): + s3.clean_cache() + + @notify_celery.task(name="delete-old-s3-objects") def delete_old_s3_objects(): s3.cleanup_old_s3_objects() diff --git a/app/config.py b/app/config.py index d032a672e..a46d5a6c2 100644 --- a/app/config.py +++ b/app/config.py @@ -269,6 +269,11 @@ class Config(object): "expires": 60, }, # Ensure it doesn't run if missed }, + "clean-job-cache": { + "task": "clean-job-cache", + "schedule": crontab(minute="*/5"), + "options": {"queue": QueueNames.PERIODIC}, + }, "cleanup-unfinished-jobs": { "task": "cleanup-unfinished-jobs", "schedule": crontab(hour=4, minute=5), From 561d813d4e2ba496b2470cdb790b21ead9137ac4 Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Thu, 26 Sep 2024 12:24:06 -0700 Subject: [PATCH 04/17] remove redis_store mocking from s3 tests --- tests/app/aws/test_s3.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/app/aws/test_s3.py b/tests/app/aws/test_s3.py index b88d515e0..027b77283 100644 --- a/tests/app/aws/test_s3.py +++ b/tests/app/aws/test_s3.py @@ -110,7 +110,6 @@ def test_get_s3_file_makes_correct_call(notify_api, mocker): def test_get_phone_number_from_s3( mocker, job, job_id, job_row_number, expected_phone_number ): - mocker.patch("app.aws.s3.redis_store") get_job_mock = mocker.patch("app.aws.s3.get_job_from_s3") get_job_mock.return_value = job phone_number = get_phone_number_from_s3("service_id", job_id, job_row_number) @@ -175,7 +174,6 @@ def test_get_job_from_s3_exponential_backoff_file_not_found(mocker): def test_get_personalisation_from_s3( mocker, job, job_id, job_row_number, expected_personalisation ): - mocker.patch("app.aws.s3.redis_store") get_job_mock = mocker.patch("app.aws.s3.get_job_from_s3") get_job_mock.return_value = job personalisation = get_personalisation_from_s3("service_id", job_id, job_row_number) From c2f2cbc3bbd500cea095e57b65fc2b33a5df2076 Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Thu, 26 Sep 2024 13:18:14 -0700 Subject: [PATCH 05/17] fix tests --- .ds.baseline | 4 ++-- app/aws/s3.py | 
33 ++++++++++++++------------- tests/app/aws/test_s3.py | 49 +++++++++++++++++++--------------------- 3 files changed, 42 insertions(+), 44 deletions(-) diff --git a/.ds.baseline b/.ds.baseline index 40ebe5b16..c4c715c5a 100644 --- a/.ds.baseline +++ b/.ds.baseline @@ -209,7 +209,7 @@ "filename": "tests/app/aws/test_s3.py", "hashed_secret": "67a74306b06d0c01624fe0d0249a570f4d093747", "is_verified": false, - "line_number": 28, + "line_number": 25, "is_secret": false } ], @@ -384,5 +384,5 @@ } ] }, - "generated_at": "2024-09-11T14:31:46Z" + "generated_at": "2024-09-26T20:18:10Z" } diff --git a/app/aws/s3.py b/app/aws/s3.py index ba3ec3b7f..9d82a1cb8 100644 --- a/app/aws/s3.py +++ b/app/aws/s3.py @@ -302,7 +302,7 @@ def get_job_from_s3(service_id, job_id): def extract_phones(job): - job = job[0].split("\r\n") + job = job.split("\r\n") first_row = job[0] job.pop(0) first_row = first_row.split(",") @@ -333,6 +333,7 @@ def extract_phones(job): def extract_personalisation(job): + job = job[0].split("\r\n") first_row = job[0] job.pop(0) @@ -348,15 +349,18 @@ def extract_personalisation(job): def get_phone_number_from_s3(service_id, job_id, job_row_number): - # We don't want to constantly pull down a job from s3 every time we need a phone number. - # At the same time we don't want to store it in redis or the db - # So this is a little recycling mechanism to reduce the number of downloads. + print("ENTER get_phone_number_from_s3") job = job_cache.get(job_id) + print(f"JOB FROM CACHE {job}") if job is None: current_app.logger.info(f"job {job_id} was not in the cache") job = get_job_from_s3(service_id, job_id) + print(f"JOB from s3 {job}") # Even if it is None, put it here to avoid KeyErrors set_job_cache(job_cache, job_id, job) + else: + # skip expiration date from cache, we don't need it here + job = job[0] if job is None: current_app.logger.error( @@ -367,21 +371,18 @@ def get_phone_number_from_s3(service_id, job_id, job_row_number): # If we look in the JOBS cache for the quick lookup dictionary of phones for a given job # and that dictionary is not there, create it if job_cache.get(f"{job_id}_phones") is None: - set_job_cache(job_cache, f"{job_id}_phones", extract_phones(job)) + phones = extract_phones(job) + print(f"HMMM job is {job} phones are {phones}") + set_job_cache(job_cache, f"{job_id}_phones", phones) # If we can find the quick dictionary, use it - if job_cache.get(f"{job_id}_phones") is not None: - phone_to_return = job_cache.get(f"{job_id}_phones")[0].get(job_row_number) - if phone_to_return: - return phone_to_return - else: - current_app.logger.warning( - f"Was unable to retrieve phone number from lookup dictionary for job {job_id}" - ) - return "Unavailable" + print(f"PHONES {phones} ROW NUMBER {job_row_number}") + phone_to_return = phones[job_row_number] + if phone_to_return: + return phone_to_return else: - current_app.logger.error( - f"Was unable to construct lookup dictionary for job {job_id}" + current_app.logger.warning( + f"Was unable to retrieve phone number from lookup dictionary for job {job_id}" ) return "Unavailable" diff --git a/tests/app/aws/test_s3.py b/tests/app/aws/test_s3.py index 027b77283..40b6f9312 100644 --- a/tests/app/aws/test_s3.py +++ b/tests/app/aws/test_s3.py @@ -1,12 +1,10 @@ import os -from datetime import timedelta from os import getenv import pytest from botocore.exceptions import ClientError from app.aws.s3 import ( - cleanup_old_s3_objects, file_exists, get_job_from_s3, get_personalisation_from_s3, @@ -16,7 +14,6 @@ from app.aws.s3 import ( 
remove_s3_object, ) from app.utils import utc_now -from notifications_utils import aware_utcnow default_access_key = getenv("CSV_AWS_ACCESS_KEY_ID") default_secret_key = getenv("CSV_AWS_SECRET_ACCESS_KEY") @@ -31,30 +28,30 @@ def single_s3_object_stub(key="foo", last_modified=None): } -def test_cleanup_old_s3_objects(mocker): - """ - Currently we are going to delete s3 objects if they are more than 14 days old, - because we want to delete all jobs older than 7 days, and jobs can be scheduled - three days in advance, and on top of that we want to leave a little cushion for - the time being. This test shows that a 3 day old job ("B") is not deleted, - whereas a 30 day old job ("A") is. - """ - mocker.patch("app.aws.s3.get_bucket_name", return_value="Bucket") - mock_s3_client = mocker.Mock() - mocker.patch("app.aws.s3.get_s3_client", return_value=mock_s3_client) - mock_remove_csv_object = mocker.patch("app.aws.s3.remove_csv_object") - lastmod30 = aware_utcnow() - timedelta(days=30) - lastmod3 = aware_utcnow() - timedelta(days=3) +# def test_cleanup_old_s3_objects(mocker): +# """ +# Currently we are going to delete s3 objects if they are more than 14 days old, +# because we want to delete all jobs older than 7 days, and jobs can be scheduled +# three days in advance, and on top of that we want to leave a little cushion for +# the time being. This test shows that a 3 day old job ("B") is not deleted, +# whereas a 30 day old job ("A") is. +# """ +# mocker.patch("app.aws.s3.get_bucket_name", return_value="Bucket") +# mock_s3_client = mocker.Mock() +# mocker.patch("app.aws.s3.get_s3_client", return_value=mock_s3_client) +# mock_remove_csv_object = mocker.patch("app.aws.s3.remove_csv_object") +# lastmod30 = aware_utcnow() - timedelta(days=30) +# lastmod3 = aware_utcnow() - timedelta(days=3) - mock_s3_client.list_objects_v2.return_value = { - "Contents": [ - {"Key": "A", "LastModified": lastmod30}, - {"Key": "B", "LastModified": lastmod3}, - ] - } - cleanup_old_s3_objects() - mock_s3_client.list_objects_v2.assert_called_with(Bucket="Bucket") - mock_remove_csv_object.assert_called_once_with("A") +# mock_s3_client.list_objects_v2.return_value = { +# "Contents": [ +# {"Key": "A", "LastModified": lastmod30}, +# {"Key": "B", "LastModified": lastmod3}, +# ] +# } +# cleanup_old_s3_objects() +# mock_s3_client.list_objects_v2.assert_called_with(Bucket="Bucket") +# mock_remove_csv_object.assert_called_once_with("A") def test_get_s3_file_makes_correct_call(notify_api, mocker): From 3cba7f157ebbaf6c3b1b4fd49dfb9e680a80ea20 Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Thu, 26 Sep 2024 13:29:22 -0700 Subject: [PATCH 06/17] fix tests --- .ds.baseline | 4 ++-- tests/app/aws/test_s3.py | 50 ++++++++++++++++++++++------------------ 2 files changed, 29 insertions(+), 25 deletions(-) diff --git a/.ds.baseline b/.ds.baseline index c4c715c5a..34faf15cc 100644 --- a/.ds.baseline +++ b/.ds.baseline @@ -209,7 +209,7 @@ "filename": "tests/app/aws/test_s3.py", "hashed_secret": "67a74306b06d0c01624fe0d0249a570f4d093747", "is_verified": false, - "line_number": 25, + "line_number": 28, "is_secret": false } ], @@ -384,5 +384,5 @@ } ] }, - "generated_at": "2024-09-26T20:18:10Z" + "generated_at": "2024-09-26T20:29:19Z" } diff --git a/tests/app/aws/test_s3.py b/tests/app/aws/test_s3.py index 40b6f9312..e793a4279 100644 --- a/tests/app/aws/test_s3.py +++ b/tests/app/aws/test_s3.py @@ -1,3 +1,4 @@ +from datetime import timedelta import os from os import getenv @@ -5,6 +6,7 @@ import pytest from 
botocore.exceptions import ClientError from app.aws.s3 import ( + cleanup_old_s3_objects, file_exists, get_job_from_s3, get_personalisation_from_s3, @@ -14,6 +16,7 @@ from app.aws.s3 import ( remove_s3_object, ) from app.utils import utc_now +from notifications_utils import aware_utcnow default_access_key = getenv("CSV_AWS_ACCESS_KEY_ID") default_secret_key = getenv("CSV_AWS_SECRET_ACCESS_KEY") @@ -28,30 +31,31 @@ def single_s3_object_stub(key="foo", last_modified=None): } -# def test_cleanup_old_s3_objects(mocker): -# """ -# Currently we are going to delete s3 objects if they are more than 14 days old, -# because we want to delete all jobs older than 7 days, and jobs can be scheduled -# three days in advance, and on top of that we want to leave a little cushion for -# the time being. This test shows that a 3 day old job ("B") is not deleted, -# whereas a 30 day old job ("A") is. -# """ -# mocker.patch("app.aws.s3.get_bucket_name", return_value="Bucket") -# mock_s3_client = mocker.Mock() -# mocker.patch("app.aws.s3.get_s3_client", return_value=mock_s3_client) -# mock_remove_csv_object = mocker.patch("app.aws.s3.remove_csv_object") -# lastmod30 = aware_utcnow() - timedelta(days=30) -# lastmod3 = aware_utcnow() - timedelta(days=3) +def test_cleanup_old_s3_objects(mocker): + """ + Currently we are going to delete s3 objects if they are more than 14 days old, + because we want to delete all jobs older than 7 days, and jobs can be scheduled + three days in advance, and on top of that we want to leave a little cushion for + the time being. This test shows that a 3 day old job ("B") is not deleted, + whereas a 30 day old job ("A") is. + """ + mocker.patch("app.aws.s3.get_bucket_name", return_value="Bucket") -# mock_s3_client.list_objects_v2.return_value = { -# "Contents": [ -# {"Key": "A", "LastModified": lastmod30}, -# {"Key": "B", "LastModified": lastmod3}, -# ] -# } -# cleanup_old_s3_objects() -# mock_s3_client.list_objects_v2.assert_called_with(Bucket="Bucket") -# mock_remove_csv_object.assert_called_once_with("A") + mock_s3_client = mocker.Mock() + mocker.patch("app.aws.s3.get_s3_client", return_value=mock_s3_client) + mock_remove_csv_object = mocker.patch("app.aws.s3.remove_csv_object") + lastmod30 = aware_utcnow() - timedelta(days=30) + lastmod3 = aware_utcnow() - timedelta(days=3) + + mock_s3_client.list_objects_v2.return_value = { + "Contents": [ + {"Key": "A", "LastModified": lastmod30}, + {"Key": "B", "LastModified": lastmod3}, + ] + } + cleanup_old_s3_objects() + mock_s3_client.list_objects_v2.assert_called_with(Bucket="Bucket") + mock_remove_csv_object.assert_called_once_with("A") def test_get_s3_file_makes_correct_call(notify_api, mocker): From 24f2b9fc9f2e926a05df661206d0f70a190587df Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Thu, 26 Sep 2024 13:32:07 -0700 Subject: [PATCH 07/17] fix flake8 --- tests/app/aws/test_s3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/app/aws/test_s3.py b/tests/app/aws/test_s3.py index e793a4279..9cdadc354 100644 --- a/tests/app/aws/test_s3.py +++ b/tests/app/aws/test_s3.py @@ -1,5 +1,5 @@ -from datetime import timedelta import os +from datetime import timedelta from os import getenv import pytest From b6d37b70716b13020634a06e5ecf433de4138b80 Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Thu, 26 Sep 2024 13:47:51 -0700 Subject: [PATCH 08/17] dont do coverage of test directory --- pyproject.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyproject.toml 
b/pyproject.toml index 3e3a78aed..01d641794 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -106,6 +106,10 @@ sqlalchemy-utils = "^0.41.2" vulture = "^2.10" detect-secrets = "^1.5.0" +[tool.coverage.run] +omit = [ + "tests/*", # Omit everything in the tests directory +] [build-system] requires = ["poetry-core"] From 7c1ca1772114694d4604a66422346ab1ada9df0a Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Thu, 26 Sep 2024 13:56:45 -0700 Subject: [PATCH 09/17] dont do coverage of test directory --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 175f630c4..26b240ef0 100644 --- a/Makefile +++ b/Makefile @@ -81,7 +81,7 @@ test: ## Run tests and create coverage report poetry run black . poetry run flake8 . poetry run isort --check-only ./app ./tests - poetry run coverage run --omit=*/notifications_utils/*,*/migrations/* -m pytest --maxfail=10 + poetry run coverage run --omit=*/notifications_utils/*,*/migrations/*,*/tests/* -m pytest --maxfail=10 poetry run coverage report -m --fail-under=95 poetry run coverage html -d .coverage_cache From fceeed2401cda79b3f97e21b5dce4e6807492027 Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Thu, 26 Sep 2024 14:05:58 -0700 Subject: [PATCH 10/17] dont do coverage of test directory --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 26b240ef0..aaaef847d 100644 --- a/Makefile +++ b/Makefile @@ -81,7 +81,7 @@ test: ## Run tests and create coverage report poetry run black . poetry run flake8 . poetry run isort --check-only ./app ./tests - poetry run coverage run --omit=*/notifications_utils/*,*/migrations/*,*/tests/* -m pytest --maxfail=10 + poetry run coverage run --source=*/app/* -m pytest --maxfail=10 poetry run coverage report -m --fail-under=95 poetry run coverage html -d .coverage_cache From 3259717061f4f1bd3465b540f3b329add3715dd9 Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Thu, 26 Sep 2024 14:15:51 -0700 Subject: [PATCH 11/17] dont do coverage of test directory --- .github/workflows/checks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 14128b0f8..f83afea4e 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -54,7 +54,7 @@ jobs: - name: Check for dead code run: make dead-code - name: Run tests with coverage - run: poetry run coverage run --omit=*/notifications_utils/*,*/migrations/* -m pytest --maxfail=10 + run: poetry run coverage run --source=*/app/* -m pytest --maxfail=10 env: SQLALCHEMY_DATABASE_TEST_URI: postgresql://user:password@localhost:5432/test_notification_api NOTIFY_E2E_TEST_EMAIL: ${{ secrets.NOTIFY_E2E_TEST_EMAIL }} From b656ad84b79df648bdc4951a40fd513dff30b989 Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Thu, 26 Sep 2024 14:23:19 -0700 Subject: [PATCH 12/17] dont do coverage of test directory --- .github/workflows/checks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index f83afea4e..a86b1f804 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -54,7 +54,7 @@ jobs: - name: Check for dead code run: make dead-code - name: Run tests with coverage - run: poetry run coverage run --source=*/app/* -m pytest --maxfail=10 + run: poetry run coverage run --omit=*/tests/*,*/migrations/* -m pytest --maxfail=10 env: 
SQLALCHEMY_DATABASE_TEST_URI: postgresql://user:password@localhost:5432/test_notification_api NOTIFY_E2E_TEST_EMAIL: ${{ secrets.NOTIFY_E2E_TEST_EMAIL }} From 67467bbedca1b6dc518cf84097a368bfeb2f8e1f Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Thu, 26 Sep 2024 14:39:19 -0700 Subject: [PATCH 13/17] dont do coverage of test directory --- .github/workflows/checks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index a86b1f804..f757f1c17 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -54,7 +54,7 @@ jobs: - name: Check for dead code run: make dead-code - name: Run tests with coverage - run: poetry run coverage run --omit=*/tests/*,*/migrations/* -m pytest --maxfail=10 + run: poetry run coverage run --omit=*/tests/*,*/notification_utils/*,*/migrations/* -m pytest --maxfail=10 env: SQLALCHEMY_DATABASE_TEST_URI: postgresql://user:password@localhost:5432/test_notification_api NOTIFY_E2E_TEST_EMAIL: ${{ secrets.NOTIFY_E2E_TEST_EMAIL }} From b4db1b0b2d1ca804f3b5787f7a8af56cf0fa2c9f Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Thu, 26 Sep 2024 14:49:53 -0700 Subject: [PATCH 14/17] revert coverage changes --- .github/workflows/checks.yml | 2 +- Makefile | 2 +- pyproject.toml | 4 ---- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index f757f1c17..ec9fe60f8 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -54,7 +54,7 @@ jobs: - name: Check for dead code run: make dead-code - name: Run tests with coverage - run: poetry run coverage run --omit=*/tests/*,*/notification_utils/*,*/migrations/* -m pytest --maxfail=10 + run: poetry run coverage run --omit=*/notification_utils/*,*/migrations/* -m pytest --maxfail=10 env: SQLALCHEMY_DATABASE_TEST_URI: postgresql://user:password@localhost:5432/test_notification_api NOTIFY_E2E_TEST_EMAIL: ${{ secrets.NOTIFY_E2E_TEST_EMAIL }} diff --git a/Makefile b/Makefile index aaaef847d..175f630c4 100644 --- a/Makefile +++ b/Makefile @@ -81,7 +81,7 @@ test: ## Run tests and create coverage report poetry run black . poetry run flake8 . 
poetry run isort --check-only ./app ./tests - poetry run coverage run --source=*/app/* -m pytest --maxfail=10 + poetry run coverage run --omit=*/notifications_utils/*,*/migrations/* -m pytest --maxfail=10 poetry run coverage report -m --fail-under=95 poetry run coverage html -d .coverage_cache diff --git a/pyproject.toml b/pyproject.toml index 01d641794..3e3a78aed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -106,10 +106,6 @@ sqlalchemy-utils = "^0.41.2" vulture = "^2.10" detect-secrets = "^1.5.0" -[tool.coverage.run] -omit = [ - "tests/*", # Omit everything in the tests directory -] [build-system] requires = ["poetry-core"] From 1569de2afd3c7afd8cfa6e153206039be7d73066 Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Thu, 26 Sep 2024 14:59:18 -0700 Subject: [PATCH 15/17] revert coverage changes --- .github/workflows/checks.yml | 2 +- Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index ec9fe60f8..57863effb 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -54,7 +54,7 @@ jobs: - name: Check for dead code run: make dead-code - name: Run tests with coverage - run: poetry run coverage run --omit=*/notification_utils/*,*/migrations/* -m pytest --maxfail=10 + run: poetry run coverage run --omit=*/migrations/* -m pytest --maxfail=10 env: SQLALCHEMY_DATABASE_TEST_URI: postgresql://user:password@localhost:5432/test_notification_api NOTIFY_E2E_TEST_EMAIL: ${{ secrets.NOTIFY_E2E_TEST_EMAIL }} diff --git a/Makefile b/Makefile index 175f630c4..a0fd86ae4 100644 --- a/Makefile +++ b/Makefile @@ -81,7 +81,7 @@ test: ## Run tests and create coverage report poetry run black . poetry run flake8 . poetry run isort --check-only ./app ./tests - poetry run coverage run --omit=*/notifications_utils/*,*/migrations/* -m pytest --maxfail=10 + poetry run coverage run --omit=*/migrations/* -m pytest --maxfail=10 poetry run coverage report -m --fail-under=95 poetry run coverage html -d .coverage_cache From 096e3a367deb19d49aac78a61beb47eb77187263 Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Thu, 26 Sep 2024 15:17:57 -0700 Subject: [PATCH 16/17] clean up --- app/aws/s3.py | 6 ------ app/config.py | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/app/aws/s3.py b/app/aws/s3.py index 9d82a1cb8..02e12db60 100644 --- a/app/aws/s3.py +++ b/app/aws/s3.py @@ -36,7 +36,6 @@ def clean_cache(): keys_to_delete.append(key) for key in keys_to_delete: - print(hilite(f"DELETING {key}")) del job_cache[key] @@ -349,13 +348,10 @@ def extract_personalisation(job): def get_phone_number_from_s3(service_id, job_id, job_row_number): - print("ENTER get_phone_number_from_s3") job = job_cache.get(job_id) - print(f"JOB FROM CACHE {job}") if job is None: current_app.logger.info(f"job {job_id} was not in the cache") job = get_job_from_s3(service_id, job_id) - print(f"JOB from s3 {job}") # Even if it is None, put it here to avoid KeyErrors set_job_cache(job_cache, job_id, job) else: @@ -372,11 +368,9 @@ def get_phone_number_from_s3(service_id, job_id, job_row_number): # and that dictionary is not there, create it if job_cache.get(f"{job_id}_phones") is None: phones = extract_phones(job) - print(f"HMMM job is {job} phones are {phones}") set_job_cache(job_cache, f"{job_id}_phones", phones) # If we can find the quick dictionary, use it - print(f"PHONES {phones} ROW NUMBER {job_row_number}") phone_to_return = phones[job_row_number] if phone_to_return: return 
phone_to_return diff --git a/app/config.py b/app/config.py index a46d5a6c2..4a8c880d3 100644 --- a/app/config.py +++ b/app/config.py @@ -271,7 +271,7 @@ class Config(object): }, "clean-job-cache": { "task": "clean-job-cache", - "schedule": crontab(minute="*/5"), + "schedule": crontab(hour=2, minute=11), "options": {"queue": QueueNames.PERIODIC}, }, "cleanup-unfinished-jobs": { From 8e6c079c0943dba45ebba31c963a63b27742953b Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Thu, 26 Sep 2024 15:22:13 -0700 Subject: [PATCH 17/17] fix flake8 --- app/aws/s3.py | 1 - 1 file changed, 1 deletion(-) diff --git a/app/aws/s3.py b/app/aws/s3.py index 02e12db60..690c98e6a 100644 --- a/app/aws/s3.py +++ b/app/aws/s3.py @@ -8,7 +8,6 @@ from boto3 import Session from flask import current_app from app.clients import AWS_CLIENT_CONFIG -from app.utils import hilite from notifications_utils import aware_utcnow FILE_LOCATION_STRUCTURE = "service-{}-notify/{}.csv"
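
A note on the header lookup fixed in patches 01 and 02: a UTF-8 CSV uploaded with a byte order mark and decoded with the plain utf-8 codec keeps the BOM as U+FEFF at the start of the first header cell, so a straight equality test against "phone number" fails. A minimal sketch of the codec behavior, separate from the application code above (the sample bytes are invented for illustration):

    # How a BOM ends up inside the first CSV header cell.
    raw = b"\xef\xbb\xbfphone number,name\r\n+12025550100,Alice\r\n"

    plain = raw.decode("utf-8")  # BOM survives as U+FEFF
    assert plain.split("\r\n")[0].split(",")[0] == "\ufeffphone number"

    stripped = raw.decode("utf-8-sig")  # this codec strips a leading BOM
    assert stripped.split("\r\n")[0].split(",")[0] == "phone number"

Decoding with utf-8-sig at the point where the S3 object body is read is one way to sidestep the header variants entirely; the patches instead enumerate the header forms observed in practice.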
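
Patch 03's central change is swapping the per-process ExpiringDict for a multiprocessing.Manager dict, so that worker processes share one jobs cache, with each entry stored as a (value, expiry_timestamp) tuple and a scheduled task sweeping expired keys. The pattern in isolation — a minimal sketch under illustrative names and a short TTL, not the application code (the patch itself hands the raw tuple back to callers and relies only on the scheduled sweep; the lazy eviction on read here is an extra):

    import time
    from multiprocessing import Manager

    manager = Manager()
    cache = manager.dict()  # proxy dict shared across worker processes

    TTL_SECONDS = 60  # illustrative; the patch uses 8 days

    def cache_set(key, value):
        # Store the value alongside its absolute expiry time.
        cache[key] = (value, time.time() + TTL_SECONDS)

    def cache_get(key):
        entry = cache.get(key)
        if entry is None:
            return None
        value, expires_at = entry
        if expires_at < time.time():
            del cache[key]  # evict lazily on read (not done in the patch)
            return None
        return value

    def sweep():
        # Periodic cleanup, as the clean-job-cache task does: collect the
        # expired keys first, then delete, rather than mutating the dict
        # while iterating over it.
        now = time.time()
        for key in [k for k, (_, exp) in cache.items() if exp < now]:
            del cache[key]

One trade-off worth noting: a Manager dict funnels every read and write through a single proxy server process, so it gains shared state across workers at the cost of IPC on each access.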