From f9120e7d3eb14e4c9c61caa189406bbebae4b149 Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Thu, 18 Jan 2024 13:54:23 -0800 Subject: [PATCH 1/7] optimize how we look up phone numbers --- app/aws/s3.py | 49 +++++++++++++++++++++++++++++++++++++++++---- app/service/rest.py | 6 +++++- 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/app/aws/s3.py b/app/aws/s3.py index acc89ebc0..b6749c217 100644 --- a/app/aws/s3.py +++ b/app/aws/s3.py @@ -11,7 +11,8 @@ from app.clients import AWS_CLIENT_CONFIG FILE_LOCATION_STRUCTURE = "service-{}-notify/{}.csv" -JOBS = ExpiringDict(max_len=100, max_age_seconds=3600 * 4) +JOBS = ExpiringDict(max_len=1000, max_age_seconds=3600 * 4) + JOBS_CACHE_HITS = "JOBS_CACHE_HITS" JOBS_CACHE_MISSES = "JOBS_CACHE_MISSES" @@ -96,6 +97,32 @@ def incr_jobs_cache_hits(): current_app.logger.info(f"JOBS CACHE MISS hits {hits} misses {misses}") +def extract_phones(job): + job = job.split("\r\n") + first_row = job[0] + job.pop(0) + first_row = first_row.split(",") + phone_index = 0 + for item in first_row: + if item.lower() == "phone number": + break + phone_index = phone_index + 1 + phones = {} + job_row = 0 + for row in job: + row = row.split(",") + phone_index = 0 + for item in first_row: + if item.lower() == "phone number": + break + phone_index = phone_index + 1 + my_phone = row[phone_index] + my_phone = re.sub(r"[\+\s\(\)\-\.]*", "", my_phone) + phones[job_row] = my_phone + job_row = job_row + 1 + return phones + + def get_phone_number_from_s3(service_id, job_id, job_row_number): # We don't want to constantly pull down a job from s3 every time we need a phone number. # At the same time we don't want to store it in redis or the db @@ -108,6 +135,21 @@ def get_phone_number_from_s3(service_id, job_id, job_row_number): else: incr_jobs_cache_hits() + if job is None: + current_app.logger.warning( + "Couldnt find phone for job_id {job_id} row number {job_row_number} because job is missing" + ) + return "Unknown Phone" + + if JOBS.get(f"{job_id}_phones") is None: + JOBS[f"{job_id}_phones"] = extract_phones(job) + + if JOBS.get(f"{job_id}_phones") is not None: + phone_to_return = JOBS.get(f"{job_id}_phones").get(job_row_number) + if phone_to_return: + print(f"USING SHORT CUT! {phone_to_return}") + return phone_to_return + job = job.split("\r\n") first_row = job[0] job.pop(0) @@ -121,11 +163,10 @@ def get_phone_number_from_s3(service_id, job_id, job_row_number): correct_row = job[job_row_number] correct_row = correct_row.split(",") - # This could happen if an old job cannot be retrieved from s3 - if len(correct_row) <= phone_index: - return "Unknown Phone" my_phone = correct_row[phone_index] my_phone = re.sub(r"[\+\s\(\)\-\.]*", "", my_phone) + if not my_phone: + return "Unknown Phone" return my_phone diff --git a/app/service/rest.py b/app/service/rest.py index cef10ef1a..2f2c30697 100644 --- a/app/service/rest.py +++ b/app/service/rest.py @@ -433,8 +433,12 @@ def get_all_notifications_for_service(service_id): notification.job_id, notification.job_row_number, ) + print(f"RECIPIENTE IN service/rest {recipient}") notification.to = recipient notification.normalised_to = recipient + else: + notification.to = "1" + notification.normalised_to = "1" kwargs = request.args.to_dict() kwargs["service_id"] = service_id @@ -474,7 +478,7 @@ def get_all_notifications_for_service(service_id): page, len(next_page_of_pagination.items), ".get_all_notifications_for_service", - **kwargs + **kwargs, ) if count_pages else {}, From 797c9f5fc8f2be4ec7477747ba50e06018e2d2cb Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Thu, 18 Jan 2024 14:06:32 -0800 Subject: [PATCH 2/7] remove print statement --- app/aws/s3.py | 1 - 1 file changed, 1 deletion(-) diff --git a/app/aws/s3.py b/app/aws/s3.py index b6749c217..2e8f5196d 100644 --- a/app/aws/s3.py +++ b/app/aws/s3.py @@ -147,7 +147,6 @@ def get_phone_number_from_s3(service_id, job_id, job_row_number): if JOBS.get(f"{job_id}_phones") is not None: phone_to_return = JOBS.get(f"{job_id}_phones").get(job_row_number) if phone_to_return: - print(f"USING SHORT CUT! {phone_to_return}") return phone_to_return job = job.split("\r\n") From 510e67e20bf1ac254d1699e67120a69db778a263 Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Thu, 18 Jan 2024 15:12:52 -0800 Subject: [PATCH 3/7] code review feedback --- app/aws/s3.py | 34 ++++++++++++++-------------------- app/service/rest.py | 1 - 2 files changed, 14 insertions(+), 21 deletions(-) diff --git a/app/aws/s3.py b/app/aws/s3.py index 2e8f5196d..f1fdb8737 100644 --- a/app/aws/s3.py +++ b/app/aws/s3.py @@ -94,7 +94,7 @@ def incr_jobs_cache_hits(): redis_store.incr(JOBS_CACHE_HITS) hits = redis_store.get(JOBS_CACHE_HITS).decode("utf-8") misses = redis_store.get(JOBS_CACHE_MISSES).decode("utf-8") - current_app.logger.info(f"JOBS CACHE MISS hits {hits} misses {misses}") + current_app.logger.info(f"JOBS CACHE HIT hits {hits} misses {misses}") def extract_phones(job): @@ -135,38 +135,32 @@ def get_phone_number_from_s3(service_id, job_id, job_row_number): else: incr_jobs_cache_hits() + # If the job is None after our attempt to retrieve it from s3, it + # probably means the job is old and has been deleted from s3, in + # which case there is nothing we can do. It's unlikely to run into + # this, but it could theoretically happen, especially if we ever + # change the task schedules if job is None: current_app.logger.warning( "Couldnt find phone for job_id {job_id} row number {job_row_number} because job is missing" ) return "Unknown Phone" + # If we look in the JOBS cache for the quick lookup dictionary of phones for a given job + # and that dictionary is not there, create it if JOBS.get(f"{job_id}_phones") is None: JOBS[f"{job_id}_phones"] = extract_phones(job) + # If we can find the quick dictionary, use it if JOBS.get(f"{job_id}_phones") is not None: phone_to_return = JOBS.get(f"{job_id}_phones").get(job_row_number) if phone_to_return: return phone_to_return - - job = job.split("\r\n") - first_row = job[0] - job.pop(0) - first_row = first_row.split(",") - phone_index = 0 - for item in first_row: - if item.lower() == "phone number": - break - phone_index = phone_index + 1 - - correct_row = job[job_row_number] - correct_row = correct_row.split(",") - - my_phone = correct_row[phone_index] - my_phone = re.sub(r"[\+\s\(\)\-\.]*", "", my_phone) - if not my_phone: - return "Unknown Phone" - return my_phone + else: + current_app.logger.warning( + "Was unable to retrieve phone number from lookup dictionary for job {job_id}" + ) + return "Unknown Phone" def get_job_metadata_from_s3(service_id, job_id): diff --git a/app/service/rest.py b/app/service/rest.py index 2f2c30697..bc52d49d9 100644 --- a/app/service/rest.py +++ b/app/service/rest.py @@ -433,7 +433,6 @@ def get_all_notifications_for_service(service_id): notification.job_id, notification.job_row_number, ) - print(f"RECIPIENTE IN service/rest {recipient}") notification.to = recipient notification.normalised_to = recipient else: From a100f60369c1ef1abcc777424faff66a11b66a09 Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Fri, 19 Jan 2024 09:02:44 -0800 Subject: [PATCH 4/7] code review feedback --- app/aws/s3.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/app/aws/s3.py b/app/aws/s3.py index f1fdb8737..f942feefb 100644 --- a/app/aws/s3.py +++ b/app/aws/s3.py @@ -158,9 +158,14 @@ def get_phone_number_from_s3(service_id, job_id, job_row_number): return phone_to_return else: current_app.logger.warning( - "Was unable to retrieve phone number from lookup dictionary for job {job_id}" + f"Was unable to retrieve phone number from lookup dictionary for job {job_id}" ) return "Unknown Phone" + else: + current_app.logger.error( + f"Was unable to construct lookup dictionary for job {job_id}" + ) + return "Unknown Phone" def get_job_metadata_from_s3(service_id, job_id): From aa6f8afa44e72dff619fac945367b3995c4d0f19 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 19 Jan 2024 21:12:26 +0000 Subject: [PATCH 5/7] Bump beautifulsoup4 from 4.12.2 to 4.12.3 Bumps [beautifulsoup4](https://www.crummy.com/software/BeautifulSoup/bs4/) from 4.12.2 to 4.12.3. --- updated-dependencies: - dependency-name: beautifulsoup4 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- poetry.lock | 11 +++++++---- pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index 406d20bed..e850898f3 100644 --- a/poetry.lock +++ b/poetry.lock @@ -280,19 +280,22 @@ typecheck = ["mypy"] [[package]] name = "beautifulsoup4" -version = "4.12.2" +version = "4.12.3" description = "Screen-scraping library" optional = false python-versions = ">=3.6.0" files = [ - {file = "beautifulsoup4-4.12.2-py3-none-any.whl", hash = "sha256:bd2520ca0d9d7d12694a53d44ac482d181b4ec1888909b035a3dbf40d0f57d4a"}, - {file = "beautifulsoup4-4.12.2.tar.gz", hash = "sha256:492bbc69dca35d12daac71c4db1bfff0c876c00ef4a2ffacce226d4638eb72da"}, + {file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"}, + {file = "beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051"}, ] [package.dependencies] soupsieve = ">1.2" [package.extras] +cchardet = ["cchardet"] +chardet = ["chardet"] +charset-normalizer = ["charset-normalizer"] html5lib = ["html5lib"] lxml = ["lxml"] @@ -4720,4 +4723,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.12" -content-hash = "3389aa4ce5477dd99ab467168569e9920b942777f37e671bceb8e362ca362f76" +content-hash = "a6d1d609f32455bcd4a1b2934512bba368d3bad792a032bb0e8e2b49f6f5f99a" diff --git a/pyproject.toml b/pyproject.toml index bec3af860..95bf20635 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ readme = "README.md" python = ">=3.9,<3.12" alembic = "==1.13.1" amqp = "==5.2.0" -beautifulsoup4 = "==4.12.2" +beautifulsoup4 = "==4.12.3" boto3 = "^1.29.6" botocore = "^1.32.6" cachetools = "==5.3.2" From 2fcb2f59b8d8383b4f08039823a7793d15616f70 Mon Sep 17 00:00:00 2001 From: Carlo Costino Date: Fri, 19 Jan 2024 17:39:32 -0500 Subject: [PATCH 6/7] Adding missing egress proxy configuration This changeset adds a missing egress proxy configuration environment variable. Signed-off-by: Carlo Costino --- .profile | 1 + 1 file changed, 1 insertion(+) diff --git a/.profile b/.profile index 1c1f98b4b..f7721ea96 100644 --- a/.profile +++ b/.profile @@ -3,5 +3,6 @@ # https://docs.cloudfoundry.org/devguide/deploy-apps/deploy-app.html#profile ## +export http_proxy=$egress_proxy export https_proxy=$egress_proxy export NEW_RELIC_PROXY_HOST=$egress_proxy From 316875a22a8762c45dfcd97cbd5b8d6d2fac9dd6 Mon Sep 17 00:00:00 2001 From: Carlo Costino Date: Fri, 19 Jan 2024 17:42:38 -0500 Subject: [PATCH 7/7] Add Login.gov URLs to our egress allow ACL Signed-off-by: Carlo Costino --- deploy-config/egress_proxy/notify-api-demo.allow.acl | 2 ++ deploy-config/egress_proxy/notify-api-production.allow.acl | 2 ++ deploy-config/egress_proxy/notify-api-staging.allow.acl | 2 ++ 3 files changed, 6 insertions(+) diff --git a/deploy-config/egress_proxy/notify-api-demo.allow.acl b/deploy-config/egress_proxy/notify-api-demo.allow.acl index 36a93c46f..6aa6aa2b4 100644 --- a/deploy-config/egress_proxy/notify-api-demo.allow.acl +++ b/deploy-config/egress_proxy/notify-api-demo.allow.acl @@ -8,3 +8,5 @@ s3-fips.us-west-2.amazonaws.com sns-fips.us-east-1.amazonaws.com gov-collector.newrelic.com egress-proxy-notify-api-demo.apps.internal +idp.int.identitysandbox.gov +secure.login.gov diff --git a/deploy-config/egress_proxy/notify-api-production.allow.acl b/deploy-config/egress_proxy/notify-api-production.allow.acl index 2cc1bd8fe..a7a70636f 100644 --- a/deploy-config/egress_proxy/notify-api-production.allow.acl +++ b/deploy-config/egress_proxy/notify-api-production.allow.acl @@ -7,3 +7,5 @@ s3-fips.us-gov-west-1.amazonaws.com sns.us-gov-west-1.amazonaws.com gov-collector.newrelic.com egress-proxy-notify-api-production.apps.internal +idp.int.identitysandbox.gov +secure.login.gov diff --git a/deploy-config/egress_proxy/notify-api-staging.allow.acl b/deploy-config/egress_proxy/notify-api-staging.allow.acl index 3768c3c74..eb46ce948 100644 --- a/deploy-config/egress_proxy/notify-api-staging.allow.acl +++ b/deploy-config/egress_proxy/notify-api-staging.allow.acl @@ -8,3 +8,5 @@ s3-fips.us-west-2.amazonaws.com sns-fips.us-west-2.amazonaws.com gov-collector.newrelic.com egress-proxy-notify-api-staging.apps.internal +idp.int.identitysandbox.gov +secure.login.gov