From f9120e7d3eb14e4c9c61caa189406bbebae4b149 Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Thu, 18 Jan 2024 13:54:23 -0800 Subject: [PATCH 1/4] optimize how we look up phone numbers --- app/aws/s3.py | 49 +++++++++++++++++++++++++++++++++++++++++---- app/service/rest.py | 6 +++++- 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/app/aws/s3.py b/app/aws/s3.py index acc89ebc0..b6749c217 100644 --- a/app/aws/s3.py +++ b/app/aws/s3.py @@ -11,7 +11,8 @@ from app.clients import AWS_CLIENT_CONFIG FILE_LOCATION_STRUCTURE = "service-{}-notify/{}.csv" -JOBS = ExpiringDict(max_len=100, max_age_seconds=3600 * 4) +JOBS = ExpiringDict(max_len=1000, max_age_seconds=3600 * 4) + JOBS_CACHE_HITS = "JOBS_CACHE_HITS" JOBS_CACHE_MISSES = "JOBS_CACHE_MISSES" @@ -96,6 +97,32 @@ def incr_jobs_cache_hits(): current_app.logger.info(f"JOBS CACHE MISS hits {hits} misses {misses}") +def extract_phones(job): + job = job.split("\r\n") + first_row = job[0] + job.pop(0) + first_row = first_row.split(",") + phone_index = 0 + for item in first_row: + if item.lower() == "phone number": + break + phone_index = phone_index + 1 + phones = {} + job_row = 0 + for row in job: + row = row.split(",") + phone_index = 0 + for item in first_row: + if item.lower() == "phone number": + break + phone_index = phone_index + 1 + my_phone = row[phone_index] + my_phone = re.sub(r"[\+\s\(\)\-\.]*", "", my_phone) + phones[job_row] = my_phone + job_row = job_row + 1 + return phones + + def get_phone_number_from_s3(service_id, job_id, job_row_number): # We don't want to constantly pull down a job from s3 every time we need a phone number. # At the same time we don't want to store it in redis or the db @@ -108,6 +135,21 @@ def get_phone_number_from_s3(service_id, job_id, job_row_number): else: incr_jobs_cache_hits() + if job is None: + current_app.logger.warning( + "Couldnt find phone for job_id {job_id} row number {job_row_number} because job is missing" + ) + return "Unknown Phone" + + if JOBS.get(f"{job_id}_phones") is None: + JOBS[f"{job_id}_phones"] = extract_phones(job) + + if JOBS.get(f"{job_id}_phones") is not None: + phone_to_return = JOBS.get(f"{job_id}_phones").get(job_row_number) + if phone_to_return: + print(f"USING SHORT CUT! {phone_to_return}") + return phone_to_return + job = job.split("\r\n") first_row = job[0] job.pop(0) @@ -121,11 +163,10 @@ def get_phone_number_from_s3(service_id, job_id, job_row_number): correct_row = job[job_row_number] correct_row = correct_row.split(",") - # This could happen if an old job cannot be retrieved from s3 - if len(correct_row) <= phone_index: - return "Unknown Phone" my_phone = correct_row[phone_index] my_phone = re.sub(r"[\+\s\(\)\-\.]*", "", my_phone) + if not my_phone: + return "Unknown Phone" return my_phone diff --git a/app/service/rest.py b/app/service/rest.py index cef10ef1a..2f2c30697 100644 --- a/app/service/rest.py +++ b/app/service/rest.py @@ -433,8 +433,12 @@ def get_all_notifications_for_service(service_id): notification.job_id, notification.job_row_number, ) + print(f"RECIPIENTE IN service/rest {recipient}") notification.to = recipient notification.normalised_to = recipient + else: + notification.to = "1" + notification.normalised_to = "1" kwargs = request.args.to_dict() kwargs["service_id"] = service_id @@ -474,7 +478,7 @@ def get_all_notifications_for_service(service_id): page, len(next_page_of_pagination.items), ".get_all_notifications_for_service", - **kwargs + **kwargs, ) if count_pages else {}, From 797c9f5fc8f2be4ec7477747ba50e06018e2d2cb Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Thu, 18 Jan 2024 14:06:32 -0800 Subject: [PATCH 2/4] remove print statement --- app/aws/s3.py | 1 - 1 file changed, 1 deletion(-) diff --git a/app/aws/s3.py b/app/aws/s3.py index b6749c217..2e8f5196d 100644 --- a/app/aws/s3.py +++ b/app/aws/s3.py @@ -147,7 +147,6 @@ def get_phone_number_from_s3(service_id, job_id, job_row_number): if JOBS.get(f"{job_id}_phones") is not None: phone_to_return = JOBS.get(f"{job_id}_phones").get(job_row_number) if phone_to_return: - print(f"USING SHORT CUT! {phone_to_return}") return phone_to_return job = job.split("\r\n") From 510e67e20bf1ac254d1699e67120a69db778a263 Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Thu, 18 Jan 2024 15:12:52 -0800 Subject: [PATCH 3/4] code review feedback --- app/aws/s3.py | 34 ++++++++++++++-------------------- app/service/rest.py | 1 - 2 files changed, 14 insertions(+), 21 deletions(-) diff --git a/app/aws/s3.py b/app/aws/s3.py index 2e8f5196d..f1fdb8737 100644 --- a/app/aws/s3.py +++ b/app/aws/s3.py @@ -94,7 +94,7 @@ def incr_jobs_cache_hits(): redis_store.incr(JOBS_CACHE_HITS) hits = redis_store.get(JOBS_CACHE_HITS).decode("utf-8") misses = redis_store.get(JOBS_CACHE_MISSES).decode("utf-8") - current_app.logger.info(f"JOBS CACHE MISS hits {hits} misses {misses}") + current_app.logger.info(f"JOBS CACHE HIT hits {hits} misses {misses}") def extract_phones(job): @@ -135,38 +135,32 @@ def get_phone_number_from_s3(service_id, job_id, job_row_number): else: incr_jobs_cache_hits() + # If the job is None after our attempt to retrieve it from s3, it + # probably means the job is old and has been deleted from s3, in + # which case there is nothing we can do. It's unlikely to run into + # this, but it could theoretically happen, especially if we ever + # change the task schedules if job is None: current_app.logger.warning( "Couldnt find phone for job_id {job_id} row number {job_row_number} because job is missing" ) return "Unknown Phone" + # If we look in the JOBS cache for the quick lookup dictionary of phones for a given job + # and that dictionary is not there, create it if JOBS.get(f"{job_id}_phones") is None: JOBS[f"{job_id}_phones"] = extract_phones(job) + # If we can find the quick dictionary, use it if JOBS.get(f"{job_id}_phones") is not None: phone_to_return = JOBS.get(f"{job_id}_phones").get(job_row_number) if phone_to_return: return phone_to_return - - job = job.split("\r\n") - first_row = job[0] - job.pop(0) - first_row = first_row.split(",") - phone_index = 0 - for item in first_row: - if item.lower() == "phone number": - break - phone_index = phone_index + 1 - - correct_row = job[job_row_number] - correct_row = correct_row.split(",") - - my_phone = correct_row[phone_index] - my_phone = re.sub(r"[\+\s\(\)\-\.]*", "", my_phone) - if not my_phone: - return "Unknown Phone" - return my_phone + else: + current_app.logger.warning( + "Was unable to retrieve phone number from lookup dictionary for job {job_id}" + ) + return "Unknown Phone" def get_job_metadata_from_s3(service_id, job_id): diff --git a/app/service/rest.py b/app/service/rest.py index 2f2c30697..bc52d49d9 100644 --- a/app/service/rest.py +++ b/app/service/rest.py @@ -433,7 +433,6 @@ def get_all_notifications_for_service(service_id): notification.job_id, notification.job_row_number, ) - print(f"RECIPIENTE IN service/rest {recipient}") notification.to = recipient notification.normalised_to = recipient else: From a100f60369c1ef1abcc777424faff66a11b66a09 Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Fri, 19 Jan 2024 09:02:44 -0800 Subject: [PATCH 4/4] code review feedback --- app/aws/s3.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/app/aws/s3.py b/app/aws/s3.py index f1fdb8737..f942feefb 100644 --- a/app/aws/s3.py +++ b/app/aws/s3.py @@ -158,9 +158,14 @@ def get_phone_number_from_s3(service_id, job_id, job_row_number): return phone_to_return else: current_app.logger.warning( - "Was unable to retrieve phone number from lookup dictionary for job {job_id}" + f"Was unable to retrieve phone number from lookup dictionary for job {job_id}" ) return "Unknown Phone" + else: + current_app.logger.error( + f"Was unable to construct lookup dictionary for job {job_id}" + ) + return "Unknown Phone" def get_job_metadata_from_s3(service_id, job_id):