From 82dd29d457dda520c8efd0d3fe17d7e1f3c924c0 Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Thu, 1 Feb 2024 12:01:29 -0800 Subject: [PATCH] handle bom in phone number field --- app/aws/s3.py | 12 ++++-------- tests/app/aws/test_s3.py | 7 +++++++ 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/app/aws/s3.py b/app/aws/s3.py index 13879bf6f..ca283d743 100644 --- a/app/aws/s3.py +++ b/app/aws/s3.py @@ -105,24 +105,20 @@ def extract_phones(job): current_app.logger.info(f"HEADERS {first_row}") phone_index = 0 for item in first_row: - if item.lower() == "phone number": + # Note: may contain a BOM and look like \ufeffphone number + if "phone number" in item.lower(): break phone_index = phone_index + 1 + phones = {} job_row = 0 for row in job: row = row.split(",") - # TODO WHY ARE WE CALCULATING PHONE INDEX IN THE LOOP? - phone_index = 0 - for item in first_row: - if item.lower() == "phone number": - break - phone_index = phone_index + 1 current_app.logger.info(f"PHONE INDEX IS NOW {phone_index}") current_app.logger.info(f"LENGTH OF ROW IS {len(row)}") if phone_index >= len(row): phones[job_row] = "Error: can't retrieve phone number" - current_app.logger.error("Corrupt csv file, missing columns job_id {job_id} service_id {service_id}") + current_app.logger.error("Corrupt csv file") else: my_phone = row[phone_index] my_phone = re.sub(r"[\+\s\(\)\-\.]*", "", my_phone) diff --git a/tests/app/aws/test_s3.py b/tests/app/aws/test_s3.py index ad01a00c5..a5134501c 100644 --- a/tests/app/aws/test_s3.py +++ b/tests/app/aws/test_s3.py @@ -67,6 +67,13 @@ def test_get_s3_file_makes_correct_call(notify_api, mocker): 0, "15553333333", ), + ( + # simulate file saved with utf8withbom + "\\ufeffPHONE NUMBER,Name\r\n5555555550,T 1\r\n5555555551,T 5,3/31/2024\r\n5555555552,T 2", + "eee", + 2, + "5555555552", + ), ], ) def test_get_phone_number_from_s3(