This commit is contained in:
Kenneth Kehl
2024-08-16 09:53:29 -07:00
parent 22eab2fa55
commit 496571686a
3 changed files with 46 additions and 53 deletions

View File

@@ -172,38 +172,41 @@ def get_job_and_metadata_from_s3(service_id, job_id):
def get_job_from_s3(service_id, job_id):
    """Fetch a job's contents from S3, retrying with exponential backoff.

    Every failed attempt is retried until ``max_retries`` attempts have been
    made. Throttling-style client errors are retried quietly; any other
    failure is logged (with traceback) before the retry.

    Args:
        service_id: Identifier of the service that owns the job.
        job_id: Identifier of the job to retrieve.

    Returns:
        The object body decoded as UTF-8, or ``None`` if the job could not
        be retrieved after all attempts.
    """
    # We have to make sure the retries don't take up too much time, because
    # we might be retrieving dozens of jobs.  So max time is:
    # 0.2 + 0.4 + 0.8 + 1.6 = 3.0 seconds
    retries = 0
    max_retries = 4
    backoff_factor = 0.2
    throttle_codes = ("Throttling", "RequestTimeout", "SlowDown")

    while retries < max_retries:
        try:
            obj = get_s3_object(*get_job_location(service_id, job_id))
            return obj.get()["Body"].read().decode("utf-8")
        except botocore.exceptions.ClientError as e:
            # Throttling-style errors are expected under load, so they are
            # retried without logging; anything else is logged first.
            if e.response["Error"]["Code"] not in throttle_codes:
                current_app.logger.error(
                    f"Failed to get job {FILE_LOCATION_STRUCTURE.format(service_id, job_id)} retry_count={retries}",
                    exc_info=True,
                )
        except Exception:
            current_app.logger.error(
                f"Failed to get job {FILE_LOCATION_STRUCTURE.format(service_id, job_id)} retry_count={retries}",
                exc_info=True,
            )

        # Compute the delay from the pre-increment counter so the waits are
        # 0.2, 0.4, 0.8, 1.6 seconds, matching the time budget stated above.
        # Incrementing first would double every delay (6.0 seconds total).
        sleep_time = backoff_factor * (2**retries)  # Exponential backoff
        retries += 1
        time.sleep(sleep_time)

    current_app.logger.error(
        f"Never retrieved job {FILE_LOCATION_STRUCTURE.format(service_id, job_id)}"
    )
    return None
def incr_jobs_cache_misses():
@@ -331,7 +334,7 @@ def get_personalisation_from_s3(service_id, job_id, job_row_number):
# change the task schedules
if job is None:
current_app.logger.warning(
"Couldnt find personalisation for job_id {job_id} row number {job_row_number} because job is missing"
f"Couldnt find personalisation for job_id {job_id} row number {job_row_number} because job is missing"
)
return {}

View File

@@ -503,37 +503,24 @@ def get_all_notifications_for_service(service_id):
for notification in pagination.items:
if notification.job_id is not None:
try:
notification.personalisation = get_personalisation_from_s3(
notification.service_id,
notification.job_id,
notification.job_row_number,
)
except ClientError as ex:
if ex.response["Error"]["Code"] == "NoSuchKey":
notification.personalisation = ""
else:
raise ex
notification.personalisation = get_personalisation_from_s3(
notification.service_id,
notification.job_id,
notification.job_row_number,
)
try:
recipient = get_phone_number_from_s3(
notification.service_id,
notification.job_id,
notification.job_row_number,
)
recipient = get_phone_number_from_s3(
notification.service_id,
notification.job_id,
notification.job_row_number,
)
notification.to = recipient
notification.normalised_to = recipient
except ClientError as ex:
if ex.response["Error"]["Code"] == "NoSuchKey":
notification.to = ""
notification.normalised_to = ""
else:
raise ex
notification.to = recipient
notification.normalised_to = recipient
else:
notification.to = "1"
notification.normalised_to = "1"
notification.to = ""
notification.normalised_to = ""
kwargs = request.args.to_dict()
kwargs["service_id"] = service_id

View File

@@ -99,10 +99,13 @@ def mock_s3_get_object_slowdown(*args, **kwargs):
def test_get_job_from_s3_exponential_backoff(mocker):
    """Persistent S3 failures are retried, then reported as ``None``.

    ``get_s3_object`` is patched to fail on every call, so the function
    under test should use all of its attempts and return ``None`` instead
    of raising.
    """
    # Neutralise the real backoff delays so the test does not spend seconds
    # sleeping; only the number of attempts matters here.
    mocker.patch("time.sleep")
    # We try multiple times to retrieve the job, and if we can't we return None
    mock_get_object = mocker.patch(
        "app.aws.s3.get_s3_object", side_effect=mock_s3_get_object_slowdown
    )
    job = get_job_from_s3("service_id", "job_id")
    assert job is None
    assert mock_get_object.call_count == 4
@pytest.mark.parametrize(