From fefdd297eabd6ee01e905ae3b8c9e96cbc76a1c3 Mon Sep 17 00:00:00 2001
From: Kenneth Kehl <@kkehl@flexion.us>
Date: Fri, 6 Sep 2024 11:13:13 -0700
Subject: [PATCH 1/2] initial

---
 app/aws/s3.py       | 27 +++++++++++++++++++++++++++
 app/celery/tasks.py |  5 +++++
 app/config.py       |  5 +++++
 3 files changed, 37 insertions(+)

diff --git a/app/aws/s3.py b/app/aws/s3.py
index 17baeb398..5c5b889d9 100644
--- a/app/aws/s3.py
+++ b/app/aws/s3.py
@@ -82,6 +82,33 @@ def list_s3_objects():
         )
 
 
+def cleanup_old_s3_objects():
+
+    bucket_name = current_app.config["CSV_UPLOAD_BUCKET"]["bucket"]
+    s3_client = get_s3_client()
+    # Our reports only support 7 days, but can be scheduled 3 days in advance
+    # Use 14 days for the v1.0 version of this behavior
+    time_limit = aware_utcnow() - datetime.timedelta(days=14)
+    try:
+        response = s3_client.list_objects_v2(Bucket=bucket_name)
+        while True:
+            for obj in response.get("Contents", []):
+                if obj["LastModified"] >= time_limit:
+                    print(f"{obj['LastModified']} {obj['Key']}")
+            if "NextContinuationToken" in response:
+                response = s3_client.list_objects_v2(
+                    Bucket=bucket_name,
+                    ContinuationToken=response["NextContinuationToken"],
+                )
+            else:
+                break
+    except Exception:
+        current_app.logger.error(
+            f"An error occurred while cleaning up old s3 objects #notify-api-1303",
+            exc_info=True,
+        )
+
+
 def get_s3_files():
 
     bucket_name = current_app.config["CSV_UPLOAD_BUCKET"]["bucket"]
diff --git a/app/celery/tasks.py b/app/celery/tasks.py
index e6ed717e7..b173afbb3 100644
--- a/app/celery/tasks.py
+++ b/app/celery/tasks.py
@@ -446,6 +446,11 @@ def regenerate_job_cache():
     s3.get_s3_files()
 
 
+@notify_celery.task(name="delete-old-s3-objects")
+def delete_old_s3_objects():
+    s3.cleanup_old_s3_objects()
+
+
 @notify_celery.task(name="process-incomplete-jobs")
 def process_incomplete_jobs(job_ids):
     jobs = [dao_get_job_by_id(job_id) for job_id in job_ids]
diff --git a/app/config.py b/app/config.py
index c4ab09e3c..71fa4ed23 100644
--- a/app/config.py
+++ b/app/config.py
@@ -249,6 +249,11 @@ class Config(object):
             "schedule": crontab(hour=6, minute=0),
             "options": {"queue": QueueNames.PERIODIC},
         },
+        "delete_old_s3_objects": {
+            "task": "delete-old-s3-objects",
+            "schedule": crontab(minute="*/5"),
+            "options": {"queue": QueueNames.PERIODIC},
+        },
         "regenerate-job-cache": {
             "task": "regenerate-job-cache",
             "schedule": crontab(minute="*/30"),

From b34012c7c3cc70250a75a7c9f5230e655313cf39 Mon Sep 17 00:00:00 2001
From: Kenneth Kehl <@kkehl@flexion.us>
Date: Fri, 6 Sep 2024 13:15:38 -0700
Subject: [PATCH 2/2] log s3 objects we want to delete

---
 app/aws/s3.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/app/aws/s3.py b/app/aws/s3.py
index 5c5b889d9..dc4abeb19 100644
--- a/app/aws/s3.py
+++ b/app/aws/s3.py
@@ -93,8 +93,10 @@ def cleanup_old_s3_objects():
         response = s3_client.list_objects_v2(Bucket=bucket_name)
         while True:
             for obj in response.get("Contents", []):
-                if obj["LastModified"] >= time_limit:
-                    print(f"{obj['LastModified']} {obj['Key']}")
+                if obj["LastModified"] <= time_limit:
+                    current_app.logger.info(
+                        f"#delete-old-s3-objects Wanting to delete: {obj['LastModified']} {obj['Key']}"
+                    )
             if "NextContinuationToken" in response:
                 response = s3_client.list_objects_v2(
                     Bucket=bucket_name,
@@ -104,7 +106,7 @@
                 break
     except Exception:
         current_app.logger.error(
-            f"An error occurred while cleaning up old s3 objects #notify-api-1303",
+            "#delete-old-s3-objects An error occurred while cleaning up old s3 objects",
            exc_info=True,
        )
 