From 2f9fea909e2389ce5df3ef8407537631b108ba59 Mon Sep 17 00:00:00 2001 From: Kenneth Kehl <@kkehl@flexion.us> Date: Wed, 11 Sep 2024 07:31:50 -0700 Subject: [PATCH] actually start deleting old s3 objects --- .ds.baseline | 4 ++-- app/aws/s3.py | 14 +++++++++----- tests/app/aws/test_s3.py | 17 ++++++++++++++++- 3 files changed, 27 insertions(+), 8 deletions(-) diff --git a/.ds.baseline b/.ds.baseline index 6ef3c9108..40ebe5b16 100644 --- a/.ds.baseline +++ b/.ds.baseline @@ -209,7 +209,7 @@ "filename": "tests/app/aws/test_s3.py", "hashed_secret": "67a74306b06d0c01624fe0d0249a570f4d093747", "is_verified": false, - "line_number": 27, + "line_number": 28, "is_secret": false } ], @@ -384,5 +384,5 @@ } ] }, - "generated_at": "2024-09-10T18:12:39Z" + "generated_at": "2024-09-11T14:31:46Z" } diff --git a/app/aws/s3.py b/app/aws/s3.py index 96ce42907..7bfd31514 100644 --- a/app/aws/s3.py +++ b/app/aws/s3.py @@ -87,7 +87,6 @@ def get_bucket_name(): def cleanup_old_s3_objects(): - bucket_name = get_bucket_name() s3_client = get_s3_client() @@ -96,13 +95,18 @@ def cleanup_old_s3_objects(): time_limit = aware_utcnow() - datetime.timedelta(days=14) try: response = s3_client.list_objects_v2(Bucket=bucket_name) - print(f"RESPONSE = {response}") while True: for obj in response.get("Contents", []): if obj["LastModified"] <= time_limit: - current_app.logger.info( - f"#delete-old-s3-objects Wanting to delete: {obj['LastModified']} {obj['Key']}" - ) + + try: + remove_csv_object(obj["Key"]) + current_app.logger.info( + f"#delete-old-s3-objects Deleted: {obj['LastModified']} {obj['Key']}" + ) + except botocore.exceptions.ClientError: + current_app.logger.exception(f"Couldn't delete {obj['Key']}") + if "NextContinuationToken" in response: response = s3_client.list_objects_v2( Bucket=bucket_name, diff --git a/tests/app/aws/test_s3.py b/tests/app/aws/test_s3.py index dcc1cbe44..b88d515e0 100644 --- a/tests/app/aws/test_s3.py +++ b/tests/app/aws/test_s3.py @@ -1,4 +1,5 @@ import os +from datetime import timedelta from os import getenv import pytest @@ -31,15 +32,29 @@ def single_s3_object_stub(key="foo", last_modified=None): def test_cleanup_old_s3_objects(mocker): + """ + Currently we are going to delete s3 objects if they are more than 14 days old, + because we want to delete all jobs older than 7 days, and jobs can be scheduled + three days in advance, and on top of that we want to leave a little cushion for + the time being. This test shows that a 3 day old job ("B") is not deleted, + whereas a 30 day old job ("A") is. + """ mocker.patch("app.aws.s3.get_bucket_name", return_value="Bucket") mock_s3_client = mocker.Mock() mocker.patch("app.aws.s3.get_s3_client", return_value=mock_s3_client) + mock_remove_csv_object = mocker.patch("app.aws.s3.remove_csv_object") + lastmod30 = aware_utcnow() - timedelta(days=30) + lastmod3 = aware_utcnow() - timedelta(days=3) mock_s3_client.list_objects_v2.return_value = { - "Contents": [{"Key": "A", "LastModified": aware_utcnow()}] + "Contents": [ + {"Key": "A", "LastModified": lastmod30}, + {"Key": "B", "LastModified": lastmod3}, + ] } cleanup_old_s3_objects() mock_s3_client.list_objects_v2.assert_called_with(Bucket="Bucket") + mock_remove_csv_object.assert_called_once_with("A") def test_get_s3_file_makes_correct_call(notify_api, mocker):