actually start deleting old s3 objects

This commit is contained in:
Kenneth Kehl
2024-09-11 07:31:50 -07:00
parent 4e9e014a0c
commit 2f9fea909e
3 changed files with 27 additions and 8 deletions

View File

@@ -209,7 +209,7 @@
"filename": "tests/app/aws/test_s3.py", "filename": "tests/app/aws/test_s3.py",
"hashed_secret": "67a74306b06d0c01624fe0d0249a570f4d093747", "hashed_secret": "67a74306b06d0c01624fe0d0249a570f4d093747",
"is_verified": false, "is_verified": false,
"line_number": 27, "line_number": 28,
"is_secret": false "is_secret": false
} }
], ],
@@ -384,5 +384,5 @@
} }
] ]
}, },
"generated_at": "2024-09-10T18:12:39Z" "generated_at": "2024-09-11T14:31:46Z"
} }

View File

@@ -87,7 +87,6 @@ def get_bucket_name():
def cleanup_old_s3_objects(): def cleanup_old_s3_objects():
bucket_name = get_bucket_name() bucket_name = get_bucket_name()
s3_client = get_s3_client() s3_client = get_s3_client()
@@ -96,13 +95,18 @@ def cleanup_old_s3_objects():
time_limit = aware_utcnow() - datetime.timedelta(days=14) time_limit = aware_utcnow() - datetime.timedelta(days=14)
try: try:
response = s3_client.list_objects_v2(Bucket=bucket_name) response = s3_client.list_objects_v2(Bucket=bucket_name)
print(f"RESPONSE = {response}")
while True: while True:
for obj in response.get("Contents", []): for obj in response.get("Contents", []):
if obj["LastModified"] <= time_limit: if obj["LastModified"] <= time_limit:
current_app.logger.info(
f"#delete-old-s3-objects Wanting to delete: {obj['LastModified']} {obj['Key']}" try:
) remove_csv_object(obj["Key"])
current_app.logger.info(
f"#delete-old-s3-objects Deleted: {obj['LastModified']} {obj['Key']}"
)
except botocore.exceptions.ClientError:
current_app.logger.exception(f"Couldn't delete {obj['Key']}")
if "NextContinuationToken" in response: if "NextContinuationToken" in response:
response = s3_client.list_objects_v2( response = s3_client.list_objects_v2(
Bucket=bucket_name, Bucket=bucket_name,

View File

@@ -1,4 +1,5 @@
import os import os
from datetime import timedelta
from os import getenv from os import getenv
import pytest import pytest
@@ -31,15 +32,29 @@ def single_s3_object_stub(key="foo", last_modified=None):
def test_cleanup_old_s3_objects(mocker): def test_cleanup_old_s3_objects(mocker):
"""
Currently we are going to delete s3 objects if they are more than 14 days old,
because we want to delete all jobs older than 7 days, and jobs can be scheduled
three days in advance, and on top of that we want to leave a little cushion for
the time being. This test shows that a 3 day old job ("B") is not deleted,
whereas a 30 day old job ("A") is.
"""
mocker.patch("app.aws.s3.get_bucket_name", return_value="Bucket") mocker.patch("app.aws.s3.get_bucket_name", return_value="Bucket")
mock_s3_client = mocker.Mock() mock_s3_client = mocker.Mock()
mocker.patch("app.aws.s3.get_s3_client", return_value=mock_s3_client) mocker.patch("app.aws.s3.get_s3_client", return_value=mock_s3_client)
mock_remove_csv_object = mocker.patch("app.aws.s3.remove_csv_object")
lastmod30 = aware_utcnow() - timedelta(days=30)
lastmod3 = aware_utcnow() - timedelta(days=3)
mock_s3_client.list_objects_v2.return_value = { mock_s3_client.list_objects_v2.return_value = {
"Contents": [{"Key": "A", "LastModified": aware_utcnow()}] "Contents": [
{"Key": "A", "LastModified": lastmod30},
{"Key": "B", "LastModified": lastmod3},
]
} }
cleanup_old_s3_objects() cleanup_old_s3_objects()
mock_s3_client.list_objects_v2.assert_called_with(Bucket="Bucket") mock_s3_client.list_objects_v2.assert_called_with(Bucket="Bucket")
mock_remove_csv_object.assert_called_once_with("A")
def test_get_s3_file_makes_correct_call(notify_api, mocker): def test_get_s3_file_makes_correct_call(notify_api, mocker):