mirror of
https://github.com/GSA/notifications-api.git
synced 2026-02-02 09:26:08 -05:00
Add methods to get and remove s3 bucket objects
This commit is contained in:
@@ -1,6 +1,10 @@
|
||||
from boto3 import resource
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from flask import current_app
|
||||
|
||||
import pytz
|
||||
from boto3 import client, resource
|
||||
|
||||
FILE_LOCATION_STRUCTURE = 'service-{}-notify/{}.csv'
|
||||
|
||||
|
||||
@@ -24,7 +28,46 @@ def get_job_from_s3(service_id, job_id):
|
||||
def remove_job_from_s3(service_id, job_id):
    """Delete the uploaded CSV for *job_id* from the service's upload bucket.

    :param service_id: id of the service the job belongs to
    :param job_id: id of the job whose CSV should be removed
    :return: the boto3 ``delete()`` response from :func:`remove_s3_object`
    """
    bucket_name = current_app.config['CSV_UPLOAD_BUCKET_NAME']
    file_location = FILE_LOCATION_STRUCTURE.format(service_id, job_id)
    # remove_s3_object looks the object up itself; the original extra
    # get_s3_object() call here was unused and cost a redundant S3 round trip.
    return remove_s3_object(bucket_name, file_location)
|
||||
|
||||
|
||||
def get_s3_bucket_objects(bucket_name, subfolder='', older_than=7, limit_days=2):
    """Return every object listed under *subfolder* in *bucket_name*.

    Uses a ``list_objects_v2`` paginator so buckets/prefixes holding more
    than 1000 keys are fully enumerated.

    :param bucket_name: name of the S3 bucket to list
    :param subfolder: key prefix to restrict the listing to
    :param older_than: unused here; kept for backward compatibility —
        date filtering is done by filter_s3_bucket_objects_within_date_range
    :param limit_days: unused here; kept for backward compatibility
    :return: list of S3 object dicts (``Key``, ``LastModified``, ``ETag``, ...)
    """
    boto_client = client('s3', current_app.config['AWS_REGION'])
    paginator = boto_client.get_paginator('list_objects_v2')
    page_iterator = paginator.paginate(
        Bucket=bucket_name,
        Prefix=subfolder
    )

    all_objects_in_bucket = []
    for page in page_iterator:
        # S3 omits the 'Contents' key entirely on pages with no matching
        # objects (e.g. an empty bucket/prefix); the original code raised
        # KeyError in that case.
        all_objects_in_bucket.extend(page.get('Contents', []))

    return all_objects_in_bucket
|
||||
|
||||
|
||||
def filter_s3_bucket_objects_within_date_range(bucket_objects, older_than=7, limit_days=2):
    """Return the objects whose ``LastModified`` falls inside a sliding window.

    The window is ``(now - older_than - limit_days, now - older_than)``,
    exclusive at both ends. S3 returns ``Object['LastModified']`` as an
    offset-aware timestamp, so the comparison datetimes must be aware too.

    S3 also lists an entry for the containing "directory" itself (a key
    ending in ``/``); that entry is redundant and is filtered out.

    :param bucket_objects: list of S3 object dicts with 'Key'/'LastModified'
    :param older_than: objects must be more than this many days old
    :param limit_days: width of the window, in days, beyond *older_than*
    :return: filtered list of object dicts
    """
    # stdlib replacement for the pytz.utc the module previously relied on;
    # datetime.timezone.utc compares identically against S3's aware timestamps.
    from datetime import timezone

    end_date = datetime.now(tz=timezone.utc) - timedelta(days=older_than)
    start_date = end_date - timedelta(days=limit_days)

    return [
        item for item in bucket_objects
        if not item['Key'].endswith('/')
        and start_date < item['LastModified'] < end_date
    ]
|
||||
|
||||
|
||||
def remove_s3_object(bucket_name, object_key):
    """Delete the S3 object at *object_key* in *bucket_name*.

    :return: the boto3 ``delete()`` response
    """
    return get_s3_object(bucket_name, object_key).delete()
|
||||
|
||||
|
||||
|
||||
@@ -1,8 +1,29 @@
|
||||
from unittest.mock import call
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from flask import current_app
|
||||
|
||||
from app.aws.s3 import get_s3_file, remove_transformed_dvla_file
|
||||
from freezegun import freeze_time
|
||||
import pytz
|
||||
|
||||
from app.aws.s3 import (
|
||||
get_s3_bucket_objects,
|
||||
get_s3_file,
|
||||
filter_s3_bucket_objects_within_date_range,
|
||||
remove_transformed_dvla_file
|
||||
)
|
||||
|
||||
|
||||
def datetime_in_past(days=0, seconds=0):
    """Return an offset-aware UTC datetime *days*/*seconds* before now.

    Aware (UTC) so stubs compare cleanly against S3's aware ``LastModified``
    timestamps. Uses stdlib ``timezone.utc`` instead of the legacy pytz.
    """
    from datetime import timezone
    return datetime.now(tz=timezone.utc) - timedelta(days=days, seconds=seconds)
|
||||
|
||||
|
||||
def single_s3_object_stub(key='foo', last_modified=None):
    """Return a minimal dict mimicking one entry of an S3 list response.

    :param key: the object key for the stub
    :param last_modified: timestamp for the stub; defaults to the current
        time at *call* time. The original default of ``datetime.utcnow()``
        was evaluated once at import, so every call using the default shared
        the module-load timestamp — a classic call-time-default pitfall.
    """
    return {
        'ETag': '"d41d8cd98f00b204e9800998ecf8427e"',
        'Key': key,
        'LastModified': last_modified if last_modified is not None else datetime.utcnow()
    }
|
||||
|
||||
|
||||
def test_get_s3_file_makes_correct_call(notify_api, mocker):
|
||||
@@ -25,3 +46,83 @@ def test_remove_transformed_dvla_file_makes_correct_call(notify_api, mocker):
|
||||
call(current_app.config['DVLA_UPLOAD_BUCKET_NAME'], '{}-dvla-job.text'.format(fake_uuid)),
|
||||
call().delete()
|
||||
])
|
||||
|
||||
|
||||
def test_get_s3_bucket_objects_make_correct_pagination_call(notify_api, mocker):
    """Listing should drive the list_objects_v2 paginator with the
    requested bucket and prefix."""
    mock_client = mocker.patch('app.aws.s3.client')

    get_s3_bucket_objects('foo-bucket', subfolder='bar')

    expected = call().get_paginator().paginate(Bucket='foo-bucket', Prefix='bar')
    mock_client.assert_has_calls([expected])
|
||||
|
||||
|
||||
def test_get_s3_bucket_objects_builds_objects_list_from_paginator(notify_api, mocker):
    """Objects from every page the paginator yields are collected into one list."""
    mock_client = mocker.patch('app.aws.s3.client')
    pages = [
        {"Contents": [single_s3_object_stub('bar/foo.txt', datetime_in_past(days=8))]},
        {"Contents": [single_s3_object_stub('bar/foo1.txt', datetime_in_past(days=8))]},
    ]
    mock_client.return_value.get_paginator.return_value.paginate.return_value = pages

    bucket_objects = get_s3_bucket_objects('foo-bucket', subfolder='bar')

    assert len(bucket_objects) == 2
    assert set(bucket_objects[0].keys()) == {'ETag', 'Key', 'LastModified'}
|
||||
|
||||
|
||||
@freeze_time("2016-01-01 11:00:00")
def test_get_s3_bucket_objects_removes_redundant_root_object(notify_api, mocker):
    """The pseudo-directory entry (key ending in '/') is dropped by the filter."""
    eight_days_ago = datetime_in_past(days=8)
    stubs = [
        single_s3_object_stub('bar/', eight_days_ago),
        single_s3_object_stub('bar/foo.txt', eight_days_ago),
    ]

    filtered_items = filter_s3_bucket_objects_within_date_range(stubs)

    assert len(filtered_items) == 1
    assert filtered_items[0]["Key"] == 'bar/foo.txt'
    assert filtered_items[0]["LastModified"] == eight_days_ago
|
||||
|
||||
|
||||
@freeze_time("2016-01-01 11:00:00")
def test_filter_s3_bucket_objects_within_date_range_filters_by_date_range(notify_api, mocker):
    """Non-directory keys inside the default window all survive, in order."""
    eight_days_ago = datetime_in_past(days=8)
    stubs = [
        single_s3_object_stub('bar/', eight_days_ago),
        single_s3_object_stub('bar/foo.txt', eight_days_ago),
        single_s3_object_stub('bar/foo1.txt', eight_days_ago),
    ]

    filtered_items = filter_s3_bucket_objects_within_date_range(stubs)

    assert len(filtered_items) == 2
    assert [(item["Key"], item["LastModified"]) for item in filtered_items] == [
        ('bar/foo.txt', eight_days_ago),
        ('bar/foo1.txt', eight_days_ago),
    ]
|
||||
|
||||
|
||||
@freeze_time("2016-01-01 11:00:00")
def test_get_s3_bucket_objects_does_not_return_outside_of_date_range(notify_api, mocker):
    """Objects at exactly 7 days old, or 9+ days old, sit outside the
    exclusive (now-9d, now-7d) window and must be excluded."""
    stubs = [
        single_s3_object_stub('bar/', datetime_in_past(days=7)),
        single_s3_object_stub('bar/foo.txt', datetime_in_past(days=7)),
        single_s3_object_stub('bar/foo2.txt', datetime_in_past(days=9)),
        single_s3_object_stub('bar/foo2.txt', datetime_in_past(days=9, seconds=1)),
    ]

    assert filter_s3_bucket_objects_within_date_range(stubs) == []
|
||||
|
||||
Reference in New Issue
Block a user