Add methods to get and remove s3 bucket objects

Imdad Ahad
2017-06-12 15:55:05 +01:00
parent dfeda93bc5
commit a1a5377f9c
2 changed files with 147 additions and 3 deletions

app/aws/s3.py View File

@@ -1,6 +1,10 @@
-from boto3 import resource
+from datetime import datetime, timedelta
 from flask import current_app
+import pytz
+from boto3 import client, resource
 
 FILE_LOCATION_STRUCTURE = 'service-{}-notify/{}.csv'
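
A note on the import change above: paginators are a feature of the low-level botocore client, not of the resource interface, which is why client now has to be imported alongside resource. A minimal sketch of the call shape (the region name is an illustrative value, not from this commit):

    import boto3

    s3_client = boto3.client('s3', region_name='eu-west-1')  # example region only
    paginator = s3_client.get_paginator('list_objects_v2')   # get_paginator exists on clients, not resources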
@@ -24,7 +28,46 @@ def get_job_from_s3(service_id, job_id):
 def remove_job_from_s3(service_id, job_id):
     bucket_name = current_app.config['CSV_UPLOAD_BUCKET_NAME']
     file_location = FILE_LOCATION_STRUCTURE.format(service_id, job_id)
-    obj = get_s3_object(bucket_name, file_location)
+    return remove_s3_object(bucket_name, file_location)
+
+
+def get_s3_bucket_objects(bucket_name, subfolder='', older_than=7, limit_days=2):
+    boto_client = client('s3', current_app.config['AWS_REGION'])
+    paginator = boto_client.get_paginator('list_objects_v2')
+
+    page_iterator = paginator.paginate(
+        Bucket=bucket_name,
+        Prefix=subfolder
+    )
+
+    all_objects_in_bucket = []
+    for page in page_iterator:
+        all_objects_in_bucket.extend(page['Contents'])
+
+    return all_objects_in_bucket
+
+
+def filter_s3_bucket_objects_within_date_range(bucket_objects, older_than=7, limit_days=2):
+    """
+    S3 returns each Object['LastModified'] as an offset-aware timestamp, so
+    the date range filter must take this into account.
+    S3 also returns an extra Object for the containing directory itself; it
+    is redundant, so it is filtered out here as well.
+    """
+    end_date = datetime.now(tz=pytz.utc) - timedelta(days=older_than)
+    start_date = end_date - timedelta(days=limit_days)
+
+    filtered_items = [item for item in bucket_objects if all([
+        not item['Key'].endswith('/'),
+        item['LastModified'] > start_date,
+        item['LastModified'] < end_date
+    ])]
+
+    return filtered_items
+
+
+def remove_s3_object(bucket_name, object_key):
+    obj = get_s3_object(bucket_name, object_key)
+    return obj.delete()
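
Taken together, the three new helpers support a list-filter-delete clean-up pass. A minimal sketch of how a caller might compose them (the bucket name and prefix below are invented for the example, and a Flask app context is required for current_app; note too that page['Contents'] is absent when a prefix matches no objects, so a defensive caller may prefer page.get('Contents', [])):

    # Illustrative only: bucket and prefix are made-up example values.
    objects = get_s3_bucket_objects('example-upload-bucket', subfolder='2016-12-25')
    to_delete = filter_s3_bucket_objects_within_date_range(objects, older_than=7, limit_days=2)
    for item in to_delete:
        remove_s3_object('example-upload-bucket', item['Key'])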

View File

@@ -1,8 +1,29 @@
 from unittest.mock import call
+from datetime import datetime, timedelta
 
 from flask import current_app
-from app.aws.s3 import get_s3_file, remove_transformed_dvla_file
+from freezegun import freeze_time
+import pytz
+
+from app.aws.s3 import (
+    get_s3_bucket_objects,
+    get_s3_file,
+    filter_s3_bucket_objects_within_date_range,
+    remove_transformed_dvla_file
+)
+
+
+def datetime_in_past(days=0, seconds=0):
+    return datetime.now(tz=pytz.utc) - timedelta(days=days, seconds=seconds)
+
+
+def single_s3_object_stub(key='foo', last_modified=datetime.utcnow()):
+    return {
+        'ETag': '"d41d8cd98f00b204e9800998ecf8427e"',
+        'Key': key,
+        'LastModified': last_modified
+    }
 
 
 def test_get_s3_file_makes_correct_call(notify_api, mocker):
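
One gotcha in single_s3_object_stub above: the default last_modified=datetime.utcnow() is evaluated once, at import time, and is offset-naive, while the filter compares against offset-aware datetimes; comparing the two raises TypeError. The tests below therefore always pass an aware timestamp via datetime_in_past. A small sketch of the distinction:

    from datetime import datetime
    import pytz

    naive = datetime.utcnow()          # tzinfo is None; unsafe to compare with S3 timestamps
    aware = datetime.now(tz=pytz.utc)  # offset-aware, matching what S3 returns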
@@ -25,3 +46,83 @@ def test_remove_transformed_dvla_file_makes_correct_call(notify_api, mocker):
         call(current_app.config['DVLA_UPLOAD_BUCKET_NAME'], '{}-dvla-job.text'.format(fake_uuid)),
         call().delete()
     ])
+
+
+def test_get_s3_bucket_objects_make_correct_pagination_call(notify_api, mocker):
+    paginator_mock = mocker.patch('app.aws.s3.client')
+
+    get_s3_bucket_objects('foo-bucket', subfolder='bar')
+
+    paginator_mock.assert_has_calls([
+        call().get_paginator().paginate(Bucket='foo-bucket', Prefix='bar')
+    ])
+
+
+def test_get_s3_bucket_objects_builds_objects_list_from_paginator(notify_api, mocker):
+    paginator_mock = mocker.patch('app.aws.s3.client')
+    multiple_pages_s3_object = [
+        {
+            "Contents": [
+                single_s3_object_stub('bar/foo.txt', datetime_in_past(days=8)),
+            ]
+        },
+        {
+            "Contents": [
+                single_s3_object_stub('bar/foo1.txt', datetime_in_past(days=8)),
+            ]
+        }
+    ]
+    paginator_mock.return_value.get_paginator.return_value.paginate.return_value = multiple_pages_s3_object
+
+    bucket_objects = get_s3_bucket_objects('foo-bucket', subfolder='bar')
+
+    assert len(bucket_objects) == 2
+    assert set(bucket_objects[0].keys()) == set(['ETag', 'Key', 'LastModified'])
+
+
+@freeze_time("2016-01-01 11:00:00")
+def test_get_s3_bucket_objects_removes_redundant_root_object(notify_api, mocker):
+    s3_objects_stub = [
+        single_s3_object_stub('bar/', datetime_in_past(days=8)),
+        single_s3_object_stub('bar/foo.txt', datetime_in_past(days=8)),
+    ]
+
+    filtered_items = filter_s3_bucket_objects_within_date_range(s3_objects_stub)
+
+    assert len(filtered_items) == 1
+    assert filtered_items[0]["Key"] == 'bar/foo.txt'
+    assert filtered_items[0]["LastModified"] == datetime_in_past(days=8)
+
+
+@freeze_time("2016-01-01 11:00:00")
+def test_filter_s3_bucket_objects_within_date_range_filters_by_date_range(notify_api, mocker):
+    s3_objects_stub = [
+        single_s3_object_stub('bar/', datetime_in_past(days=8)),
+        single_s3_object_stub('bar/foo.txt', datetime_in_past(days=8)),
+        single_s3_object_stub('bar/foo1.txt', datetime_in_past(days=8)),
+    ]
+
+    filtered_items = filter_s3_bucket_objects_within_date_range(s3_objects_stub)
+
+    assert len(filtered_items) == 2
+    assert filtered_items[0]["Key"] == 'bar/foo.txt'
+    assert filtered_items[0]["LastModified"] == datetime_in_past(days=8)
+    assert filtered_items[1]["Key"] == 'bar/foo1.txt'
+    assert filtered_items[1]["LastModified"] == datetime_in_past(days=8)
+
+
+@freeze_time("2016-01-01 11:00:00")
+def test_get_s3_bucket_objects_does_not_return_outside_of_date_range(notify_api, mocker):
+    s3_objects_stub = [
+        single_s3_object_stub('bar/', datetime_in_past(days=7)),
+        single_s3_object_stub('bar/foo.txt', datetime_in_past(days=7)),
+        single_s3_object_stub('bar/foo2.txt', datetime_in_past(days=9)),
+        single_s3_object_stub('bar/foo2.txt', datetime_in_past(days=9, seconds=1)),
+    ]
+
+    filtered_items = filter_s3_bucket_objects_within_date_range(s3_objects_stub)
+
+    assert len(filtered_items) == 0
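
For this last test the window arithmetic works out as follows: with the clock frozen at 2016-01-01 11:00:00 UTC and the defaults older_than=7, limit_days=2, the filter accepts only objects strictly inside (2015-12-23 11:00, 2015-12-25 11:00). A short worked check:

    from datetime import datetime, timedelta
    import pytz

    now = datetime(2016, 1, 1, 11, 0, 0, tzinfo=pytz.utc)
    end_date = now - timedelta(days=7)         # 2015-12-25 11:00, exclusive upper bound
    start_date = end_date - timedelta(days=2)  # 2015-12-23 11:00, exclusive lower bound
    # Objects exactly 7 days old equal end_date, and objects 9 days old or older
    # fall at or before start_date; both comparisons are strict, so all four stubs
    # are rejected and len(filtered_items) == 0.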