From a1a5377f9c1e88b38481d8d4c3c581039bfb2bf8 Mon Sep 17 00:00:00 2001
From: Imdad Ahad
Date: Mon, 12 Jun 2017 15:55:05 +0100
Subject: [PATCH] Add methods to get and remove s3 bucket objects

---
 app/aws/s3.py            |  55 ++++++++++++++++++-
 tests/app/aws/test_s3.py | 115 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 167 insertions(+), 3 deletions(-)

diff --git a/app/aws/s3.py b/app/aws/s3.py
index f5798314b..48a54e709 100644
--- a/app/aws/s3.py
+++ b/app/aws/s3.py
@@ -1,6 +1,10 @@
-from boto3 import resource
+from datetime import datetime, timedelta
+
 from flask import current_app
 
+import pytz
+from boto3 import client, resource
+
 FILE_LOCATION_STRUCTURE = 'service-{}-notify/{}.csv'
 
 
@@ -24,7 +28,54 @@ def get_job_from_s3(service_id, job_id):
 def remove_job_from_s3(service_id, job_id):
     bucket_name = current_app.config['CSV_UPLOAD_BUCKET_NAME']
     file_location = FILE_LOCATION_STRUCTURE.format(service_id, job_id)
-    obj = get_s3_object(bucket_name, file_location)
+    return remove_s3_object(bucket_name, file_location)
+
+
+def get_s3_bucket_objects(bucket_name, subfolder='', older_than=7, limit_days=2):
+    """
+    Return every object listed by S3 in `bucket_name` under the `subfolder` prefix.
+
+    `older_than` and `limit_days` are accepted but not used here; pass the result
+    to filter_s3_bucket_objects_within_date_range to restrict it by date.
+    """
+    boto_client = client('s3', current_app.config['AWS_REGION'])
+    paginator = boto_client.get_paginator('list_objects_v2')
+    page_iterator = paginator.paginate(
+        Bucket=bucket_name,
+        Prefix=subfolder
+    )
+
+    all_objects_in_bucket = []
+    for page in page_iterator:
+        # S3 leaves 'Contents' out of a page when no keys match the prefix.
+        all_objects_in_bucket.extend(page.get('Contents', []))
+
+    return all_objects_in_bucket
+
+
+def filter_s3_bucket_objects_within_date_range(bucket_objects, older_than=7, limit_days=2):
+    """
+    Return the objects last modified more than `older_than` days ago but less
+    than `older_than + limit_days` days ago (both bounds are exclusive).
+
+    S3 returns the Object['LastModified'] as an 'offset-aware' timestamp so the
+    date range filter must take this into account.
+
+    S3 also returns an extra Object corresponding to the container directory
+    (its key ends with '/'). This is redundant and is removed.
+    """
+    end_date = datetime.now(tz=pytz.utc) - timedelta(days=older_than)
+    start_date = end_date - timedelta(days=limit_days)
+
+    return [
+        item for item in bucket_objects
+        if not item['Key'].endswith('/') and start_date < item['LastModified'] < end_date
+    ]
+
+
+def remove_s3_object(bucket_name, object_key):
+    """Look up the S3 object and return the result of calling delete() on it."""
+    obj = get_s3_object(bucket_name, object_key)
     return obj.delete()
 
 
diff --git a/tests/app/aws/test_s3.py b/tests/app/aws/test_s3.py
index fd30c16df..7312ad549 100644
--- a/tests/app/aws/test_s3.py
+++ b/tests/app/aws/test_s3.py
@@ -1,8 +1,33 @@
 from unittest.mock import call
+from datetime import datetime, timedelta
 
 from flask import current_app
 
-from app.aws.s3 import get_s3_file, remove_transformed_dvla_file
+from freezegun import freeze_time
+import pytz
+
+from app.aws.s3 import (
+    get_s3_bucket_objects,
+    get_s3_file,
+    filter_s3_bucket_objects_within_date_range,
+    remove_transformed_dvla_file
+)
+
+
+def datetime_in_past(days=0, seconds=0):
+    return datetime.now(tz=pytz.utc) - timedelta(days=days, seconds=seconds)
+
+
+def single_s3_object_stub(key='foo', last_modified=None):
+    # Resolve the default at call time so it is offset-aware and respects
+    # freeze_time, instead of being a naive timestamp fixed at import.
+    if last_modified is None:
+        last_modified = datetime_in_past()
+    return {
+        'ETag': '"d41d8cd98f00b204e9800998ecf8427e"',
+        'Key': key,
+        'LastModified': last_modified
+    }
 
 
 def test_get_s3_file_makes_correct_call(notify_api, mocker):
@@ -25,3 +50,91 @@ def test_remove_transformed_dvla_file_makes_correct_call(notify_api, mocker):
         call(current_app.config['DVLA_UPLOAD_BUCKET_NAME'], '{}-dvla-job.text'.format(fake_uuid)),
         call().delete()
     ])
+
+
+def test_get_s3_bucket_objects_make_correct_pagination_call(notify_api, mocker):
+    paginator_mock = mocker.patch('app.aws.s3.client')
+
+    get_s3_bucket_objects('foo-bucket', subfolder='bar')
+
+    paginator_mock.assert_has_calls([
+        call().get_paginator().paginate(Bucket='foo-bucket', Prefix='bar')
+    ])
+
+
+def test_get_s3_bucket_objects_builds_objects_list_from_paginator(notify_api, mocker):
+    paginator_mock = mocker.patch('app.aws.s3.client')
+    multiple_pages_s3_object = [
+        {
+            "Contents": [
+                single_s3_object_stub('bar/foo.txt', datetime_in_past(days=8)),
+            ]
+        },
+        {
+            "Contents": [
+                single_s3_object_stub('bar/foo1.txt', datetime_in_past(days=8)),
+            ]
+        }
+    ]
+    paginator_mock.return_value.get_paginator.return_value.paginate.return_value = multiple_pages_s3_object
+
+    bucket_objects = get_s3_bucket_objects('foo-bucket', subfolder='bar')
+
+    assert len(bucket_objects) == 2
+    assert set(bucket_objects[0].keys()) == {'ETag', 'Key', 'LastModified'}
+
+
+def test_get_s3_bucket_objects_handles_page_without_contents(notify_api, mocker):
+    paginator_mock = mocker.patch('app.aws.s3.client')
+    # S3 leaves 'Contents' out of the page when nothing matches the prefix
+    paginator_mock.return_value.get_paginator.return_value.paginate.return_value = [{'KeyCount': 0}]
+
+    assert get_s3_bucket_objects('foo-bucket', subfolder='bar') == []
+
+
+@freeze_time("2016-01-01 11:00:00")
+def test_filter_s3_bucket_objects_within_date_range_removes_redundant_root_object(notify_api, mocker):
+    s3_objects_stub = [
+        single_s3_object_stub('bar/', datetime_in_past(days=8)),
+        single_s3_object_stub('bar/foo.txt', datetime_in_past(days=8)),
+    ]
+
+    filtered_items = filter_s3_bucket_objects_within_date_range(s3_objects_stub)
+
+    assert len(filtered_items) == 1
+
+    assert filtered_items[0]["Key"] == 'bar/foo.txt'
+    assert filtered_items[0]["LastModified"] == datetime_in_past(days=8)
+
+
+@freeze_time("2016-01-01 11:00:00")
+def test_filter_s3_bucket_objects_within_date_range_filters_by_date_range(notify_api, mocker):
+    s3_objects_stub = [
+        single_s3_object_stub('bar/', datetime_in_past(days=8)),
+        single_s3_object_stub('bar/foo.txt', datetime_in_past(days=8)),
+        single_s3_object_stub('bar/foo1.txt', datetime_in_past(days=8)),
+    ]
+
+    filtered_items = filter_s3_bucket_objects_within_date_range(s3_objects_stub)
+
+    assert len(filtered_items) == 2
+
+    assert filtered_items[0]["Key"] == 'bar/foo.txt'
+    assert filtered_items[0]["LastModified"] == datetime_in_past(days=8)
+
+    assert filtered_items[1]["Key"] == 'bar/foo1.txt'
+    assert filtered_items[1]["LastModified"] == datetime_in_past(days=8)
+
+
+@freeze_time("2016-01-01 11:00:00")
+def test_filter_s3_bucket_objects_within_date_range_does_not_return_outside_of_date_range(notify_api, mocker):
+    s3_objects_stub = [
+        single_s3_object_stub('bar/', datetime_in_past(days=7)),
+        single_s3_object_stub('bar/foo.txt', datetime_in_past(days=7)),
+        single_s3_object_stub('bar/foo2.txt', datetime_in_past(days=9)),
+        single_s3_object_stub('bar/foo2.txt', datetime_in_past(days=9, seconds=1)),
+    ]
+
+    filtered_items = filter_s3_bucket_objects_within_date_range(s3_objects_stub)
+
+    assert len(filtered_items) == 0