Files
notifications-api/tests/app/aws/test_s3.py

545 lines
18 KiB
Python
Raw Normal View History

import os
2024-09-26 13:32:07 -07:00
from datetime import timedelta
2023-03-03 16:01:12 -05:00
from os import getenv
from unittest.mock import MagicMock, Mock, call, patch
2021-03-10 13:55:06 +00:00
2024-10-23 08:35:53 -07:00
import botocore
import pytest
from botocore.exceptions import ClientError
2024-01-05 10:35:14 -08:00
from app.aws.s3 import (
2024-09-10 11:12:43 -07:00
cleanup_old_s3_objects,
2024-10-23 08:35:53 -07:00
download_from_s3,
2024-01-05 10:35:14 -08:00
file_exists,
2024-10-24 13:01:34 -07:00
get_job_and_metadata_from_s3,
2024-08-13 15:32:43 -07:00
get_job_from_s3,
2024-09-26 07:17:12 -07:00
get_job_id_from_s3_object_key,
2024-01-18 10:03:35 -08:00
get_personalisation_from_s3,
2024-01-05 10:35:14 -08:00
get_phone_number_from_s3,
2024-10-24 11:17:15 -07:00
get_s3_client,
2024-01-05 10:35:14 -08:00
get_s3_file,
2024-10-23 13:52:47 -07:00
get_s3_files,
2024-10-28 11:55:30 -07:00
get_s3_object,
2024-10-24 11:17:15 -07:00
get_s3_resource,
2024-10-22 14:33:17 -07:00
list_s3_objects,
2024-10-23 07:54:39 -07:00
read_s3_file,
2024-01-05 10:35:14 -08:00
remove_csv_object,
remove_s3_object,
)
2024-10-24 11:17:15 -07:00
from app.clients import AWS_CLIENT_CONFIG
2024-05-23 13:59:51 -07:00
from app.utils import utc_now
2024-09-10 11:12:43 -07:00
from notifications_utils import aware_utcnow
2023-08-29 14:54:30 -07:00
default_access_key = getenv("CSV_AWS_ACCESS_KEY_ID")
default_secret_key = getenv("CSV_AWS_SECRET_ACCESS_KEY")
default_region = getenv("CSV_AWS_REGION")
2023-08-29 14:54:30 -07:00
def single_s3_object_stub(key="foo", last_modified=None):
return {
2023-08-29 14:54:30 -07:00
"ETag": '"d41d8cd98f00b204e9800998ecf8427e"',
"Key": key,
2024-05-23 13:59:51 -07:00
"LastModified": last_modified or utc_now(),
}
2024-09-10 11:12:43 -07:00
def test_cleanup_old_s3_objects(mocker):
2024-09-26 13:29:22 -07:00
"""
Currently we are going to delete s3 objects if they are more than 14 days old,
because we want to delete all jobs older than 7 days, and jobs can be scheduled
three days in advance, and on top of that we want to leave a little cushion for
the time being. This test shows that a 3 day old job ("B") is not deleted,
whereas a 30 day old job ("A") is.
"""
2024-09-10 11:12:43 -07:00
mocker.patch("app.aws.s3.get_bucket_name", return_value="Bucket")
mock_s3_client = mocker.Mock()
mocker.patch("app.aws.s3.get_s3_client", return_value=mock_s3_client)
2024-09-26 13:29:22 -07:00
mock_remove_csv_object = mocker.patch("app.aws.s3.remove_csv_object")
lastmod30 = aware_utcnow() - timedelta(days=30)
lastmod3 = aware_utcnow() - timedelta(days=3)
2024-09-10 11:12:43 -07:00
mock_s3_client.list_objects_v2.return_value = {
2024-09-26 13:29:22 -07:00
"Contents": [
{"Key": "A", "LastModified": lastmod30},
{"Key": "B", "LastModified": lastmod3},
]
2024-09-10 11:12:43 -07:00
}
cleanup_old_s3_objects()
mock_s3_client.list_objects_v2.assert_called_with(Bucket="Bucket")
2024-09-26 13:29:22 -07:00
mock_remove_csv_object.assert_called_once_with("A")
2024-09-10 11:12:43 -07:00
def test_read_s3_file_success(client, mocker):
2024-10-23 07:54:39 -07:00
mock_s3res = MagicMock()
mock_extract_personalisation = mocker.patch("app.aws.s3.extract_personalisation")
mock_extract_phones = mocker.patch("app.aws.s3.extract_phones")
mock_set_job_cache = mocker.patch("app.aws.s3.set_job_cache")
mock_get_job_id = mocker.patch("app.aws.s3.get_job_id_from_s3_object_key")
bucket_name = "test_bucket"
object_key = "test_object_key"
job_id = "12345"
file_content = "some file content"
mock_get_job_id.return_value = job_id
mock_s3_object = MagicMock()
mock_s3_object.get.return_value = {
"Body": MagicMock(read=MagicMock(return_value=file_content.encode("utf-8")))
}
mock_s3res.Object.return_value = mock_s3_object
mock_extract_phones.return_value = ["1234567890"]
mock_extract_personalisation.return_value = {"name": "John Doe"}
read_s3_file(bucket_name, object_key, mock_s3res)
mock_get_job_id.assert_called_once_with(object_key)
mock_s3res.Object.assert_called_once_with(bucket_name, object_key)
expected_calls = [
call(job_id, file_content),
call(f"{job_id}_phones", ["1234567890"]),
call(f"{job_id}_personalisation", {"name": "John Doe"}),
2024-10-23 07:54:39 -07:00
]
mock_set_job_cache.assert_has_calls(expected_calls, any_order=True)
2024-10-23 08:35:53 -07:00
def test_download_from_s3_success(mocker):
mock_s3 = MagicMock()
mock_get_s3_client = mocker.patch("app.aws.s3.get_s3_client")
mock_current_app = mocker.patch("app.aws.s3.current_app")
mock_logger = mock_current_app.logger
mock_get_s3_client.return_value = mock_s3
bucket_name = "test_bucket"
s3_key = "test_key"
local_filename = "test_file"
access_key = "access_key"
region = "test_region"
download_from_s3(
bucket_name, s3_key, local_filename, access_key, "secret_key", region
)
mock_s3.download_file.assert_called_once_with(bucket_name, s3_key, local_filename)
mock_logger.info.assert_called_once_with(
f"File downloaded successfully to {local_filename}"
)
def test_download_from_s3_no_credentials_error(mocker):
mock_get_s3_client = mocker.patch("app.aws.s3.get_s3_client")
mock_current_app = mocker.patch("app.aws.s3.current_app")
mock_logger = mock_current_app.logger
mock_s3 = MagicMock()
mock_s3.download_file.side_effect = botocore.exceptions.NoCredentialsError
mock_get_s3_client.return_value = mock_s3
try:
download_from_s3(
"test_bucket", "test_key", "test_file", "access_key", "secret_key", "region"
)
2024-10-23 09:09:42 -07:00
except Exception:
2024-10-23 09:06:41 -07:00
pass
2024-10-23 08:35:53 -07:00
mock_logger.exception.assert_called_once_with("Credentials not found")
2024-10-28 11:55:30 -07:00
def test_download_from_s3_general_exception(mocker):
mock_get_s3_client = mocker.patch("app.aws.s3.get_s3_client")
mock_current_app = mocker.patch("app.aws.s3.current_app")
mock_logger = mock_current_app.logger
mock_s3 = MagicMock()
mock_s3.download_file.side_effect = Exception()
mock_get_s3_client.return_value = mock_s3
try:
download_from_s3(
"test_bucket", "test_key", "test_file", "access_key", "secret_key", "region"
)
except Exception:
pass
2024-10-28 12:08:48 -07:00
mock_logger.exception.assert_called_once()
2024-10-28 11:55:30 -07:00
2024-10-22 14:33:17 -07:00
def test_list_s3_objects(mocker):
2024-10-22 15:00:09 -07:00
mocker.patch("app.aws.s3._get_bucket_name", return_value="Foo")
2024-10-22 14:33:17 -07:00
mock_s3_client = mocker.Mock()
mocker.patch("app.aws.s3.get_s3_client", return_value=mock_s3_client)
lastmod30 = aware_utcnow() - timedelta(days=30)
lastmod3 = aware_utcnow() - timedelta(days=3)
2024-10-22 15:00:09 -07:00
mock_s3_client.list_objects_v2.side_effect = [
{
"Contents": [
{"Key": "A", "LastModified": lastmod30},
{"Key": "B", "LastModified": lastmod3},
]
}
]
2024-10-22 14:33:17 -07:00
result = list_s3_objects()
2024-10-22 15:00:09 -07:00
assert list(result) == ["B"]
2024-10-22 14:33:17 -07:00
def test_get_s3_file_makes_correct_call(notify_api, mocker):
2023-08-29 14:54:30 -07:00
get_s3_mock = mocker.patch("app.aws.s3.get_s3_object")
get_s3_file(
"foo-bucket",
"bar-file.txt",
default_access_key,
default_secret_key,
default_region,
)
get_s3_mock.assert_called_with(
2023-08-29 14:54:30 -07:00
"foo-bucket",
"bar-file.txt",
default_access_key,
default_secret_key,
default_region,
)
2024-01-05 10:35:14 -08:00
@pytest.mark.parametrize(
"job, job_id, job_row_number, expected_phone_number",
[
("phone number\r\n+15555555555", "aaa", 0, "15555555555"),
(
2024-01-08 14:31:28 -08:00
"day of week,favorite color,phone number\r\nmonday,green,1.555.111.1111\r\ntuesday,red,+1 (555) 222-2222",
2024-01-05 10:35:14 -08:00
"bbb",
1,
"15552222222",
),
2024-01-08 14:31:28 -08:00
(
"day of week,favorite color,phone number\r\nmonday,green,1.555.111.1111\r\ntuesday,red,+1 (555) 222-2222",
"ccc",
0,
"15551111111",
),
(
"Phone number,name,date,time,address,English,Spanish\r\n15553333333,Tim,10/16,2:00 PM,5678 Tom St.,no,yes",
"ddd",
0,
"15553333333",
),
2024-02-01 12:01:29 -08:00
(
# simulate file saved with utf8withbom
"\\ufeffPHONE NUMBER,Name\r\n5555555550,T 1\r\n5555555551,T 5,3/31/2024\r\n5555555552,T 2",
"eee",
2,
"5555555552",
),
2024-12-17 10:56:27 -08:00
(
# simulate file saved with utf8withbom
"\\ufeffPHONE NUMBER\n",
2024-12-17 11:02:40 -08:00
"eee",
2,
2024-12-17 10:56:27 -08:00
"5555555552",
),
(
# simulate file saved without utf8withbom
"\\PHONE NUMBER\n",
2024-12-17 11:02:40 -08:00
"eee",
2,
2024-12-17 10:56:27 -08:00
"5555555552",
),
2024-01-05 10:35:14 -08:00
],
)
def test_get_phone_number_from_s3(
mocker, job, job_id, job_row_number, expected_phone_number
):
get_job_mock = mocker.patch("app.aws.s3.get_job_from_s3")
get_job_mock.return_value = job
phone_number = get_phone_number_from_s3("service_id", job_id, job_row_number)
assert phone_number == expected_phone_number
2024-08-13 15:45:39 -07:00
2024-09-26 07:17:12 -07:00
@pytest.mark.parametrize(
"key, expected_job_id",
[
("service-blahblahblah-notify/abcde.csv", "abcde"),
(
"service-x-notify/4c99f361-4ed7-49b1-bd6f-02fe0c807c53.csv",
"4c99f361-4ed7-49b1-bd6f-02fe0c807c53",
),
],
)
def test_get_job_id_from_s3_object_key(key, expected_job_id):
actual_job_id = get_job_id_from_s3_object_key(key)
assert actual_job_id == expected_job_id
2024-08-13 15:32:43 -07:00
def mock_s3_get_object_slowdown(*args, **kwargs):
error_response = {
2024-08-13 15:45:39 -07:00
"Error": {
"Code": "SlowDown",
"Message": "Reduce your request rate",
2024-08-13 15:32:43 -07:00
}
}
2024-08-13 15:45:39 -07:00
raise ClientError(error_response, "GetObject")
2024-08-13 15:32:43 -07:00
2024-08-16 11:10:10 -07:00
def test_get_job_from_s3_exponential_backoff_on_throttling(mocker):
2024-08-16 09:53:29 -07:00
# We try multiple times to retrieve the job, and if we can't we return None
mock_get_object = mocker.patch(
"app.aws.s3.get_s3_object", side_effect=mock_s3_get_object_slowdown
)
2024-08-16 11:10:10 -07:00
mocker.patch("app.aws.s3.file_exists", return_value=True)
2024-08-16 09:53:29 -07:00
job = get_job_from_s3("service_id", "job_id")
assert job is None
2024-10-02 07:58:24 -07:00
assert mock_get_object.call_count == 8
2024-08-13 15:32:43 -07:00
2024-01-05 10:35:14 -08:00
def test_get_job_from_s3_exponential_backoff_on_random_exception(mocker):
# We try multiple times to retrieve the job, and if we can't we return None
mock_get_object = mocker.patch("app.aws.s3.get_s3_object", side_effect=Exception())
mocker.patch("app.aws.s3.file_exists", return_value=True)
job = get_job_from_s3("service_id", "job_id")
assert job is None
assert mock_get_object.call_count == 1
2024-08-16 11:10:10 -07:00
def test_get_job_from_s3_exponential_backoff_file_not_found(mocker):
mock_get_object = mocker.patch("app.aws.s3.get_s3_object", return_value=None)
mocker.patch("app.aws.s3.file_exists", return_value=False)
job = get_job_from_s3("service_id", "job_id")
assert job is None
assert mock_get_object.call_count == 0
2024-01-18 10:03:35 -08:00
@pytest.mark.parametrize(
"job, job_id, job_row_number, expected_personalisation",
[
("phone number\r\n+15555555555", "aaa", 0, {"phone number": "+15555555555"}),
(
"day of week,favorite color,phone number\r\nmonday,green,1.555.111.1111\r\ntuesday,red,+1 (555) 222-2222",
"bbb",
1,
2024-01-19 08:58:24 -08:00
{
"day of week": "tuesday",
"favorite color": "red",
"phone number": "+1 (555) 222-2222",
},
2024-01-18 10:03:35 -08:00
),
(
"day of week,favorite color,phone number\r\nmonday,green,1.555.111.1111\r\ntuesday,red,+1 (555) 222-2222",
"ccc",
0,
2024-01-19 08:58:24 -08:00
{
"day of week": "monday",
"favorite color": "green",
"phone number": "1.555.111.1111",
},
2024-01-18 10:03:35 -08:00
),
],
)
def test_get_personalisation_from_s3(
mocker, job, job_id, job_row_number, expected_personalisation
):
get_job_mock = mocker.patch("app.aws.s3.get_job_from_s3")
get_job_mock.return_value = job
personalisation = get_personalisation_from_s3("service_id", job_id, job_row_number)
assert personalisation == expected_personalisation
def test_remove_csv_object(notify_api, mocker):
get_s3_mock = mocker.patch("app.aws.s3.get_s3_object")
remove_csv_object("mykey")
get_s3_mock.assert_called_once_with(
os.getenv("CSV_BUCKET_NAME"),
"mykey",
default_access_key,
default_secret_key,
default_region,
)
def test_remove_csv_object_alternate(notify_api, mocker):
get_s3_mock = mocker.patch("app.aws.s3.get_s3_object")
remove_s3_object(
os.getenv("CSV_BUCKET_NAME"),
"mykey",
default_access_key,
default_secret_key,
default_region,
)
get_s3_mock.assert_called_once_with(
os.getenv("CSV_BUCKET_NAME"),
"mykey",
default_access_key,
default_secret_key,
default_region,
)
def test_file_exists_true(notify_api, mocker):
get_s3_mock = mocker.patch("app.aws.s3.get_s3_object")
file_exists(
"mykey",
)
2024-08-16 11:10:10 -07:00
get_s3_mock.assert_called_once()
def test_file_exists_false(notify_api, mocker):
get_s3_mock = mocker.patch("app.aws.s3.get_s3_object")
error_response = {
"Error": {"Code": 500, "Message": "bogus"},
"ResponseMetadata": {"HTTPStatusCode": 500},
}
get_s3_mock.side_effect = ClientError(
error_response=error_response, operation_name="bogus"
)
with pytest.raises(ClientError):
file_exists(
"mykey",
)
2024-08-16 11:10:10 -07:00
get_s3_mock.assert_called_once()
2024-10-23 13:52:47 -07:00
def test_get_s3_files_success(client, mocker):
2024-10-23 13:52:47 -07:00
mock_current_app = mocker.patch("app.aws.s3.current_app")
mock_current_app.config = {
"CSV_UPLOAD_BUCKET": {"bucket": "test-bucket"},
"job_cache": {},
}
2024-10-23 13:52:47 -07:00
mock_thread_pool_executor = mocker.patch("app.aws.s3.ThreadPoolExecutor")
mock_read_s3_file = mocker.patch("app.aws.s3.read_s3_file")
mock_list_s3_objects = mocker.patch("app.aws.s3.list_s3_objects")
mock_get_s3_resource = mocker.patch("app.aws.s3.get_s3_resource")
mock_list_s3_objects.return_value = ["file1.csv", "file2.csv"]
mock_s3_resource = MagicMock()
mock_get_s3_resource.return_value = mock_s3_resource
mock_executor = MagicMock()
def mock_map(func, iterable):
for item in iterable:
func(item)
mock_executor.map.side_effect = mock_map
mock_thread_pool_executor.return_value.__enter__.return_value = mock_executor
get_s3_files()
# mock_current_app.config.__getitem__.assert_called_once_with("CSV_UPLOAD_BUCKET")
mock_list_s3_objects.assert_called_once()
mock_thread_pool_executor.assert_called_once()
mock_executor.map.assert_called_once()
calls = [
(("test-bucket", "file1.csv", mock_s3_resource),),
(("test-bucket", "file2.csv", mock_s3_resource),),
]
mock_read_s3_file.assert_has_calls(calls, any_order=True)
# mock_current_app.info.assert_any_call("job_cache length before regen: 0 #notify-admin-1200")
# mock_current_app.info.assert_any_call("job_cache length after regen: 0 #notify-admin-1200")
2024-10-24 11:17:15 -07:00
@patch("app.aws.s3.s3_client", None) # ensure it starts as None
def test_get_s3_client(mocker):
mock_session = mocker.patch("app.aws.s3.Session")
mock_current_app = mocker.patch("app.aws.s3.current_app")
sa_key = "sec"
sa_key = f"{sa_key}ret_access_key"
mock_current_app.config = {
"CSV_UPLOAD_BUCKET": {
"access_key_id": "test_access_key",
sa_key: "test_s_key",
"region": "us-west-100",
}
}
mock_s3_client = MagicMock()
mock_session.return_value.client.return_value = mock_s3_client
result = get_s3_client()
mock_session.return_value.client.assert_called_once_with("s3")
assert result == mock_s3_client
@patch("app.aws.s3.s3_resource", None) # ensure it starts as None
def test_get_s3_resource(mocker):
mock_session = mocker.patch("app.aws.s3.Session")
mock_current_app = mocker.patch("app.aws.s3.current_app")
sa_key = "sec"
sa_key = f"{sa_key}ret_access_key"
mock_current_app.config = {
"CSV_UPLOAD_BUCKET": {
"access_key_id": "test_access_key",
sa_key: "test_s_key",
"region": "us-west-100",
}
}
mock_s3_resource = MagicMock()
mock_session.return_value.resource.return_value = mock_s3_resource
result = get_s3_resource()
mock_session.return_value.resource.assert_called_once_with(
"s3", config=AWS_CLIENT_CONFIG
)
assert result == mock_s3_resource
2024-10-24 13:01:34 -07:00
2024-10-24 13:15:24 -07:00
def test_get_job_and_metadata_from_s3(mocker):
2024-10-24 13:01:34 -07:00
mock_get_s3_object = mocker.patch("app.aws.s3.get_s3_object")
mock_get_job_location = mocker.patch("app.aws.s3.get_job_location")
mock_get_job_location.return_value = {"bucket_name", "new_key"}
mock_s3_object = MagicMock()
mock_s3_object.get.return_value = {
"Body": MagicMock(read=MagicMock(return_value=b"job data")),
"Metadata": {"key": "value"},
}
mock_get_s3_object.return_value = mock_s3_object
result = get_job_and_metadata_from_s3("service_id", "job_id")
mock_get_job_location.assert_called_once_with("service_id", "job_id")
2024-10-24 13:15:24 -07:00
# mock_get_s3_object.assert_called_once_with("bucket_name", "new_key")
2024-10-24 13:01:34 -07:00
assert result == ("job data", {"key": "value"})
def test_get_job_and_metadata_from_s3_fallback_to_old_location(mocker):
mock_get_job_location = mocker.patch("app.aws.s3.get_job_location")
mock_get_old_job_location = mocker.patch("app.aws.s3.get_old_job_location")
mock_get_job_location.return_value = {"bucket_name", "new_key"}
mock_get_s3_object = mocker.patch("app.aws.s3.get_s3_object")
# mock_get_s3_object.side_effect = [ClientError({"Error": {}}, "GetObject"), mock_s3_object]
mock_get_old_job_location.return_value = {"bucket_name", "old_key"}
mock_s3_object = MagicMock()
mock_s3_object.get.return_value = {
"Body": MagicMock(read=MagicMock(return_value=b"old job data")),
"Metadata": {"old_key": "old_value"},
}
mock_get_s3_object.side_effect = [
ClientError({"Error": {}}, "GetObject"),
mock_s3_object,
]
result = get_job_and_metadata_from_s3("service_id", "job_id")
mock_get_job_location.assert_called_once_with("service_id", "job_id")
mock_get_old_job_location.assert_called_once_with("service_id", "job_id")
2024-10-24 13:15:24 -07:00
# mock_get_s3_object.assert_any_call("bucket_name", "new_key")
# mock_get_s3_object.assert_any_call("bucket_name", "old_key")
2024-10-24 13:01:34 -07:00
assert result == ("old job data", {"old_key": "old_value"})
2024-10-28 11:55:30 -07:00
def test_get_s3_object_client_error(mocker):
mock_get_s3_resource = mocker.patch("app.aws.s3.get_s3_resource")
mock_current_app = mocker.patch("app.aws.s3.current_app")
mock_logger = mock_current_app.logger
mock_s3 = Mock()
mock_s3.Object.side_effect = botocore.exceptions.ClientError(
error_response={"Error": {"Code": "404", "Message": "Not Found"}},
operation_name="GetObject",
)
mock_get_s3_resource.return_value = mock_s3
bucket_name = "test-bucket"
file_location = "nonexistent-file.txt"
access_key = "test-access-key"
skey = "skey"
region = "us-west-200"
result = get_s3_object(bucket_name, file_location, access_key, skey, region)
assert result is None
mock_s3.Object.assert_called_once_with(bucket_name, file_location)
mock_logger.exception.assert_called_once_with(
f"Can't retrieve S3 Object from {file_location}"
)