Limit length of filename

S3 has a limit of 2 KB for user-defined metadata:

> the user-defined metadata is limited to 2 KB in size. The size of
> user-defined metadata is measured by taking the sum of the number of
> bytes in the UTF-8 encoding of each key and value.

– https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingMetadata.html#object-metadata

This means we have a limit of 1870 bytes for the filename:
```python
encoded = 'notification_count50000template_id665d26e7-ceac-4cc5-82ed-63d773d21561validTrueoriginal_file_name'.encode('utf-8')
sys.getsizeof(encoded)
>>> 130
2000-130
>>> 1870
```

Or, in other words, ~918 characters when each character takes two bytes in UTF-8 (like `ü`):
```python
sys.getsizeof(('ü'*918).encode('utf-8'))
>>> 1869
```
This commit is contained in:
Chris Hill-Scott
2018-04-30 10:46:39 +01:00
parent 735d5f0a29
commit bc8bc727f3
3 changed files with 51 additions and 1 deletions

View File

@@ -46,6 +46,7 @@ from app.utils import (
get_errors_for_csv,
get_help_argument,
get_template,
unicode_truncate,
user_has_permissions,
)
@@ -555,7 +556,10 @@ def _check_messages(service_id, template_id, upload_id, preview_row, letters_as_
notification_count=len(recipients),
template_id=str(template_id),
valid=True,
original_file_name=request.args.get('original_file_name'),
original_file_name=unicode_truncate(
request.args.get('original_file_name', ''),
1872
),
)
else:
session['file_uploads'].pop(upload_id)

View File

@@ -568,3 +568,8 @@ class GovernmentEmailDomain(AgreementInfo):
))
except StopIteration:
raise NotGovernmentEmailDomain()
def unicode_truncate(s, length):
    """Truncate ``s`` so that its UTF-8 encoding is at most ``length`` bytes.

    The cut is made on the encoded bytes rather than on characters. If it
    lands in the middle of a multi-byte character, that partial character is
    dropped, so the result is always a valid string.

    :param s: the string to truncate
    :param length: maximum size of the result in UTF-8 encoded bytes; zero or
        a negative value gives an empty string
    :return: the longest prefix of ``s`` whose UTF-8 encoding fits in
        ``length`` bytes
    """
    if length <= 0:
        # A negative slice bound counts from the end of the bytes and would
        # return almost the whole string instead of nothing.
        return ''
    encoded = s.encode('utf-8')[:length]
    # 'ignore' discards the incomplete trailing byte sequence left behind
    # when the slice splits a multi-byte character.
    return encoded.decode('utf-8', 'ignore')

View File

@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
import sys
import uuid
from functools import partial
from glob import glob
@@ -662,6 +663,46 @@ def test_upload_valid_csv_shows_preview_and_table(
assert normalize_spaces(str(row.select('td')[index + 1])) == cell
def test_file_name_truncated_to_fit_in_s3_metadata(
    client_request,
    mocker,
    mock_get_live_service,
    mock_get_service_template_with_placeholders,
    mock_get_users_by_service,
    mock_get_detailed_service_for_today,
    mock_s3_set_metadata,
    fake_uuid,
):
    """An over-long ``original_file_name`` is cut down before being stored as metadata."""
    with client_request.session_transaction() as session:
        session['file_uploads'] = {
            fake_uuid: {'template_id': fake_uuid}
        }

    mocker.patch(
        'app.main.views.send.s3download',
        return_value=(
            '\n'
            'phone number,name,thing,thing,thing\n'
            '07700900001, A, foo, foo, foo\n'
        ),
    )

    # Two- and four-byte characters, so a byte-based cut has to cope with
    # multi-byte sequences.
    file_name = 'ü😁' * 2000

    client_request.get(
        'main.check_messages',
        service_id=SERVICE_ONE_ID,
        template_id=fake_uuid,
        upload_id=fake_uuid,
        original_file_name=file_name,
    )

    # Sizes are taken with len() on the encoded bytes: the metadata limit is
    # counted in UTF-8 bytes, whereas sys.getsizeof() also includes the
    # interpreter's per-object overhead, which varies between builds.
    assert len(file_name.encode('utf-8')) > 2000

    metadata = mock_s3_set_metadata.call_args_list[0][1]
    metadata_size = len(''.join(
        '{}{}'.format(key, value) for key, value in metadata.items()
    ).encode('utf-8'))

    # The truncated file name (1872 bytes) plus the remaining keys and values.
    assert metadata_size == 1965
def test_show_all_columns_if_there_are_duplicate_recipient_columns(
client_request,
mocker,