From 26a985720cf5f6da29f6a69a634069805aee09a8 Mon Sep 17 00:00:00 2001 From: Leo Hemsted Date: Wed, 26 Oct 2016 15:44:24 +0100 Subject: [PATCH] fix 500 errors with excel files > 500k size limit werkzeug's internal workings keep files under 500kb in memory, and files greater than 500kb as a TemporaryFile (https://github.com/pallets/werkzeug/blob/0.11-maintenance/werkzeug/formparser.py#L38) when we encounter a CSV or TSV, we call normalise_newlines, which invokes `.read()`, however when we were passing straight into pyexcel, we called `file.getvalue()` - this exists on BytesIO (small files) but not on TemporaryFile objects (large files) - we were seeing 500 errors --- app/utils.py | 5 +---- tests/app/main/views/test_send.py | 1 - tests/app/test_utils.py | 13 +++++++++++-- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/app/utils.py b/app/utils.py index 72232cc6b..78dec801f 100644 --- a/app/utils.py +++ b/app/utils.py @@ -173,18 +173,15 @@ class Spreadsheet(): @classmethod def from_rows(cls, rows, filename=''): - with StringIO() as converted: output = csv.writer(converted) for row in rows: output.writerow(row) - return cls(converted.getvalue(), filename) @classmethod def from_file(cls, file_content, filename=''): - extension = cls.get_extension(filename) if extension == 'csv': @@ -195,7 +192,7 @@ class Spreadsheet(): return cls.from_rows(pyexcel.get_sheet( file_type=extension, - file_content=file_content.getvalue() + file_content=file_content.read() ).to_array(), filename) diff --git a/tests/app/main/views/test_send.py b/tests/app/main/views/test_send.py index cecaac6dc..fbcd64280 100644 --- a/tests/app/main/views/test_send.py +++ b/tests/app/main/views/test_send.py @@ -8,7 +8,6 @@ from bs4 import BeautifulSoup from functools import partial from flask import url_for from tests import validate_route_permission -from datetime import datetime template_types = ['email', 'sms'] diff --git a/tests/app/test_utils.py b/tests/app/test_utils.py index 14495ab2d..be62799e4 100644 --- a/tests/app/test_utils.py +++ b/tests/app/test_utils.py @@ -1,9 +1,12 @@ -import pytest +from pathlib import Path from io import StringIO -from app.utils import email_safe, generate_notifications_csv, generate_previous_dict, generate_next_dict from csv import DictReader + +import pytest from freezegun import freeze_time +from app.utils import email_safe, generate_notifications_csv, generate_previous_dict, generate_next_dict, Spreadsheet + def test_email_safe_return_dot_separated_email_domain(): test_name = 'SOME service with+stuff+ b123' @@ -67,3 +70,9 @@ def test_generate_next_dict(client): def test_generate_previous_next_dict_adds_other_url_args(client): ret = generate_next_dict('main.view_notifications', 'foo', 2, {'message_type': 'blah'}) assert 'notifications/blah' in ret['url'] + + +def test_can_create_spreadsheet_from_large_excel_file(): + with open(str(Path.cwd() / 'tests' / 'spreadsheet_files' / 'excel 2007.xlsx'), 'rb') as xl: + ret = Spreadsheet.from_file(xl, filename='xl.xlsx') + assert ret.as_csv_data