From aa7287bf649777563dcd7ba09bd12e9e6532d875 Mon Sep 17 00:00:00 2001 From: Chris Hill-Scott Date: Wed, 28 Feb 2018 14:45:57 +0000 Subject: [PATCH] Catch exceptions caused by ambiguous Excel files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Excel stores dates as floating point numbers, counting the days (or fraction thereof) since 1900 (except when it counts from 1904). However it also, incorrectly, recognises 1900 as a leap year. This means that it’s ambiguous whether day 59 is February 28th, or February 27th, depending if you’re counting up or down. In fact any number less than 60 is, therefore, ambiguous. This shouldn’t really matter since no-one is going to be using dates in the year 1900 in Notify messages. _Except_ when Excel misidentifies a cell as containing a date. For example, if you have the number `9` inside a cell, it means _house number 9_ if the next cell contains _example_ street. but Excel is all like ‘oh they must want January 9th 1900!’ No. Bad Excel. There’s not much we can do about this, but we can at least give people an error message with the workaround, which is what this commit does. Most of this commit message is paraphrased from: http://xlrd.readthedocs.io/en/latest/dates.html --- app/main/views/send.py | 8 ++++ tests/app/main/views/test_send.py | 68 ++++++++++++++++++++++++++++++- 2 files changed, 74 insertions(+), 2 deletions(-) diff --git a/app/main/views/send.py b/app/main/views/send.py index 8e62e4137..36a43b9c0 100644 --- a/app/main/views/send.py +++ b/app/main/views/send.py @@ -22,6 +22,7 @@ from notifications_utils.recipients import ( from orderedset import OrderedSet from werkzeug.routing import RequestRedirect from xlrd.biffh import XLRDError +from xlrd.xldate import XLDateError from app import ( current_service, @@ -137,6 +138,13 @@ def send_messages(service_id, template_id): flash('Couldn’t read {}. Try using a different file format.'.format( form.file.data.filename )) + except (XLDateError): + flash(( + '{} contains numbers or dates that Notify can’t understand. ' + 'Try formatting all columns as ‘text’ or export your file as CSV.' + ).format( + form.file.data.filename + )) column_headings = first_column_headings[template.template_type] + list(template.placeholders) diff --git a/tests/app/main/views/test_send.py b/tests/app/main/views/test_send.py index 4f94b3b32..3554a115d 100644 --- a/tests/app/main/views/test_send.py +++ b/tests/app/main/views/test_send.py @@ -5,6 +5,7 @@ from glob import glob from io import BytesIO from itertools import repeat from os import path +from zipfile import BadZipFile import pytest from bs4 import BeautifulSoup @@ -37,6 +38,13 @@ from tests.conftest import ( no_sms_senders, normalize_spaces, ) +from xlrd.biffh import XLRDError +from xlrd.xldate import ( + XLDateAmbiguous, + XLDateError, + XLDateNegative, + XLDateTooLarge, +) template_types = ['email', 'sms'] @@ -278,6 +286,7 @@ def test_upload_files_in_different_formats( data={'file': (BytesIO(uploaded.read()), filename)}, content_type='multipart/form-data' ) + page = BeautifulSoup(response.data.decode('utf-8'), 'html.parser') if acceptable_file: assert mock_s3_upload.call_args[0][1]['data'].strip() == ( @@ -288,9 +297,64 @@ def test_upload_files_in_different_formats( ) else: assert not mock_s3_upload.called - assert ( + assert normalize_spaces(page.select_one('.banner-dangerous').text) == ( 'Couldn’t read {}. Try using a different file format.'.format(filename) - ) in response.get_data(as_text=True) + ) + + +@pytest.mark.parametrize('exception, expected_error_message', [ + (partial(UnicodeDecodeError, 'codec', b'', 1, 2, 'reason'), ( + 'Couldn’t read example.xlsx. Try using a different file format.' + )), + (BadZipFile, ( + 'Couldn’t read example.xlsx. Try using a different file format.' + )), + (XLRDError, ( + 'Couldn’t read example.xlsx. Try using a different file format.' + )), + (XLDateError, ( + 'example.xlsx contains numbers or dates that Notify can’t understand. ' + 'Try formatting all columns as ‘text’ or export your file as CSV.' + )), + (XLDateNegative, ( + 'example.xlsx contains numbers or dates that Notify can’t understand. ' + 'Try formatting all columns as ‘text’ or export your file as CSV.' + )), + (XLDateAmbiguous, ( + 'example.xlsx contains numbers or dates that Notify can’t understand. ' + 'Try formatting all columns as ‘text’ or export your file as CSV.' + )), + (XLDateTooLarge, ( + 'example.xlsx contains numbers or dates that Notify can’t understand. ' + 'Try formatting all columns as ‘text’ or export your file as CSV.' + )), +]) +def test_shows_error_if_parsing_exception( + logged_in_client, + mocker, + mock_get_service_template, + exception, + expected_error_message, +): + + def _raise_exception_or_partial_exception(file_content, filename): + raise exception() + + mocker.patch( + 'app.main.views.send.Spreadsheet.from_file', + side_effect=_raise_exception_or_partial_exception + ) + + response = logged_in_client.post( + url_for('main.send_messages', service_id=SERVICE_ONE_ID, template_id=fake_uuid), + data={'file': (BytesIO(b'example'), 'example.xlsx')}, + content_type='multipart/form-data' + ) + page = BeautifulSoup(response.data.decode('utf-8'), 'html.parser') + + assert normalize_spaces(page.select_one('.banner-dangerous').text) == ( + expected_error_message + ) def test_upload_csvfile_with_errors_shows_check_page_with_errors(