Catch exceptions caused by ambiguous Excel files

Excel stores dates as floating point numbers, counting the days (or
fraction thereof) since 1900 (except when it counts from 1904).

However it also, incorrectly, recognises 1900 as a leap year. This means
that it’s ambiguous whether day 59 is February 28th, or February 27th,
depending if you’re counting up or down. In fact any number less than 60
is, therefore, ambiguous.

This shouldn’t really matter since no-one is going to be using dates in
the year 1900 in Notify messages. _Except_ when Excel misidentifies a
cell as containing a date. For example, if you have the number `9`
inside a cell, it means _house number 9_ if the next cell contains
_example_ street. but Excel is all like ‘oh they must want January 9th
1900!’ No. Bad Excel.

There’s not much we can do about this, but we can at least give people
an error message with the workaround, which is what this commit does.

Most of this commit message is paraphrased from:
http://xlrd.readthedocs.io/en/latest/dates.html
This commit is contained in:
Chris Hill-Scott
2018-02-28 14:45:57 +00:00
parent aae94c8c80
commit aa7287bf64
2 changed files with 74 additions and 2 deletions

View File

@@ -22,6 +22,7 @@ from notifications_utils.recipients import (
from orderedset import OrderedSet
from werkzeug.routing import RequestRedirect
from xlrd.biffh import XLRDError
from xlrd.xldate import XLDateError
from app import (
current_service,
@@ -137,6 +138,13 @@ def send_messages(service_id, template_id):
flash('Couldnt read {}. Try using a different file format.'.format(
form.file.data.filename
))
except (XLDateError):
flash((
'{} contains numbers or dates that Notify cant understand. '
'Try formatting all columns as text or export your file as CSV.'
).format(
form.file.data.filename
))
column_headings = first_column_headings[template.template_type] + list(template.placeholders)

View File

@@ -5,6 +5,7 @@ from glob import glob
from io import BytesIO
from itertools import repeat
from os import path
from zipfile import BadZipFile
import pytest
from bs4 import BeautifulSoup
@@ -37,6 +38,13 @@ from tests.conftest import (
no_sms_senders,
normalize_spaces,
)
from xlrd.biffh import XLRDError
from xlrd.xldate import (
XLDateAmbiguous,
XLDateError,
XLDateNegative,
XLDateTooLarge,
)
template_types = ['email', 'sms']
@@ -278,6 +286,7 @@ def test_upload_files_in_different_formats(
data={'file': (BytesIO(uploaded.read()), filename)},
content_type='multipart/form-data'
)
page = BeautifulSoup(response.data.decode('utf-8'), 'html.parser')
if acceptable_file:
assert mock_s3_upload.call_args[0][1]['data'].strip() == (
@@ -288,9 +297,64 @@ def test_upload_files_in_different_formats(
)
else:
assert not mock_s3_upload.called
assert (
assert normalize_spaces(page.select_one('.banner-dangerous').text) == (
'Couldnt read {}. Try using a different file format.'.format(filename)
) in response.get_data(as_text=True)
)
@pytest.mark.parametrize('exception, expected_error_message', [
(partial(UnicodeDecodeError, 'codec', b'', 1, 2, 'reason'), (
'Couldnt read example.xlsx. Try using a different file format.'
)),
(BadZipFile, (
'Couldnt read example.xlsx. Try using a different file format.'
)),
(XLRDError, (
'Couldnt read example.xlsx. Try using a different file format.'
)),
(XLDateError, (
'example.xlsx contains numbers or dates that Notify cant understand. '
'Try formatting all columns as text or export your file as CSV.'
)),
(XLDateNegative, (
'example.xlsx contains numbers or dates that Notify cant understand. '
'Try formatting all columns as text or export your file as CSV.'
)),
(XLDateAmbiguous, (
'example.xlsx contains numbers or dates that Notify cant understand. '
'Try formatting all columns as text or export your file as CSV.'
)),
(XLDateTooLarge, (
'example.xlsx contains numbers or dates that Notify cant understand. '
'Try formatting all columns as text or export your file as CSV.'
)),
])
def test_shows_error_if_parsing_exception(
logged_in_client,
mocker,
mock_get_service_template,
exception,
expected_error_message,
):
def _raise_exception_or_partial_exception(file_content, filename):
raise exception()
mocker.patch(
'app.main.views.send.Spreadsheet.from_file',
side_effect=_raise_exception_or_partial_exception
)
response = logged_in_client.post(
url_for('main.send_messages', service_id=SERVICE_ONE_ID, template_id=fake_uuid),
data={'file': (BytesIO(b'example'), 'example.xlsx')},
content_type='multipart/form-data'
)
page = BeautifulSoup(response.data.decode('utf-8'), 'html.parser')
assert normalize_spaces(page.select_one('.banner-dangerous').text) == (
expected_error_message
)
def test_upload_csvfile_with_errors_shows_check_page_with_errors(