diff --git a/app/main/validators.py b/app/main/validators.py index 64125fc1b..fe10d13f1 100644 --- a/app/main/validators.py +++ b/app/main/validators.py @@ -1,6 +1,7 @@ import re from wtforms import ValidationError from notifications_utils.template import Template +from app.utils import Spreadsheet class Blacklist(object): @@ -20,8 +21,8 @@ class CsvFileValidator(object): self.message = message def __call__(self, form, field): - if not field.data.filename.lower().endswith('.csv'): - raise ValidationError("{} is not a CSV file".format(field.data.filename)) + if not Spreadsheet.can_handle(field.data.filename): + raise ValidationError("{} isn’t a spreadsheet that Notify can read".format(field.data.filename)) class ValidEmailDomainRegex(object): diff --git a/app/main/views/send.py b/app/main/views/send.py index f8f165e00..10c62d9f5 100644 --- a/app/main/views/send.py +++ b/app/main/views/send.py @@ -27,7 +27,7 @@ from app.main.uploader import ( s3download ) from app import job_api_client, service_api_client, current_service, user_api_client, statistics_api_client -from app.utils import user_has_permissions, get_errors_for_csv +from app.utils import user_has_permissions, get_errors_for_csv, Spreadsheet def get_send_button_text(template_type, number_of_messages): @@ -112,10 +112,7 @@ def send_messages(service_id, template_id): s3upload( upload_id, service_id, - { - 'file_name': form.file.data.filename, - 'data': form.file.data.read().decode('utf-8') - }, + Spreadsheet.from_file(form.file.data.filename, form.file.data).as_dict, current_app.config['AWS_REGION'] ) session['upload_data'] = { diff --git a/app/utils.py b/app/utils.py index eeba867dc..36289aeff 100644 --- a/app/utils.py +++ b/app/utils.py @@ -1,8 +1,14 @@ import re import csv -from io import StringIO +from io import BytesIO, StringIO +from os import path from functools import wraps from flask import (abort, session, request, url_for) +import pyexcel +import pyexcel.ext.io +import pyexcel.ext.xls +import pyexcel.ext.xlsx +import pyexcel.ext.ods3 class BrowsableItem(object): @@ -131,3 +137,51 @@ def email_safe(string): return "".join([ character.lower() if character.isalnum() or character == "." else "" for character in re.sub("\s+", ".", string.strip()) # noqa ]) + + +class Spreadsheet(): + + allowed_file_extensions = ['csv', 'xlsx', 'xls', 'ods', 'xlsm', 'tsv'] + + def __init__(self, filename, csv_data): + self.filename = filename + self.as_csv_data = csv_data + self.as_dict = { + 'file_name': self.filename, + 'data': self.as_csv_data + } + + @classmethod + def can_handle(cls, filename): + return cls.get_extension(filename) in cls.allowed_file_extensions + + @staticmethod + def get_extension(filename): + return path.splitext(filename)[1].lower().lstrip('.') + + @staticmethod + def normalise_newlines(file_content): + return '\r\n'.join(file_content.getvalue().decode('utf-8').splitlines()) + + @classmethod + def from_file(cls, filename, file_content): + + extension = cls.get_extension(filename) + + if extension == 'csv': + return cls(filename, Spreadsheet.normalise_newlines(file_content)) + + if extension == 'tsv': + file_content = StringIO(Spreadsheet.normalise_newlines(file_content)) + + with StringIO() as converted: + + output = csv.writer(converted) + + for row in pyexcel.get_sheet( + file_type=extension, + file_content=file_content.getvalue() + ).to_array(): + output.writerow(row) + + return cls(filename, converted.getvalue()) diff --git a/requirements.txt b/requirements.txt index a3fbb420e..ac28fefaf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,6 +8,11 @@ Pygments==2.0.2 py-gfm==0.1.2 blinker==1.4 monotonic==0.3 +pyexcel==0.2.1 +pyexcel-io==0.1.0 +pyexcel-xls==0.1.0 +pyexcel-xlsx==0.1.0 +pyexcel-ods3==0.1.1 git+https://github.com/alphagov/notifications-python-client.git@1.0.0#egg=notifications-python-client==1.0.0 diff --git a/tests/app/main/views/test_send.py b/tests/app/main/views/test_send.py index 0dc1ebf45..341bf97cf 100644 --- a/tests/app/main/views/test_send.py +++ b/tests/app/main/views/test_send.py @@ -1,6 +1,8 @@ import pytest import re from io import BytesIO +from os import path +from glob import glob from bs4 import BeautifulSoup from flask import url_for from unittest.mock import ANY @@ -8,6 +10,43 @@ from tests import validate_route_permission, job_json_with_created_by template_types = ['email', 'sms'] +# The * ignores hidden files, eg .DS_Store +test_spreadsheet_files = glob(path.join('tests', 'spreadsheet_files', '*')) + + +def test_that_test_files_exist(): + assert len(test_spreadsheet_files) == 8 + + +@pytest.mark.parametrize("filename", test_spreadsheet_files) +def test_upload_files_in_different_formats( + filename, + app_, + api_user_active, + mocker, + mock_login, + mock_get_service, + mock_get_service_template, + mock_s3_upload, + mock_has_permissions, + fake_uuid +): + + with app_.test_request_context(), app_.test_client() as client, open(filename, 'rb') as uploaded: + client.login(api_user_active) + client.post( + url_for('main.send_messages', service_id=fake_uuid, template_id=fake_uuid), + data={'file': (BytesIO(uploaded.read()), filename)}, + content_type='multipart/form-data' + ) + + assert mock_s3_upload.call_args[0][2]['data'].strip() == ( + "phone number,name,favourite colour,fruit\r\n" + "07739 468 050,Pete,Coral,tomato\r\n" + "07527 125 974,Not Pete,Magenta,Avacado\r\n" + "07512 058 823,Still Not Pete,Crimson,Pear" + ) + def test_upload_csvfile_with_errors_shows_check_page_with_errors( app_, @@ -74,7 +113,7 @@ def test_upload_csv_invalid_extension(app_, ) assert resp.status_code == 200 - assert "{} is not a CSV file".format(filename) in resp.get_data(as_text=True) + assert "{} isn’t a spreadsheet that Notify can read".format(filename) in resp.get_data(as_text=True) def test_send_test_sms_message( diff --git a/tests/spreadsheet_files/EXCEL_95.XLS b/tests/spreadsheet_files/EXCEL_95.XLS new file mode 100755 index 000000000..ed8f1c176 Binary files /dev/null and b/tests/spreadsheet_files/EXCEL_95.XLS differ diff --git a/tests/spreadsheet_files/excel 2007 with macro support.xlsm b/tests/spreadsheet_files/excel 2007 with macro support.xlsm new file mode 100755 index 000000000..9ca48786e Binary files /dev/null and b/tests/spreadsheet_files/excel 2007 with macro support.xlsm differ diff --git a/tests/spreadsheet_files/excel 2007.xlsx b/tests/spreadsheet_files/excel 2007.xlsx new file mode 100755 index 000000000..ce770874b Binary files /dev/null and b/tests/spreadsheet_files/excel 2007.xlsx differ diff --git a/tests/spreadsheet_files/excel_97.xls b/tests/spreadsheet_files/excel_97.xls new file mode 100755 index 000000000..b278a7563 Binary files /dev/null and b/tests/spreadsheet_files/excel_97.xls differ diff --git a/tests/spreadsheet_files/newline_unix.csv b/tests/spreadsheet_files/newline_unix.csv new file mode 100755 index 000000000..6130c8e78 --- /dev/null +++ b/tests/spreadsheet_files/newline_unix.csv @@ -0,0 +1,4 @@ +phone number,name,favourite colour,fruit +07739 468 050,Pete,Coral,tomato +07527 125 974,Not Pete,Magenta,Avacado +07512 058 823,Still Not Pete,Crimson,Pear diff --git a/tests/spreadsheet_files/newline_windows.csv b/tests/spreadsheet_files/newline_windows.csv new file mode 100755 index 000000000..9bed21a73 --- /dev/null +++ b/tests/spreadsheet_files/newline_windows.csv @@ -0,0 +1 @@ +phone number,name,favourite colour,fruit 07739 468 050,Pete,Coral,tomato 07527 125 974,Not Pete,Magenta,Avacado 07512 058 823,Still Not Pete,Crimson,Pear \ No newline at end of file diff --git a/tests/spreadsheet_files/open document spreadsheet.ods b/tests/spreadsheet_files/open document spreadsheet.ods new file mode 100755 index 000000000..9e0e181e1 Binary files /dev/null and b/tests/spreadsheet_files/open document spreadsheet.ods differ diff --git a/tests/spreadsheet_files/tab separated.tsv b/tests/spreadsheet_files/tab separated.tsv new file mode 100755 index 000000000..b2909b18f --- /dev/null +++ b/tests/spreadsheet_files/tab separated.tsv @@ -0,0 +1 @@ +phone number name favourite colour fruit 07739 468 050 Pete Coral tomato 07527 125 974 Not Pete Magenta Avacado 07512 058 823 Still Not Pete Crimson Pear \ No newline at end of file