mirror of
https://github.com/GSA/notifications-admin.git
synced 2026-04-25 11:40:58 -04:00
Accept common spreadsheet formats, not just CSV
We require users to export their spreadsheets as CSV files before uploading them. But this seems like the sort of thing a computer should be able to do. So this commit adds a wrapper class which: - takes a the uploaded file - returns it in a normalised format, or reads it using pyexcel[1] - gives the data back in CSV format This allows us to accept `.csv`, `.xlsx`, `.xls` (97 and 95), `.ods`, `.xlsm` and `.tsv` files. We can upload the resultant CSV just like normal, and process it for errors as before. Testing --- To test this I’ve added a selection of common spreadsheet files as test data. They all contain the same data, so the tests look to see that the resultant CSV output is the same for each. UI changes --- This commit doesn’t change the UI, apart from to give a different error message if a user uploads a file type that we still don’t understand. I intend to do this as a separate pull request, in order to fulfil https://www.pivotaltracker.com/story/show/119371637
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
import re
|
||||
from wtforms import ValidationError
|
||||
from notifications_utils.template import Template
|
||||
from app.utils import Spreadsheet
|
||||
|
||||
|
||||
class Blacklist(object):
|
||||
@@ -20,8 +21,8 @@ class CsvFileValidator(object):
|
||||
self.message = message
|
||||
|
||||
def __call__(self, form, field):
|
||||
if not field.data.filename.lower().endswith('.csv'):
|
||||
raise ValidationError("{} is not a CSV file".format(field.data.filename))
|
||||
if not Spreadsheet.can_handle(field.data.filename):
|
||||
raise ValidationError("{} isn’t a spreadsheet that Notify can read".format(field.data.filename))
|
||||
|
||||
|
||||
class ValidEmailDomainRegex(object):
|
||||
|
||||
@@ -27,7 +27,7 @@ from app.main.uploader import (
|
||||
s3download
|
||||
)
|
||||
from app import job_api_client, service_api_client, current_service, user_api_client, statistics_api_client
|
||||
from app.utils import user_has_permissions, get_errors_for_csv
|
||||
from app.utils import user_has_permissions, get_errors_for_csv, Spreadsheet
|
||||
|
||||
|
||||
def get_send_button_text(template_type, number_of_messages):
|
||||
@@ -112,10 +112,7 @@ def send_messages(service_id, template_id):
|
||||
s3upload(
|
||||
upload_id,
|
||||
service_id,
|
||||
{
|
||||
'file_name': form.file.data.filename,
|
||||
'data': form.file.data.read().decode('utf-8')
|
||||
},
|
||||
Spreadsheet.from_file(form.file.data.filename, form.file.data).as_dict,
|
||||
current_app.config['AWS_REGION']
|
||||
)
|
||||
session['upload_data'] = {
|
||||
|
||||
56
app/utils.py
56
app/utils.py
@@ -1,8 +1,14 @@
|
||||
import re
|
||||
import csv
|
||||
from io import StringIO
|
||||
from io import BytesIO, StringIO
|
||||
from os import path
|
||||
from functools import wraps
|
||||
from flask import (abort, session, request, url_for)
|
||||
import pyexcel
|
||||
import pyexcel.ext.io
|
||||
import pyexcel.ext.xls
|
||||
import pyexcel.ext.xlsx
|
||||
import pyexcel.ext.ods3
|
||||
|
||||
|
||||
class BrowsableItem(object):
|
||||
@@ -131,3 +137,51 @@ def email_safe(string):
|
||||
return "".join([
|
||||
character.lower() if character.isalnum() or character == "." else "" for character in re.sub("\s+", ".", string.strip()) # noqa
|
||||
])
|
||||
|
||||
|
||||
class Spreadsheet():
|
||||
|
||||
allowed_file_extensions = ['csv', 'xlsx', 'xls', 'ods', 'xlsm', 'tsv']
|
||||
|
||||
def __init__(self, filename, csv_data):
|
||||
self.filename = filename
|
||||
self.as_csv_data = csv_data
|
||||
self.as_dict = {
|
||||
'file_name': self.filename,
|
||||
'data': self.as_csv_data
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def can_handle(cls, filename):
|
||||
return cls.get_extension(filename) in cls.allowed_file_extensions
|
||||
|
||||
@staticmethod
|
||||
def get_extension(filename):
|
||||
return path.splitext(filename)[1].lower().lstrip('.')
|
||||
|
||||
@staticmethod
|
||||
def normalise_newlines(file_content):
|
||||
return '\r\n'.join(file_content.getvalue().decode('utf-8').splitlines())
|
||||
|
||||
@classmethod
|
||||
def from_file(cls, filename, file_content):
|
||||
|
||||
extension = cls.get_extension(filename)
|
||||
|
||||
if extension == 'csv':
|
||||
return cls(filename, Spreadsheet.normalise_newlines(file_content))
|
||||
|
||||
if extension == 'tsv':
|
||||
file_content = StringIO(Spreadsheet.normalise_newlines(file_content))
|
||||
|
||||
with StringIO() as converted:
|
||||
|
||||
output = csv.writer(converted)
|
||||
|
||||
for row in pyexcel.get_sheet(
|
||||
file_type=extension,
|
||||
file_content=file_content.getvalue()
|
||||
).to_array():
|
||||
output.writerow(row)
|
||||
|
||||
return cls(filename, converted.getvalue())
|
||||
|
||||
@@ -8,6 +8,11 @@ Pygments==2.0.2
|
||||
py-gfm==0.1.2
|
||||
blinker==1.4
|
||||
monotonic==0.3
|
||||
pyexcel==0.2.1
|
||||
pyexcel-io==0.1.0
|
||||
pyexcel-xls==0.1.0
|
||||
pyexcel-xlsx==0.1.0
|
||||
pyexcel-ods3==0.1.1
|
||||
|
||||
git+https://github.com/alphagov/notifications-python-client.git@1.0.0#egg=notifications-python-client==1.0.0
|
||||
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
import pytest
|
||||
import re
|
||||
from io import BytesIO
|
||||
from os import path
|
||||
from glob import glob
|
||||
from bs4 import BeautifulSoup
|
||||
from flask import url_for
|
||||
from unittest.mock import ANY
|
||||
@@ -8,6 +10,43 @@ from tests import validate_route_permission, job_json_with_created_by
|
||||
|
||||
template_types = ['email', 'sms']
|
||||
|
||||
# The * ignores hidden files, eg .DS_Store
|
||||
test_spreadsheet_files = glob(path.join('tests', 'spreadsheet_files', '*'))
|
||||
|
||||
|
||||
def test_that_test_files_exist():
|
||||
assert len(test_spreadsheet_files) == 8
|
||||
|
||||
|
||||
@pytest.mark.parametrize("filename", test_spreadsheet_files)
|
||||
def test_upload_files_in_different_formats(
|
||||
filename,
|
||||
app_,
|
||||
api_user_active,
|
||||
mocker,
|
||||
mock_login,
|
||||
mock_get_service,
|
||||
mock_get_service_template,
|
||||
mock_s3_upload,
|
||||
mock_has_permissions,
|
||||
fake_uuid
|
||||
):
|
||||
|
||||
with app_.test_request_context(), app_.test_client() as client, open(filename, 'rb') as uploaded:
|
||||
client.login(api_user_active)
|
||||
client.post(
|
||||
url_for('main.send_messages', service_id=fake_uuid, template_id=fake_uuid),
|
||||
data={'file': (BytesIO(uploaded.read()), filename)},
|
||||
content_type='multipart/form-data'
|
||||
)
|
||||
|
||||
assert mock_s3_upload.call_args[0][2]['data'].strip() == (
|
||||
"phone number,name,favourite colour,fruit\r\n"
|
||||
"07739 468 050,Pete,Coral,tomato\r\n"
|
||||
"07527 125 974,Not Pete,Magenta,Avacado\r\n"
|
||||
"07512 058 823,Still Not Pete,Crimson,Pear"
|
||||
)
|
||||
|
||||
|
||||
def test_upload_csvfile_with_errors_shows_check_page_with_errors(
|
||||
app_,
|
||||
@@ -74,7 +113,7 @@ def test_upload_csv_invalid_extension(app_,
|
||||
)
|
||||
|
||||
assert resp.status_code == 200
|
||||
assert "{} is not a CSV file".format(filename) in resp.get_data(as_text=True)
|
||||
assert "{} isn’t a spreadsheet that Notify can read".format(filename) in resp.get_data(as_text=True)
|
||||
|
||||
|
||||
def test_send_test_sms_message(
|
||||
|
||||
BIN
tests/spreadsheet_files/EXCEL_95.XLS
Executable file
BIN
tests/spreadsheet_files/EXCEL_95.XLS
Executable file
Binary file not shown.
BIN
tests/spreadsheet_files/excel 2007 with macro support.xlsm
Executable file
BIN
tests/spreadsheet_files/excel 2007 with macro support.xlsm
Executable file
Binary file not shown.
BIN
tests/spreadsheet_files/excel 2007.xlsx
Executable file
BIN
tests/spreadsheet_files/excel 2007.xlsx
Executable file
Binary file not shown.
BIN
tests/spreadsheet_files/excel_97.xls
Executable file
BIN
tests/spreadsheet_files/excel_97.xls
Executable file
Binary file not shown.
4
tests/spreadsheet_files/newline_unix.csv
Executable file
4
tests/spreadsheet_files/newline_unix.csv
Executable file
@@ -0,0 +1,4 @@
|
||||
phone number,name,favourite colour,fruit
|
||||
07739 468 050,Pete,Coral,tomato
|
||||
07527 125 974,Not Pete,Magenta,Avacado
|
||||
07512 058 823,Still Not Pete,Crimson,Pear
|
||||
|
1
tests/spreadsheet_files/newline_windows.csv
Executable file
1
tests/spreadsheet_files/newline_windows.csv
Executable file
@@ -0,0 +1 @@
|
||||
phone number,name,favourite colour,fruit
|
||||
|
BIN
tests/spreadsheet_files/open document spreadsheet.ods
Executable file
BIN
tests/spreadsheet_files/open document spreadsheet.ods
Executable file
Binary file not shown.
1
tests/spreadsheet_files/tab separated.tsv
Executable file
1
tests/spreadsheet_files/tab separated.tsv
Executable file
@@ -0,0 +1 @@
|
||||
phone number name favourite colour fruit
|
||||
|
Reference in New Issue
Block a user