mirror of
https://github.com/GSA/notifications-admin.git
synced 2026-05-04 16:11:11 -04:00
Accept common spreadsheet formats, not just CSV
We require users to export their spreadsheets as CSV files before uploading them. But this seems like the sort of thing a computer should be able to do. So this commit adds a wrapper class which: - takes the uploaded file - returns it in a normalised format, or reads it using pyexcel[1] - gives the data back in CSV format This allows us to accept `.csv`, `.xlsx`, `.xls` (97 and 95), `.ods`, `.xlsm` and `.tsv` files. We can upload the resultant CSV just like normal, and process it for errors as before. Testing --- To test this I’ve added a selection of common spreadsheet files as test data. They all contain the same data, so the tests look to see that the resultant CSV output is the same for each. UI changes --- This commit doesn’t change the UI, apart from giving a different error message if a user uploads a file type that we still don’t understand. I intend to do this as a separate pull request, in order to fulfil https://www.pivotaltracker.com/story/show/119371637
This commit is contained in:
56
app/utils.py
56
app/utils.py
@@ -1,8 +1,14 @@
|
||||
import re
|
||||
import csv
|
||||
from io import StringIO
|
||||
from io import BytesIO, StringIO
|
||||
from os import path
|
||||
from functools import wraps
|
||||
from flask import (abort, session, request, url_for)
|
||||
import pyexcel
|
||||
import pyexcel.ext.io
|
||||
import pyexcel.ext.xls
|
||||
import pyexcel.ext.xlsx
|
||||
import pyexcel.ext.ods3
|
||||
|
||||
|
||||
class BrowsableItem(object):
|
||||
@@ -131,3 +137,51 @@ def email_safe(string):
|
||||
return "".join([
|
||||
character.lower() if character.isalnum() or character == "." else "" for character in re.sub("\s+", ".", string.strip()) # noqa
|
||||
])
|
||||
|
||||
|
||||
class Spreadsheet():
    """Wrap an uploaded spreadsheet and expose its contents as CSV text.

    Instances hold the original filename and the CSV representation of the
    data. Use :meth:`from_file` to build one from an uploaded file; CSV
    uploads are only newline-normalised, every other supported format is
    read with pyexcel and re-serialised with the ``csv`` module.

    Attributes:
        filename: the name of the uploaded file, as given by the caller.
        as_csv_data: the spreadsheet contents as a CSV string.
        as_dict: ``{'file_name': filename, 'data': as_csv_data}``.
    """

    # File extensions (lower case, without the leading dot) accepted by
    # can_handle().
    allowed_file_extensions = ['csv', 'xlsx', 'xls', 'ods', 'xlsm', 'tsv']

    def __init__(self, filename, csv_data):
        """Store the filename and the already-converted CSV text.

        Args:
            filename: name of the uploaded file.
            csv_data: contents of the spreadsheet as a CSV string.
        """
        self.filename = filename
        self.as_csv_data = csv_data
        self.as_dict = {
            'file_name': self.filename,
            'data': self.as_csv_data
        }

    @classmethod
    def can_handle(cls, filename):
        """Return True if the filename's extension is in the allowed list."""
        return cls.get_extension(filename) in cls.allowed_file_extensions

    @staticmethod
    def get_extension(filename):
        """Return the lower-cased extension of ``filename`` without the dot.

        Returns an empty string when the filename has no extension.
        """
        return path.splitext(filename)[1].lower().lstrip('.')

    @staticmethod
    def normalise_newlines(file_content):
        """Decode a bytes buffer as UTF-8 and join its lines with CRLF.

        Args:
            file_content: an object whose ``getvalue()`` returns bytes
                (for example ``io.BytesIO``).

        Returns:
            The decoded text with every line separated by ``'\\r\\n'`` and
            no trailing line terminator.

        Raises:
            UnicodeDecodeError: if the content is not valid UTF-8.
        """
        # 'utf-8-sig' behaves exactly like 'utf-8' except that it drops a
        # leading byte-order mark. Without this, a BOM (written by some
        # spreadsheet applications when exporting CSV) would end up glued to
        # the first column header as '\ufeff'.
        text = file_content.getvalue().decode('utf-8-sig')
        return '\r\n'.join(text.splitlines())

    @classmethod
    def from_file(cls, filename, file_content):
        """Build a Spreadsheet from an uploaded file.

        Args:
            filename: name of the uploaded file; its extension selects how
                the content is read.
            file_content: an object whose ``getvalue()`` returns the raw
                bytes of the upload.

        Returns:
            A new instance whose ``as_csv_data`` holds the contents as CSV.
        """
        extension = cls.get_extension(filename)

        # CSV needs no conversion, only consistent line endings.
        if extension == 'csv':
            return cls(filename, cls.normalise_newlines(file_content))

        # TSV is text, so decode and normalise it before handing the text
        # (rather than raw bytes) to pyexcel.
        if extension == 'tsv':
            file_content = StringIO(cls.normalise_newlines(file_content))

        with StringIO() as converted:
            output = csv.writer(converted)
            sheet = pyexcel.get_sheet(
                file_type=extension,
                file_content=file_content.getvalue()
            )
            for row in sheet.to_array():
                output.writerow(row)
            # Read the buffer before the context manager closes it.
            return cls(filename, converted.getvalue())
|
||||
|
||||
Reference in New Issue
Block a user