mirror of
https://github.com/GSA/notifications-admin.git
synced 2026-02-05 10:53:28 -05:00
`.xlsm` files are like `.xlsx` files but with macros enabled. They store data in the same XML-based format as `.xlsx` files. Pyexcel will try to use the xlrd package to parse `.xlsm` files. This used to work because xlrd used to support reading `.xlsx` files. However, xlrd dropped support for `.xlsx` files in version 2 because of security concerns, which means that when pyexcel asks xlrd to parse a `.xlsm` file it causes an error. This commit adds some branching to force `.xlsm` files to be opened with pyexcel-xlsx instead, which does support `.xlsx` files.
99 lines
2.7 KiB
Python
99 lines
2.7 KiB
Python
import csv
|
|
from io import StringIO
|
|
from os import path
|
|
|
|
import pyexcel
|
|
import pyexcel_xlsx
|
|
|
|
|
|
class Spreadsheet():
    """Tabular data held either as a CSV string or as an iterable of rows.

    Instances are normally built through one of the ``from_*`` alternate
    constructors.  Whatever the source, the data can be read back as CSV
    text via ``as_csv_data``.
    """

    # File extensions (lower case, without the leading dot) that
    # ``can_handle`` accepts.
    ALLOWED_FILE_EXTENSIONS = ('csv', 'xlsx', 'xls', 'ods', 'xlsm', 'tsv')

    def __init__(self, csv_data=None, rows=None, filename=''):
        """Store the source data.

        Args:
            csv_data: CSV text, or a falsy value when ``rows`` is used.
            rows: iterable of rows (each an iterable of cell values), or a
                falsy value when ``csv_data`` is used.
            filename: name reported back under ``file_name`` in ``as_dict``.

        Raises:
            TypeError: if both ``csv_data`` and ``rows`` are truthy.
        """
        self.filename = filename

        if csv_data and rows:
            raise TypeError('Spreadsheet must be created from either rows or CSV data')

        self._csv_data = csv_data or ''
        self._rows = rows or []

    @property
    def as_dict(self):
        """Return the filename and the CSV text as a two-key dictionary."""
        return {
            'file_name': self.filename,
            'data': self.as_csv_data
        }

    @property
    def as_csv_data(self):
        """Return the data as CSV text, converting the rows on first use.

        A non-empty result is cached on the instance, so the rows are only
        written out again while the cached text is still empty.
        """
        if not self._csv_data:
            with StringIO() as converted:
                output = csv.writer(converted)
                for row in self._rows:
                    output.writerow(row)
                self._csv_data = converted.getvalue()
        return self._csv_data

    @classmethod
    def can_handle(cls, filename):
        """Return True if the extension of ``filename`` is an allowed one."""
        return cls.get_extension(filename) in cls.ALLOWED_FILE_EXTENSIONS

    @staticmethod
    def get_extension(filename):
        """Return the lower-cased extension of ``filename`` without its dot.

        A missing filename (``None`` or ``''``) is treated as having no
        extension and yields ``''`` instead of raising ``TypeError``.
        """
        return path.splitext(filename or '')[1].lower().lstrip('.')

    @staticmethod
    def normalise_newlines(file_content):
        """Read a binary file object as UTF-8 and join its lines with CRLF.

        Args:
            file_content: object whose ``read()`` returns UTF-8 encoded bytes.

        Returns:
            The decoded text with every line break replaced by ``'\\r\\n'``;
            a trailing line break is not kept.
        """
        return '\r\n'.join(file_content.read().decode('utf-8').splitlines())

    @classmethod
    def from_rows(cls, rows, filename=''):
        """Build a spreadsheet from an iterable of rows."""
        return cls(rows=rows, filename=filename)

    @classmethod
    def from_dict(cls, dictionary, filename=''):
        """Build a two-row spreadsheet from a mapping.

        The items are sorted by key; the first row holds the keys and the
        second row holds the matching values.
        """
        return cls.from_rows(
            zip(
                *sorted(dictionary.items(), key=lambda pair: pair[0])
            ),
            filename=filename,
        )

    @classmethod
    def from_file(cls, file_content, filename=''):
        """Build a spreadsheet from an uploaded file.

        The parser is chosen from the extension of ``filename``.

        Args:
            file_content: binary file object holding the upload.
            filename: original name of the upload, used to pick the parser.

        Returns:
            A new instance holding the file's data.
        """
        extension = cls.get_extension(filename)

        if extension == 'csv':
            return cls(csv_data=cls.normalise_newlines(file_content), filename=filename)

        if extension == 'tsv':
            # pyexcel is handed decoded text for TSV rather than raw bytes.
            file_content = StringIO(cls.normalise_newlines(file_content))

        if extension == 'xlsm':
            # Parse with pyexcel-xlsx directly: pyexcel would try xlrd for
            # .xlsm files, and xlrd 2 no longer reads the XML-based format.
            file_data = pyexcel_xlsx.get_data(file_content)
            return cls.from_rows(
                # Get the first sheet from the workbook
                list(file_data.values())[0],
                filename,
            )

        try:
            instance = cls.from_rows(
                pyexcel.iget_array(
                    file_type=extension,
                    file_stream=file_content,
                ),
                filename,
            )
        finally:
            # Release pyexcel's handles even when parsing raises.
            pyexcel.free_resources()
        return instance

    @classmethod
    def from_file_form(cls, form):
        """Build a spreadsheet from ``form.file.data`` and its ``filename``."""
        return cls.from_file(
            form.file.data,
            filename=form.file.data.filename,
        )
|