Files
notifications-admin/app/models/spreadsheet.py
Chris Hill-Scott 80854ab2cc Force xlsm files to open with pyexcel-xlsx
`.xlsm` files are like `.xlsx` files but with macros enabled. They store
data in the same XML-based format as `.xlsx` files.

Pyexcel will try to use the xlrd package to parse `.xlsm` files. This
used to work because xlrd used to support reading `.xlsx` files. xlrd
has dropped support for `.xlsx` files in version 2 because of security
concerns. This means that when pyexcel asks xlrd to parse a `.xlsm` file
it causes an error.

This commit adds some branching to force `.xlsm` files to be opened
with pyexcel-xlsx instead, which does support `.xlsx` files.
2022-05-13 13:17:55 +01:00

99 lines
2.7 KiB
Python

import csv
from io import StringIO
from os import path
import pyexcel
import pyexcel_xlsx
class Spreadsheet:
    """Tabular data held either as CSV text or as an iterable of rows.

    Instances are normally created through one of the ``from_*``
    constructors. Whatever the source, the contents can be read back as
    CSV text through ``as_csv_data``.
    """

    # Extensions (lower case, no leading dot) that ``can_handle`` accepts.
    ALLOWED_FILE_EXTENSIONS = ('csv', 'xlsx', 'xls', 'ods', 'xlsm', 'tsv')

    def __init__(self, csv_data=None, rows=None, filename=''):
        """Store the data and the filename it came from.

        Args:
            csv_data: CSV text, or ``None``.
            rows: iterable of rows (each an iterable of cells), or ``None``.
            filename: name reported under ``file_name`` by ``as_dict``.

        Raises:
            TypeError: if both ``csv_data`` and ``rows`` are truthy.
        """
        self.filename = filename

        if csv_data and rows:
            raise TypeError('Spreadsheet must be created from either rows or CSV data')

        self._csv_data = csv_data or ''
        self._rows = rows or []

    @property
    def as_dict(self):
        """Return ``{'file_name': ..., 'data': ...}`` with the data as CSV text."""
        return {
            'file_name': self.filename,
            'data': self.as_csv_data
        }

    @property
    def as_csv_data(self):
        """Return the contents as CSV text.

        When no CSV text is stored yet, the rows are serialised with
        ``csv.writer`` and the result is kept on the instance, so rows
        supplied as a one-shot iterator are consumed only once when they
        produce non-empty output.
        """
        if not self._csv_data:
            with StringIO() as converted:
                csv.writer(converted).writerows(self._rows)
                self._csv_data = converted.getvalue()
        return self._csv_data

    @classmethod
    def can_handle(cls, filename):
        """Return True if the extension of ``filename`` is an allowed one."""
        return cls.get_extension(filename) in cls.ALLOWED_FILE_EXTENSIONS

    @staticmethod
    def get_extension(filename):
        """Return the lower-cased extension of ``filename`` without the dot.

        Returns an empty string when the name has no extension.
        """
        return path.splitext(filename)[1].lower().lstrip('.')

    @staticmethod
    def normalise_newlines(file_content):
        """Read a binary stream as UTF-8 and join its lines with CRLF.

        Because the text is split into lines and re-joined, a trailing
        line break in the input is not present in the result.

        Raises:
            UnicodeDecodeError: if the content is not valid UTF-8.
        """
        return '\r\n'.join(file_content.read().decode('utf-8').splitlines())

    @classmethod
    def from_rows(cls, rows, filename=''):
        """Build a spreadsheet from an iterable of rows."""
        return cls(rows=rows, filename=filename)

    @classmethod
    def from_dict(cls, dictionary, filename=''):
        """Build a spreadsheet from a mapping of column name to value.

        Items are sorted by key; the first row holds the keys and the
        second row holds the corresponding values. An empty mapping
        yields no rows.
        """
        return cls.from_rows(
            zip(
                *sorted(dictionary.items(), key=lambda pair: pair[0])
            ),
            filename=filename,
        )

    @classmethod
    def from_file(cls, file_content, filename=''):
        """Build a spreadsheet from an uploaded file.

        The parser is chosen from the extension of ``filename``:

        * ``csv`` content is decoded and stored directly as CSV text.
        * ``tsv`` content is decoded first, then handed to pyexcel.
        * ``xlsm`` content is read with pyexcel-xlsx and only the first
          sheet is used.
        * anything else is handed to pyexcel with the extension as the
          file type.

        Args:
            file_content: readable stream holding the file's content.
            filename: original file name; only its extension is inspected.
        """
        extension = cls.get_extension(filename)

        if extension == 'csv':
            # Go through ``cls`` so a subclass override is honoured, in
            # line with ``cls.get_extension`` above.
            return cls(csv_data=cls.normalise_newlines(file_content), filename=filename)

        if extension == 'tsv':
            file_content = StringIO(cls.normalise_newlines(file_content))

        if extension == 'xlsm':
            file_data = pyexcel_xlsx.get_data(file_content)
            # Use the first sheet of the workbook without materialising
            # every sheet; fall back to no rows if there are no sheets.
            first_sheet = next(iter(file_data.values()), [])
            return cls.from_rows(first_sheet, filename)

        try:
            return cls.from_rows(
                pyexcel.iget_array(
                    file_type=extension,
                    file_stream=file_content,
                ),
                filename,
            )
        finally:
            # Release pyexcel's handles even when reading the file fails.
            pyexcel.free_resources()

    @classmethod
    def from_file_form(cls, form):
        """Build a spreadsheet from a form whose ``file.data`` is the upload."""
        return cls.from_file(
            form.file.data,
            filename=form.file.data.filename,
        )