Merge pull request #241 from alphagov/utils-csv-processing

Give the user better error messages for CSV files
This commit is contained in:
Chris Hill-Scott
2016-03-09 11:19:20 +00:00
17 changed files with 336 additions and 487 deletions

View File

@@ -16,7 +16,7 @@ from wtforms.validators import DataRequired, Email, Length, Regexp
from app.main.validators import Blacklist, CsvFileValidator
from app.utils import (
from utils.recipients import (
validate_phone_number,
format_phone_number,
InvalidPhoneError
@@ -37,19 +37,10 @@ class UKMobileNumber(TelField):
def pre_validate(self, form):
try:
self.data = validate_phone_number(self.data)
validate_phone_number(self.data)
except InvalidPhoneError as e:
raise ValidationError(e.message)
def post_validate(self, form, validation_stopped):
if len(self.data) != 9:
return
# TODO implement in the render field method.
# API's require no spaces in the number
# self.data = '+44 7{} {} {}'.format(*re.findall('...', self.data))
self.data = format_phone_number(self.data)
def mobile_number():
return UKMobileNumber('Mobile phone number',

View File

@@ -8,7 +8,7 @@ BUCKET_NAME = 'service-{}-notify'
def s3upload(upload_id, service_id, filedata, region):
s3 = resource('s3')
bucket_name = BUCKET_NAME.format(service_id)
contents = '\n'.join(filedata['data'])
contents = filedata['data']
exists = True
try:

View File

@@ -1,6 +1,7 @@
import csv
import io
import uuid
from contextlib import suppress
from flask import (
request,
@@ -15,7 +16,8 @@ from flask import (
from flask_login import login_required, current_user
from notifications_python_client.errors import HTTPError
from utils.template import Template, NeededByTemplateError, NoPlaceholderForDataError
from utils.template import Template
from utils.recipients import RecipientCSV, first_column_heading
from app.main import main
from app.main.forms import CsvUploadForm
@@ -26,10 +28,7 @@ from app.main.uploader import (
from app.main.dao import templates_dao
from app.main.dao import services_dao
from app import job_api_client
from app.utils import (
validate_recipient, validate_header_row, InvalidPhoneError, InvalidEmailError, user_has_permissions,
InvalidHeaderError)
from utils.process_csv import first_column_heading
from app.utils import user_has_permissions, get_errors_for_csv
send_messages_page_headings = {
@@ -100,16 +99,25 @@ def send_messages(service_id, template_id):
form = CsvUploadForm()
if form.validate_on_submit():
try:
csv_file = form.file
filedata = _get_filedata(csv_file)
upload_id = str(uuid.uuid4())
s3upload(upload_id, service_id, filedata, current_app.config['AWS_REGION'])
session['upload_data'] = {"template_id": template_id, "original_file_name": filedata['file_name']}
s3upload(
upload_id,
service_id,
{
'file_name': form.file.data.filename,
'data': form.file.data.getvalue().decode('utf-8')
},
current_app.config['AWS_REGION']
)
session['upload_data'] = {
"template_id": template_id,
"original_file_name": form.file.data.filename
}
return redirect(url_for('.check_messages',
service_id=service_id,
upload_id=upload_id))
except ValueError as e:
flash('There was a problem uploading: {}'.format(csv_file.data.filename))
flash('There was a problem uploading: {}'.format(form.file.data.filename))
flash(str(e))
return redirect(url_for('.send_messages', service_id=service_id, template_id=template_id))
@@ -118,7 +126,6 @@ def send_messages(service_id, template_id):
templates_dao.get_service_template_or_404(service_id, template_id)['data'],
prefix=service['name']
)
recipient_column = first_column_heading[template.template_type]
return render_template(
'views/send.html',
@@ -174,7 +181,7 @@ def send_message_to_self(service_id, template_id):
filedata = {
'file_name': 'Test run',
'data': output.getvalue().splitlines()
'data': output.getvalue()
}
upload_id = str(uuid.uuid4())
s3upload(upload_id, service_id, filedata, current_app.config['AWS_REGION'])
@@ -185,107 +192,75 @@ def send_message_to_self(service_id, template_id):
upload_id=upload_id))
@main.route("/services/<service_id>/check/<upload_id>",
methods=['GET', 'POST'])
@main.route("/services/<service_id>/check/<upload_id>", methods=['GET'])
@login_required
@user_has_permissions('send_texts', 'send_emails', 'send_letters')
def check_messages(service_id, upload_id):
upload_data = session['upload_data']
template_id = upload_data.get('template_id')
service = services_dao.get_service_by_id_or_404(service_id)
if request.method == 'GET':
contents = s3download(service_id, upload_id)
if not contents:
flash('There was a problem reading your upload file')
raw_template = templates_dao.get_service_template_or_404(service_id, template_id)['data']
upload_result = _get_rows(contents, raw_template)
session['upload_data']['notification_count'] = len(upload_result['rows'])
template = Template(
raw_template,
values=upload_result['rows'][0] if upload_result['valid'] else {},
drop_values={first_column_heading[raw_template['template_type']]},
prefix=service['name']
)
return render_template(
'views/check.html',
upload_result=upload_result,
template=template,
page_heading=get_page_headings(template.template_type),
column_headers=[first_column_heading[template.template_type]] + list(template.placeholders_as_markup),
original_file_name=upload_data.get('original_file_name'),
service_id=service_id,
service=service,
form=CsvUploadForm()
)
elif request.method == 'POST':
if request.files:
# The csv was invalid, validate the csv again
return send_messages(service_id, template_id)
contents = s3download(service_id, upload_id)
if not contents:
flash('There was a problem reading your upload file')
original_file_name = upload_data.get('original_file_name')
notification_count = upload_data.get('notification_count')
session.pop('upload_data')
try:
job_api_client.create_job(upload_id, service_id, template_id, original_file_name, notification_count)
except HTTPError as e:
if e.status_code == 404:
abort(404)
else:
raise e
return redirect(
url_for('main.view_job', service_id=service_id, job_id=upload_id)
)
def _get_filedata(file):
import itertools
reader = csv.reader(
file.data.getvalue().decode('utf-8').splitlines(),
quoting=csv.QUOTE_NONE,
skipinitialspace=True
template = Template(
templates_dao.get_service_template_or_404(
service_id,
session['upload_data'].get('template_id')
)['data'],
prefix=service['name']
)
lines = []
for row in reader:
non_empties = itertools.dropwhile(lambda x: x.strip() == '', row)
has_content = []
for item in non_empties:
has_content.append(item)
if has_content:
lines.append(row)
if len(lines) < 2: # must be header row and at least one data row
message = 'The file {} contained no data'.format(file.data.filename)
raise ValueError(message)
content_lines = []
for row in lines:
content_lines.append(','.join(row).rstrip(','))
return {'file_name': file.data.filename, 'data': content_lines}
def _get_rows(contents, raw_template):
reader = csv.DictReader(
contents.split('\n'),
quoting=csv.QUOTE_NONE,
skipinitialspace=True
recipients = RecipientCSV(
contents,
template_type=template.template_type,
placeholders=template.placeholders,
max_initial_rows_shown=5
)
with suppress(StopIteration):
template.values = next(recipients.rows)
session['upload_data']['notification_count'] = len(list(recipients.rows))
session['upload_data']['valid'] = not recipients.has_errors
return render_template(
'views/check.html',
recipients=recipients,
template=template,
page_heading=get_page_headings(template.template_type),
errors=get_errors_for_csv(recipients, template.template_type),
count_of_recipients=session['upload_data']['notification_count'],
count_of_displayed_recipients=len(list(recipients.rows_annotated_and_truncated)),
original_file_name=session['upload_data'].get('original_file_name'),
service_id=service_id,
service=service,
form=CsvUploadForm()
)
@main.route("/services/<service_id>/check/<upload_id>", methods=['POST'])
@login_required
@user_has_permissions('send_texts', 'send_emails', 'send_letters')
def start_job(service_id, upload_id):
upload_data = session['upload_data']
services_dao.get_service_by_id_or_404(service_id)
if request.files or not upload_data.get('valid'):
# The csv was invalid, validate the csv again
return send_messages(service_id, upload_data.get('template_id'))
session.pop('upload_data')
job_api_client.create_job(
upload_id,
service_id,
upload_data.get('template_id'),
upload_data.get('original_file_name'),
upload_data.get('notification_count')
)
return redirect(
url_for('main.view_job', service_id=service_id, job_id=upload_id)
)
valid = True
rows = []
for row in reader:
rows.append(row)
try:
validate_recipient(
row, template_type=raw_template['template_type']
)
Template(
raw_template,
values=row,
drop_values={first_column_heading[raw_template['template_type']]}
).replaced
except (InvalidEmailError, InvalidPhoneError, NeededByTemplateError,
NoPlaceholderForDataError, InvalidHeaderError):
valid = False
return {"valid": valid, "rows": rows}