Merge pull request #1070 from alphagov/return-streamed-csv

Fix timeout on download of large CSV reports
This commit is contained in:
imdadahad
2017-01-16 10:34:25 +00:00
committed by GitHub
7 changed files with 256 additions and 144 deletions

View File

@@ -311,7 +311,7 @@ def format_notification_status(status, template_type):
'sending': 'Sending',
'created': 'Sending'
}
}.get(template_type).get(status, status)
}[template_type].get(status, status)
def format_notification_status_as_time(status, created, updated):

View File

@@ -12,7 +12,8 @@ from flask import (
request,
url_for,
current_app,
redirect
redirect,
Response
)
from flask_login import login_required
from werkzeug.datastructures import MultiDict
@@ -103,7 +104,6 @@ def view_job(service_id, job_id):
total_notifications = job.get('notification_count', 0)
processed_notifications = job.get('notifications_delivered', 0) + job.get('notifications_failed', 0)
return render_template(
'views/jobs/job.html',
finished=(total_notifications == processed_notifications),
@@ -142,23 +142,20 @@ def view_job_csv(service_id, job_id):
filter_args = _parse_filter_args(request.args)
filter_args['status'] = _set_status_filters(filter_args)
return (
return Response(
generate_notifications_csv(
notification_api_client.get_notifications_for_service(
service_id,
job_id,
status=filter_args.get('status'),
page_size=job['notification_count']
)['notifications']
service_id=service_id,
job_id=job_id,
status=filter_args.get('status'),
page=request.args.get('page', 1),
page_size=5000
),
200,
{
'Content-Type': 'text/csv; charset=utf-8',
mimetype='text/csv',
headers={
'Content-Disposition': 'inline; filename="{} - {}.csv"'.format(
template['name'],
format_datetime_short(job['created_at'])
)
}
)}
)
@@ -208,9 +205,22 @@ def get_notifications(service_id, message_type, status_override=None):
abort(404, "Invalid page argument ({}) reverting to page 1.".format(request.args['page'], None))
if message_type not in ['email', 'sms']:
abort(404)
filter_args = _parse_filter_args(request.args)
filter_args['status'] = _set_status_filters(filter_args)
if request.path.endswith('csv'):
return Response(
generate_notifications_csv(
service_id=service_id,
page=page,
page_size=5000,
template_type=[message_type],
status=filter_args.get('status'),
limit_days=current_app.config['ACTIVITY_STATS_LIMIT_DAYS']
),
mimetype='text/csv',
headers={
'Content-Disposition': 'inline; filename="notifications.csv"'}
)
notifications = notification_api_client.get_notifications_for_service(
service_id=service_id,
@@ -224,25 +234,14 @@ def get_notifications(service_id, message_type, status_override=None):
'status': request.args.get('status')
}
prev_page = None
if notifications['links'].get('prev', None):
prev_page = generate_previous_dict('main.view_notifications', service_id, page, url_args=url_args)
next_page = None
if notifications['links'].get('next', None):
next_page = generate_next_dict('main.view_notifications', service_id, page, url_args)
if request.path.endswith('csv'):
csv_content = generate_notifications_csv(
notification_api_client.get_notifications_for_service(
service_id=service_id,
page=page,
page_size=notifications['total'],
template_type=[message_type],
status=filter_args.get('status'),
limit_days=current_app.config['ACTIVITY_STATS_LIMIT_DAYS'])['notifications'])
return csv_content, 200, {
'Content-Type': 'text/csv; charset=utf-8',
'Content-Disposition': 'inline; filename="notifications.csv"'
}
return {
'counts': render_template(
'views/activity/counts.html',
@@ -296,7 +295,7 @@ def get_status_filters(service, message_type, statistics):
stats[key]
)
for key, label, option in filters
]
]
def _get_job_counts(job, help_argument):

View File

@@ -5,7 +5,14 @@ from os import path
from functools import wraps
import unicodedata
from flask import (abort, current_app, session, request, redirect, url_for)
from flask import (
abort,
current_app,
redirect,
request,
session,
url_for
)
from flask_login import current_user
from wand.image import Image
@@ -111,24 +118,61 @@ def get_errors_for_csv(recipients, template_type):
return errors
def generate_notifications_csv(json_list):
def generate_notifications_csv(**kwargs):
from app import notification_api_client
csvfile = StringIO()
csvwriter = csv.DictWriter(
csvfile,
fieldnames=['Row number', 'Recipient', 'Template', 'Type', 'Job', 'Status', 'Time']
)
def read_current_row():
csvfile.seek(0)
line = csvfile.read()
return line
def clear_file_buffer():
csvfile.seek(0)
csvfile.truncate()
# Initiate download quicker by returning the headers first
csvwriter.writeheader()
line = read_current_row()
clear_file_buffer()
yield line
# get_notifications_for_service is always paginated by the api, so set a page param if not specified
if 'page' not in kwargs:
kwargs['page'] = 1
while kwargs['page']:
notifications_resp = notification_api_client.get_notifications_for_service(**kwargs)
notifications_list = notifications_resp['notifications']
for x in notifications_list:
csvwriter.writerow(format_notification_for_csv(x))
line = read_current_row()
clear_file_buffer()
yield line
if notifications_resp['links'].get('next'):
kwargs['page'] += 1
else:
return
def format_notification_for_csv(notification):
from app import format_datetime_24h, format_notification_status
content = StringIO()
retval = None
with content as csvfile:
csvwriter = csv.writer(csvfile)
csvwriter.writerow(['Row number', 'Recipient', 'Template', 'Type', 'Job', 'Status', 'Time'])
for x in json_list:
csvwriter.writerow([
int(x['job_row_number']) + 2 if 'job_row_number' in x and x['job_row_number'] else '',
x['to'],
x['template']['name'],
x['template']['template_type'],
x['job']['original_file_name'] if x['job'] else '',
format_notification_status(x['status'], x['template']['template_type']),
format_datetime_24h(x['created_at'])])
retval = content.getvalue()
return retval
return {
'Row number': int(notification['job_row_number']) + 2 if notification.get('job_row_number') else '',
'Recipient': notification['to'],
'Template': notification['template']['name'],
'Type': notification['template']['template_type'],
'Job': notification['job']['original_file_name'] if notification['job'] else '',
'Status': format_notification_status(notification['status'], notification['template']['template_type']),
'Time': format_datetime_24h(notification['created_at'])
}
def get_page_from_request():

View File

@@ -1,6 +1,8 @@
import csv
import pytest
import uuid
from datetime import datetime, timedelta, date, timezone
from datetime import datetime, timedelta, timezone
from flask.testing import FlaskClient
from flask import url_for
from flask_login import login_user
@@ -219,12 +221,14 @@ def notification_json(
if status is None:
status = 'delivered'
links = {}
if with_links:
links = {
'prev': '/service/{}/notifications'.format(service_id),
'next': '/service/{}/notifications'.format(service_id),
'last': '/service/{}/notifications'.format(service_id)
'prev': '/service/{}/notifications?page=0'.format(service_id),
'next': '/service/{}/notifications?page=1'.format(service_id),
'last': '/service/{}/notifications?page=2'.format(service_id)
}
job_payload = None
if job:
job_payload = {'id': job['id'], 'original_file_name': job['original_file_name']}
@@ -277,6 +281,7 @@ def single_notification_json(
data = {
'sent_at': sent_at,
'to': '07123456789',
'billable_units': 1,
'status': status,
'created_at': created_at,

View File

@@ -1,3 +1,4 @@
import csv
import json
import uuid
from urllib.parse import urlparse, quote, parse_qs
@@ -6,12 +7,32 @@ import pytest
from flask import url_for
from bs4 import BeautifulSoup
from app.utils import generate_notifications_csv
from app import utils
from io import StringIO
from app.main.views.jobs import get_time_left, get_status_filters
from tests import notification_json
from freezegun import freeze_time
def _csv_notifications(notifications_json):
csvfile = StringIO()
csvwriter = csv.writer(csvfile)
from app import format_datetime_24h, format_notification_status
csvwriter.writerow(['Row number', 'Recipient', 'Template', 'Type', 'Job', 'Status', 'Time'])
for x in notifications_json:
csvwriter.writerow([
int(x['job_row_number']) + 2 if 'job_row_number' in x and x['job_row_number'] else '',
x['to'],
x['template']['name'],
x['template']['template_type'],
x['job']['original_file_name'] if x['job'] else '',
format_notification_status(x['status'], x['template']['template_type']),
format_datetime_24h(x['created_at'])
])
return csvfile.getvalue()
def test_get_jobs_should_return_list_of_all_real_jobs(
client,
service_one,
@@ -111,7 +132,6 @@ def test_should_show_page_for_one_job(
)
assert csv_link.text == 'Download this report'
assert page.find('span', {'id': 'time-left'}).text == 'Data available for 7 days'
assert page.find('p', {'class': 'table-show-more-link'}).text.strip() == 'Only showing the first 50 rows'
mock_get_notifications.assert_called_with(
service_one['id'],
fake_uuid,
@@ -119,6 +139,26 @@ def test_should_show_page_for_one_job(
)
def test_get_jobs_should_tell_user_if_more_than_one_page(
logged_in_client,
fake_uuid,
service_one,
mock_get_job,
mock_get_service_template,
mock_get_notifications_with_previous_next
):
response = logged_in_client.get(url_for(
'main.view_job',
service_id=service_one['id'],
job_id=fake_uuid,
status=''
))
assert response.status_code == 200
page = BeautifulSoup(response.data.decode('utf-8'), 'html.parser')
assert page.find('p', {'class': 'table-show-more-link'}).text.strip() == 'Only showing the first 50 rows'
def test_should_show_job_in_progress(
app_,
service_one,
@@ -317,6 +357,7 @@ def test_can_show_notifications(
page_argument,
expected_page_argument
):
# todo refactor, possibly consider deleting?
with app_.test_request_context():
with app_.test_client() as client:
client.login(active_user_with_permissions, mocker, service_one)
@@ -328,7 +369,6 @@ def test_can_show_notifications(
page=page_argument))
assert response.status_code == 200
content = response.get_data(as_text=True)
notifications = notification_json(service_one['id'])
notification = notifications['notifications'][0]
assert notification['to'] in content
@@ -343,7 +383,6 @@ def test_can_show_notifications(
message_type=message_type,
status=status_argument
) == page.findAll("a", {"download": "download"})[0]['href']
path_to_json = page.find("div", {'data-key': 'notifications'})['data-resource']
url = urlparse(path_to_json)
@@ -365,12 +404,32 @@ def test_can_show_notifications(
csv_response = client.get(url_for(
'main.view_notifications_csv',
service_id=service_one['id'],
message_type='email',
message_type=message_type,
download='csv'
))
csv_content = generate_notifications_csv(
mock_get_notifications(service_one['id'])['notifications']
notifications_json = mock_get_notifications(service_one['id'], template_type=[message_type])['notifications']
notifications_as_csv = _csv_notifications(notifications_json)
mock_notifications_as_csv = mocker.patch('app.utils.generate_notifications_csv',
return_value=notifications_as_csv)
csv_content = utils.generate_notifications_csv(
limit_days=7,
page=expected_page_argument,
service_id=service_one['id'],
status=expected_api_call,
template_type=[message_type]
)
mock_notifications_as_csv.assert_called_with(
limit_days=7,
page=expected_page_argument,
service_id=service_one['id'],
status=expected_api_call,
template_type=[message_type]
)
assert csv_response.status_code == 200
assert csv_response.get_data(as_text=True) == csv_content
assert 'text/csv' in csv_response.headers['Content-Type']
@@ -421,33 +480,6 @@ def test_should_show_notifications_for_a_service_with_next_previous(
assert 'page 1' in prev_page_link.text.strip()
@freeze_time("2016-01-01 11:09:00.061258")
def test_should_download_notifications_for_a_job(app_,
api_user_active,
mock_login,
mock_get_service,
mock_get_job,
mock_get_notifications,
mock_get_template_version,
mock_has_permissions,
fake_uuid):
with app_.test_request_context():
with app_.test_client() as client:
client.login(api_user_active)
response = client.get(url_for(
'main.view_job_csv',
service_id=fake_uuid,
job_id=fake_uuid,
))
csv_content = generate_notifications_csv(
mock_get_notifications(fake_uuid, job_id=fake_uuid)['notifications']
)
assert response.status_code == 200
assert response.get_data(as_text=True) == csv_content
assert 'text/csv' in response.headers['Content-Type']
assert 'sample template - 1 January at 11:09am.csv"' in response.headers['Content-Disposition']
@pytest.mark.parametrize(
"job_created_at, expected_message", [
("2016-01-10 11:09:00.000000+00:00", "Data available for 7 days"),

View File

@@ -5,7 +5,16 @@ from csv import DictReader
import pytest
from freezegun import freeze_time
from app.utils import email_safe, generate_notifications_csv, generate_previous_dict, generate_next_dict, Spreadsheet
from app.utils import (
email_safe,
generate_notifications_csv,
generate_previous_dict,
generate_next_dict,
Spreadsheet,
format_notification_for_csv
)
from tests import notification_json, single_notification_json, template_json
@pytest.mark.parametrize('service_name, safe_email', [
@@ -24,44 +33,6 @@ def test_email_safe_return_dot_separated_email_domain(service_name, safe_email):
assert email_safe(service_name) == safe_email
@pytest.mark.parametrize(
"status, template_type, expected_status",
[
('sending', None, 'Sending'),
('delivered', None, 'Delivered'),
('failed', None, 'Failed'),
('technical-failure', None, 'Technical failure'),
('temporary-failure', 'email', 'Inbox not accepting messages right now'),
('permanent-failure', 'email', 'Email address doesnt exist'),
('temporary-failure', 'sms', 'Phone not accepting messages right now'),
('permanent-failure', 'sms', 'Phone number doesnt exist')
]
)
@freeze_time("2016-01-01 15:09:00.061258")
def test_generate_csv_from_notifications(
app_,
service_one,
active_user_with_permissions,
mock_get_notifications,
status,
template_type,
expected_status
):
with app_.test_request_context():
csv_content = generate_notifications_csv(
mock_get_notifications(
service_one['id'],
rows=1,
set_template_type=template_type,
set_status=status
)['notifications']
)
for row in DictReader(StringIO(csv_content)):
assert row['Time'] == 'Friday 01 January 2016 at 15:09'
assert row['Status'] == expected_status
def test_generate_previous_dict(client):
ret = generate_previous_dict('main.view_jobs', 'foo', 2, {})
assert 'page=1' in ret['url']
@@ -85,3 +56,71 @@ def test_can_create_spreadsheet_from_large_excel_file():
with open(str(Path.cwd() / 'tests' / 'spreadsheet_files' / 'excel 2007.xlsx'), 'rb') as xl:
ret = Spreadsheet.from_file(xl, filename='xl.xlsx')
assert ret.as_csv_data
@pytest.mark.parametrize(
"status, template_type, expected_status",
[
('temporary-failure', 'email', 'Inbox not accepting messages right now'),
('permanent-failure', 'email', 'Email address doesnt exist'),
('temporary-failure', 'sms', 'Phone not accepting messages right now'),
('permanent-failure', 'sms', 'Phone number doesnt exist')
]
)
def test_format_notification_for_csv_formats_status(
status,
template_type,
expected_status
):
json_row = single_notification_json(
'1234',
template=template_json(service_id='1234', id_='5678', type_=template_type),
status=status
)
csv_line = format_notification_for_csv(json_row)
assert csv_line['Status'] == expected_status
@freeze_time("2016-01-01 15:09:00.061258")
def test_format_notification_for_csv_formats_time():
json_row = single_notification_json('1234')
csv_line = format_notification_for_csv(json_row)
assert csv_line['Time'] == 'Friday 01 January 2016 at 15:09'
def test_generate_notifications_csv_returns_correct_csv_file(mock_get_notifications):
csv_content = generate_notifications_csv(service_id='1234')
csv_file = DictReader(StringIO('\n'.join(csv_content)))
assert csv_file.fieldnames == ['Row number', 'Recipient', 'Template', 'Type', 'Job', 'Status', 'Time']
def test_generate_notifications_csv_only_calls_once_if_no_next_link(mock_get_notifications):
list(generate_notifications_csv(service_id='1234'))
assert mock_get_notifications.call_count == 1
def test_generate_notifications_csv_calls_twice_if_next_link(mocker):
service_id = '1234'
response_with_links = notification_json(service_id, rows=5, with_links=True)
response_with_no_links = notification_json(service_id, rows=2, with_links=False)
mock_get_notifications = mocker.patch(
'app.notification_api_client.get_notifications_for_service',
side_effect=[
response_with_links,
response_with_no_links,
]
)
csv_content = generate_notifications_csv(service_id=service_id)
csv = DictReader(StringIO('\n'.join(csv_content)))
assert len(list(csv)) == 7
assert mock_get_notifications.call_count == 2
# mock_calls[0][2] is the kwargs from first call
assert mock_get_notifications.mock_calls[0][2]['page'] == 1
assert mock_get_notifications.mock_calls[1][2]['page'] == 2

View File

@@ -1008,31 +1008,24 @@ def mock_get_notifications(mocker, api_user_active):
status=None,
limit_days=None,
rows=5,
set_template_type=None,
set_status=None,
include_jobs=None,
include_from_test_key=None
include_from_test_key=None,
):
job = None
if job_id is not None:
job = job_json(service_id, api_user_active, job_id=job_id)
if set_template_type:
return notification_json(
service_id,
template={'template_type': set_template_type, 'name': 'name', 'id': 'id', 'version': 1},
rows=rows,
status=set_status,
job=job,
with_links=True
)
if template_type:
template = template_json(service_id, id_=str(generate_uuid()), type_=template_type[0])
else:
return notification_json(
service_id,
rows=rows,
status=set_status,
job=job,
with_links=True
)
template = template_json(service_id, id_=str(generate_uuid()))
return notification_json(
service_id,
template=template,
rows=rows,
job=job
)
return mocker.patch(
'app.notification_api_client.get_notifications_for_service',