Strip HTML from template content

Templates are created in the admin app and persisted in the API.

They are consumed:
- in the admin app, by requesting them from the API
- in the API, by loading them from the database

There are two potential places where unescaped HTML could be sent to a user:
- when the admin app is previewing a template (it has to render the template as
  markup in order to show the placeholders)
- in the body of an email

For all consumers to have confidence that the templates are safe, it makes sense
to santitise them at the point of creation (and modification). This also avoids
any performance issues that could come from doing it at the point of requesting
a template.

In the future they could be created by a direct API call, bypassing the admin
app. Therefore it makes sense for the API to sanitise them.

The commit sanitises templates using an XML parser. It extracts the text content
of the template, minus any HTML tags, because of this famous StackOverflow
answer:
http://stackoverflow.com/questions/1732348/regex-match-open-tags-except-xhtml-self-contained-tags/1732454#1732454
This commit is contained in:
Chris Hill-Scott
2016-03-04 07:03:15 +00:00
parent 3fd3aa5b3e
commit 703e513ddf
3 changed files with 12 additions and 4 deletions

View File

@@ -4,6 +4,7 @@ from flask import (
request,
current_app
)
from lxml import html
from sqlalchemy.exc import IntegrityError
from app.dao.templates_dao import (
@@ -34,6 +35,7 @@ def create_template(service_id):
if errors:
return jsonify(result="error", message=errors), 400
new_template.service = fetched_service
new_template.content = _strip_html(new_template.content)
try:
dao_create_template(new_template)
except IntegrityError as ex:
@@ -55,6 +57,7 @@ def update_template(service_id, template_id):
current_data = dict(template_schema.dump(fetched_template).data.items())
current_data.update(request.get_json())
current_data['content'] = _strip_html(current_data['content'])
update_dict, errors = template_schema.load(current_data)
if errors:
@@ -79,3 +82,7 @@ def get_template_by_id_and_service_id(service_id, template_id):
return jsonify(data=data)
else:
return jsonify(result="error", message="Template not found"), 404
def _strip_html(content):
return html.document_fromstring(content).text_content()

View File

@@ -2,6 +2,7 @@ Flask==0.10.1
Flask-Script==2.0.5
Flask-Migrate==1.3.1
Flask-SQLAlchemy==2.0
lxml==3.5.0
psycopg2==2.6.1
SQLAlchemy==1.0.5
SQLAlchemy-Utils==0.30.5

View File

@@ -10,7 +10,7 @@ def test_should_create_a_new_sms_template_for_a_service(notify_api, sample_servi
data = {
'name': 'my template',
'template_type': 'sms',
'content': 'template content',
'content': 'template <b>content</b>',
'service': str(sample_service.id)
}
data = json.dumps(data)
@@ -42,7 +42,7 @@ def test_should_create_a_new_email_template_for_a_service(notify_api, sample_ser
'name': 'my template',
'template_type': 'email',
'subject': 'subject',
'content': 'template content',
'content': 'template <b>content</b>',
'service': str(sample_service.id)
}
data = json.dumps(data)
@@ -222,7 +222,7 @@ def test_should_be_able_to_update_a_template(notify_api, sample_service):
json_resp = json.loads(create_response.get_data(as_text=True))
assert json_resp['data']['name'] == 'my template'
data = {
'name': 'my template has a new name'
'content': 'my template has new content <script type="text/javascript">alert("foo")</script>'
}
data = json.dumps(data)
auth_header = create_authorization_header(
@@ -239,7 +239,7 @@ def test_should_be_able_to_update_a_template(notify_api, sample_service):
assert update_response.status_code == 200
update_json_resp = json.loads(update_response.get_data(as_text=True))
assert update_json_resp['data']['name'] == 'my template has a new name'
assert update_json_resp['data']['content'] == 'my template has new content alert("foo")'
def test_should_be_able_to_get_all_templates_for_a_service(notify_api, sample_service):