From 09d6c60ff152d771a152c804494f25cfa91fda8f Mon Sep 17 00:00:00 2001 From: Leo Hemsted Date: Mon, 12 Aug 2019 13:53:22 +0100 Subject: [PATCH] punycode encode emails before sending MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit amazon SES only accepts domains encoded in punycode, an encoding that converts utf-8 into an ascii encoding that browsers and mailservers recognise. We currently just send through emails as we store them (in full unicode), which means any rogue characters break SES and cause us to put the email in technical-failure. Most of these appear to be typos and rogue control characters, but there is a small chance that it could be a real domain (eg https://πŸ…‚π–π€β‚›α΅–π’“.β“œπ• π’ƒπ“²/πŸ††πŸ†ƒπŸ…΅/). We should encode to and reply-to-address emails as punycode to make sure that they can always be sent. The chance that anyone actually uses a unicode domain name for their email is probably pretty low, but we should allow it for completeness. --- app/clients/email/aws_ses.py | 10 +++++-- tests/app/clients/test_aws_ses.py | 44 ++++++++++++++++++++++--------- 2 files changed, 40 insertions(+), 14 deletions(-) diff --git a/app/clients/email/aws_ses.py b/app/clients/email/aws_ses.py index c801bb3c5..999315588 100644 --- a/app/clients/email/aws_ses.py +++ b/app/clients/email/aws_ses.py @@ -83,7 +83,7 @@ class AwsSesClient(EmailClient): response = self._client.send_email( Source=source, Destination={ - 'ToAddresses': to_addresses, + 'ToAddresses': [punycode_encode_email(addr) for addr in to_addresses], 'CcAddresses': [], 'BccAddresses': [] }, @@ -93,7 +93,7 @@ class AwsSesClient(EmailClient): }, 'Body': body }, - ReplyToAddresses=reply_to_addresses + ReplyToAddresses=[punycode_encode_email(addr) for addr in reply_to_addresses] ) except botocore.exceptions.ClientError as e: self.statsd_client.incr("clients.ses.error") @@ -116,3 +116,9 @@ class AwsSesClient(EmailClient): self.statsd_client.timing("clients.ses.request-time", elapsed_time) self.statsd_client.incr("clients.ses.success") return response['MessageId'] + + +def punycode_encode_email(email_address): + # only the hostname should ever be punycode encoded. + local, hostname = email_address.split('@') + return '{}@{}'.format(local, hostname.encode('idna').decode('utf-8')) diff --git a/tests/app/clients/test_aws_ses.py b/tests/app/clients/test_aws_ses.py index 1f4913bbc..4842bbb63 100644 --- a/tests/app/clients/test_aws_ses.py +++ b/tests/app/clients/test_aws_ses.py @@ -45,21 +45,21 @@ def test_should_be_none_if_unrecognised_status_code(): assert '99' in str(e.value) -@pytest.mark.parametrize( - 'reply_to_address, expected_value', - [(None, []), ('foo@bar.com', ['foo@bar.com'])], - ids=['empty', 'single_email'] -) +@pytest.mark.parametrize('reply_to_address, expected_value', [ + (None, []), + ('foo@bar.com', ['foo@bar.com']), + ('fΓΈΓΈΓΈΓΈ@bΓ₯Γ₯Γ₯Γ₯Γ₯r.com', ['fΓΈΓΈΓΈΓΈ@xn--br-yiaaaaa.com']) +], ids=['empty', 'single_email', 'punycode']) def test_send_email_handles_reply_to_address(notify_api, mocker, reply_to_address, expected_value): boto_mock = mocker.patch.object(aws_ses_client, '_client', create=True) mocker.patch.object(aws_ses_client, 'statsd_client', create=True) with notify_api.app_context(): aws_ses_client.send_email( - Mock(), - Mock(), - Mock(), - Mock(), + source=Mock(), + to_addresses='to@address.com', + subject=Mock(), + body=Mock(), reply_to_address=reply_to_address ) @@ -71,6 +71,26 @@ def test_send_email_handles_reply_to_address(notify_api, mocker, reply_to_addres ) +def test_send_email_handles_punycode_to_address(notify_api, mocker): + boto_mock = mocker.patch.object(aws_ses_client, '_client', create=True) + mocker.patch.object(aws_ses_client, 'statsd_client', create=True) + + with notify_api.app_context(): + aws_ses_client.send_email( + Mock(), + to_addresses='fΓΈΓΈΓΈΓΈ@bΓ₯Γ₯Γ₯Γ₯Γ₯r.com', + subject=Mock(), + body=Mock() + ) + + boto_mock.send_email.assert_called_once_with( + Source=ANY, + Destination={'ToAddresses': ['fΓΈΓΈΓΈΓΈ@xn--br-yiaaaaa.com'], 'CcAddresses': [], 'BccAddresses': []}, + Message=ANY, + ReplyToAddresses=ANY + ) + + def test_send_email_raises_bad_email_as_InvalidEmailError(mocker): boto_mock = mocker.patch.object(aws_ses_client, '_client', create=True) mocker.patch.object(aws_ses_client, 'statsd_client', create=True) @@ -87,13 +107,13 @@ def test_send_email_raises_bad_email_as_InvalidEmailError(mocker): with pytest.raises(InvalidEmailError) as excinfo: aws_ses_client.send_email( source=Mock(), - to_addresses='clearly@invalid@email.com', + to_addresses='definitely@invalid_email.com', subject=Mock(), body=Mock() ) assert 'some error message from amazon' in str(excinfo.value) - assert 'clearly@invalid@email.com' in str(excinfo.value) + assert 'definitely@invalid_email.com' in str(excinfo.value) def test_send_email_raises_other_errs_as_AwsSesClientException(mocker): @@ -112,7 +132,7 @@ def test_send_email_raises_other_errs_as_AwsSesClientException(mocker): with pytest.raises(AwsSesClientException) as excinfo: aws_ses_client.send_email( source=Mock(), - to_addresses=Mock(), + to_addresses='foo@bar.com', subject=Mock(), body=Mock() )