# notifications-admin/tests/notifications_utils/test_recipient_csv.py

import itertools
import string
import unicodedata
from functools import partial
from random import choice, randrange
from unittest.mock import Mock

import pytest
from ordered_set import OrderedSet

from notifications_utils import SMS_CHAR_COUNT_LIMIT
from notifications_utils.countries import Country
from notifications_utils.formatters import strip_and_remove_obscure_whitespace
from notifications_utils.recipients import (
    Cell,
    RecipientCSV,
    Row,
    first_column_headings,
)
from notifications_utils.template import (
    EmailPreviewTemplate,
    LetterImageTemplate,
    SMSMessageTemplate,
)


def _sample_template(template_type, content="foo"):
    """Build a minimal template of the requested type for use in tests.

    One template of each flavour is constructed from ``content`` and the one
    registered under ``template_type`` ("email", "sms" or "letter") is
    returned. Any other ``template_type`` yields ``None``.
    """
    email_template = EmailPreviewTemplate(
        {"content": content, "subject": "bar", "template_type": "email"}
    )
    sms_template = SMSMessageTemplate({"content": content, "template_type": "sms"})
    letter_template = LetterImageTemplate(
        {"content": content, "subject": "bar", "template_type": "letter"},
        image_url="https://example.com",
        page_count=1,
    )
    templates_by_type = {
        "email": email_template,
        "sms": sms_template,
        "letter": letter_template,
    }
    return templates_by_type.get(template_type)


def _index_rows(rows):
    return set(row.index for row in rows)


@pytest.mark.parametrize(
    "template_type, expected",
    (
        ("email", ["email address"]),
        ("sms", ["phone number"]),
        (
            "letter",
            [
                "address line 1",
                "address line 2",
                "address line 3",
                "address line 4",
                "address line 5",
                "address line 6",
                "postcode",
                "address line 7",
            ],
        ),
    ),
)
def test_recipient_column_headers(template_type, expected):
    """The CSV's recipient headers match the module lookup and the expected list."""
    recipients = RecipientCSV("", template=_sample_template(template_type))

    headers_from_csv = recipients.recipient_column_headers
    headers_from_lookup = first_column_headings[template_type]

    assert headers_from_csv == headers_from_lookup
    assert headers_from_lookup == expected


@pytest.mark.parametrize(
    "file_contents,template_type,expected",
    [
        (
            "",
            "sms",
            [],
        ),
        (
            "phone number",
            "sms",
            [],
        ),
        (
            """
                phone number,name
                +44 123, test1
                +44 456,test2
            """,
            "sms",
            [
                [("phone number", "+44 123"), ("name", "test1")],
                [("phone number", "+44 456"), ("name", "test2")],
            ],
        ),
        (
            """
                phone number,name
                +44 123,
                +44 456
            """,
            "sms",
            [
                [("phone number", "+44 123"), ("name", None)],
                [("phone number", "+44 456"), ("name", None)],
            ],
        ),
        (
            """
                email address,name
                test@example.com,test1
                test2@example.com, test2
            """,
            "email",
            [
                [("email address", "test@example.com"), ("name", "test1")],
                [("email address", "test2@example.com"), ("name", "test2")],
            ],
        ),
        (
            """
                email address
                test@example.com,test1,red
                test2@example.com, test2,blue
            """,
            "email",
            [
                [("email address", "test@example.com"), (None, ["test1", "red"])],
                [("email address", "test2@example.com"), (None, ["test2", "blue"])],
            ],
        ),
        (
            """
                email address,name
                test@example.com,"test1"
                test2@example.com,"   test2    "
                test3@example.com," test3"
            """,
            "email",
            [
                [("email address", "test@example.com"), ("name", "test1")],
                [("email address", "test2@example.com"), ("name", "test2")],
                [("email address", "test3@example.com"), ("name", "test3")],
            ],
        ),
        (
            """
                email address,date,name
                test@example.com,"Nov 28, 2016",test1
                test2@example.com,"Nov 29, 2016",test2
            """,
            "email",
            [
                [
                    ("email address", "test@example.com"),
                    ("date", "Nov 28, 2016"),
                    ("name", "test1"),
                ],
                [
                    ("email address", "test2@example.com"),
                    ("date", "Nov 29, 2016"),
                    ("name", "test2"),
                ],
            ],
        ),
        (
            """
                address_line_1
                Alice
                Bob
            """,
            "letter",
            [[("address_line_1", "Alice")], [("address_line_1", "Bob")]],
        ),
        (
            """
                address line 1,address line 2,address line 5,address line 6,postcode,name,thing
                A. Name,,,,XM4 5HQ,example,example
            """,
            "letter",
            [
                [
                    ("addressline1", "A. Name"),
                    ("addressline2", None),
                    # optional address rows 3 and 4 not in file
                    ("addressline5", None),
                    # was a second "addressline5", which left line 6 unchecked
                    ("addressline6", None),
                    ("postcode", "XM4 5HQ"),
                    ("name", "example"),
                    ("thing", "example"),
                ]
            ],
        ),
        (
            """
                phone number, list, list, list
                07900900001, cat, rat, gnat
                07900900002, dog, hog, frog
                07900900003, elephant
            """,
            "sms",
            [
                [("phone number", "07900900001"), ("list", ["cat", "rat", "gnat"])],
                [("phone number", "07900900002"), ("list", ["dog", "hog", "frog"])],
                [("phone number", "07900900003"), ("list", ["elephant", None, None])],
            ],
        ),
    ],
)
def test_get_rows(file_contents, template_type, expected):
    """Each parsed row exposes the expected cells, looked up by column name.

    ``expected`` is a list with one entry per row; each entry is a list of
    ``(column name, cell data)`` pairs. For every expected row the test checks
    the number of cells and the ``data`` of each named cell. When no rows are
    expected, the parsed rows must equal the empty list.
    """
    rows = list(
        RecipientCSV(file_contents, template=_sample_template(template_type)).rows
    )
    if not expected:
        assert rows == expected
    for index, row in enumerate(expected):
        assert len(rows[index].items()) == len(row)
        for key, value in row:
            assert rows[index].get(key).data == value


def test_get_rows_does_no_error_checking_of_rows_or_cells(mocker):
    """Pulling rows from ``get_rows`` must not touch any error-checking members."""
    error_checking_mocks = [
        mocker.patch.object(Row, "has_error"),
        mocker.patch.object(Row, "has_bad_recipient"),
        mocker.patch.object(Row, "has_missing_data"),
        mocker.patch.object(Cell, "recipient_error"),
    ]

    csv_contents = """
            email address, name
            a@b.com,
            a@b.com, My Name
            a@b.com,


        """
    recipients = RecipientCSV(
        csv_contents,
        template=_sample_template("email", "hello ((name))"),
        max_errors_shown=3,
    )

    row_iterator = recipients.get_rows()
    for _ in range(3):
        current_row = next(row_iterator)
        assert current_row.recipient == "a@b.com"

    # None of the patched members may have been used while reading the rows
    for patched in error_checking_mocks:
        assert patched.called is False


def test_get_rows_only_iterates_over_file_once(mocker):
    """Consuming three rows builds exactly three ``Row`` objects and caches nothing."""
    row_class_mock = mocker.patch("notifications_utils.recipients.Row")

    csv_contents = """
            email address, name
            a@b.com,
            a@b.com, My Name
            a@b.com,


        """
    recipients = RecipientCSV(
        csv_contents,
        template=_sample_template("email", "hello ((name))"),
    )

    row_iterator = recipients.get_rows()
    for _ in range(3):
        next(row_iterator)

    assert row_class_mock.call_count == 3
    assert recipients.rows_as_list is None


@pytest.mark.parametrize(
    "file_contents,template_type,expected",
    [
        (
            """
                phone number,name
                2348675309, test1
                +1234-867-5301,test2
                ,
            """,
            "sms",
            [
                {"index": 0, "message_too_long": False},
                {"index": 1, "message_too_long": False},
            ],
        ),
        (
            """
                email address,name,colour
                test@example.com,test1,blue
                test2@example.com, test2,red
            """,
            "email",
            [
                {"index": 0, "message_too_long": False},
                {"index": 1, "message_too_long": False},
            ],
        ),
    ],
)
def test_get_annotated_rows(file_contents, template_type, expected):
    """Rows carry their index and ``message_too_long`` flag; initial rows are capped."""
    recipients = RecipientCSV(
        file_contents,
        template=_sample_template(template_type, "hello ((name))"),
        max_initial_rows_shown=1,
    )

    for position, wanted in enumerate(expected):
        row = list(recipients.rows)[position]
        assert row.index == wanted["index"]
        assert row.message_too_long == wanted["message_too_long"]

    total_rows = len(list(recipients.rows))
    assert total_rows == 2
    # max_initial_rows_shown=1 limits how many initial rows are exposed
    initial_row_count = len(list(recipients.initial_rows))
    assert initial_row_count == 1
    assert not recipients.has_errors


def test_get_rows_with_errors():
    """Six rows lacking ``name`` are all reported; only three are shown initially."""
    csv_contents = """
            email address, name
            a@b.com,
            a@b.com,
            a@b.com,
            a@b.com,
            a@b.com,
            a@b.com,


        """
    recipients = RecipientCSV(
        csv_contents,
        template=_sample_template("email", "hello ((name))"),
        max_errors_shown=3,
    )

    all_errors = list(recipients.rows_with_errors)
    assert len(all_errors) == 6

    # max_errors_shown=3 caps the initial error listing
    initial_errors = list(recipients.initial_rows_with_errors)
    assert len(initial_errors) == 3

    assert recipients.has_errors


@pytest.mark.parametrize(
    "template_type, row_count, header, filler, row_with_error",
    [
        (
            "email",
            500,
            "email address\n",
            "test@example.com\n",
            "test at example dot com",
        ),
        ("sms", 500, "phone number\n", "2348675309\n", "12345"),
    ],
)
def test_big_list_validates_right_through(
    template_type, row_count, header, filler, row_with_error
):
    """A single bad recipient on the final row of a long file is still detected."""
    valid_rows = filler * (row_count - 1)
    big_csv = RecipientCSV(
        header + (valid_rows + row_with_error),
        template=_sample_template(template_type),
        max_errors_shown=100,
        max_initial_rows_shown=3,
    )

    # Row indexes are zero-based, so the last row is row_count - 1
    last_row_index = row_count - 1

    assert len(list(big_csv.rows)) == row_count
    assert _index_rows(big_csv.rows_with_bad_recipients) == {last_row_index}
    assert _index_rows(big_csv.rows_with_errors) == {last_row_index}
    assert len(list(big_csv.initial_rows_with_errors)) == 1
    assert big_csv.has_errors


@pytest.mark.parametrize(
    "template_type, row_count, header, filler",
    [
        ("email", 50, "email address\n", "test@example.com\n"),
        ("sms", 50, "phone number\n", "07900900123\n"),
    ],
)
def test_check_if_message_too_long_for_sms_but_not_email_in_CSV(
    mocker, template_type, row_count, header, filler
):
    """Message length is checked for SMS rows but never for email rows.

    We do not validate email size for CSVs to avoid performance issues.

    The length check is patched *before* the CSV is built and the rows are
    consumed, so that any call made while processing rows is recorded. The
    previous version patched afterwards and never invoked its assertions, so
    it could not fail.
    """
    template = _sample_template(template_type)
    # Patch on the concrete template class so the mock intercepts the call
    # even if that class overrides the method inherited from Template.
    is_message_too_long = mocker.patch.object(
        type(template), "is_message_too_long", return_value=False
    )

    recipients = RecipientCSV(
        header + filler * row_count,
        template=template,
        max_errors_shown=100,
        max_initial_rows_shown=3,
    )
    # Consume the rows so that any per-row validation has actually run
    assert len(list(recipients.rows)) == row_count

    if template_type == "email":
        is_message_too_long.assert_not_called()
    else:
        assert is_message_too_long.called


def test_overly_big_list_stops_processing_rows_beyond_max(mocker):
    """Rows past ``max_rows`` are counted but not run through per-cell processing."""
    whitespace_spy = mocker.patch(
        "notifications_utils.recipients.strip_and_remove_obscure_whitespace",
        wraps=strip_and_remove_obscure_whitespace,
    )
    insert_or_append_mock = mocker.patch(
        "notifications_utils.recipients.insert_or_append_to_dict"
    )

    header_line = "phonenumber,name\n"
    data_lines = "2348675309,example\n" * 123
    big_csv = RecipientCSV(
        header_line + data_lines,
        template=_sample_template("sms", content="hello ((name))"),
    )
    big_csv.max_rows = 10

    # The file holds far more rows than the limit allows
    assert big_csv.too_many_rows
    assert len(big_csv) == 123

    # The whitespace helper ran only for the 2 cells in each of the first
    # 10 rows
    whitespace_calls = whitespace_spy.call_args_list
    assert len(whitespace_calls) == 20

    # The helper that builds the internal row structure ran once for each
    # of the first 10 rows
    insert_calls = insert_or_append_mock.call_args_list
    assert len(insert_calls) == 10


def test_file_with_lots_of_empty_columns():
    """Rows padded with thousands of empty cells keep only the populated cell."""
    trailing_commas = "," * 10_000
    header_line = f"phone_number{trailing_commas}\n"
    data_lines = f"07900900900{trailing_commas}\n" * 100

    recipients = RecipientCSV(
        header_line + data_lines,
        template=_sample_template("sms"),
    )

    rows_seen = 0
    for row in recipients:
        stored_cells = [(key, cell.data) for key, cell in row.items()]
        # None of the empty cells should have been stored
        assert stored_cells == [("phonenumber", "07900900900")]
        rows_seen += 1

    assert rows_seen == 100


def test_empty_column_names():
    """Cells under blank headers are grouped into a list under the ``""`` key."""
    csv_contents = """
            phone_number,,,name
            07900900123,foo,bar,baz
        """
    recipient_csv = RecipientCSV(csv_contents, template=_sample_template("sms"))

    expected_cells = (
        ("phone_number", "07900900123"),
        ("", ["foo", "bar"]),
        ("name", "baz"),
    )
    for column_name, expected_data in expected_cells:
        assert recipient_csv[0][column_name].data == expected_data


@pytest.mark.parametrize(
    "file_contents,template,expected_recipients,expected_personalisation",
    [
        (
            """
                phone number,name, date
                +44 123,test1,today
                +44456,    ,tomorrow
                ,,
                , ,
            """,
            _sample_template("sms", "hello ((name))"),
            ["+44 123", "+44456"],
            [{"name": "test1"}, {"name": None}],
        ),
        (
            """
                email address,name,colour
                test@example.com,test1,red
                testatexampledotcom,test2,blue
            """,
            _sample_template("email", "((colour))"),
            ["test@example.com", "testatexampledotcom"],
            [{"colour": "red"}, {"colour": "blue"}],
        ),
        (
            """
                email address
                test@example.com,test1,red
                testatexampledotcom,test2,blue
            """,
            _sample_template("email"),
            ["test@example.com", "testatexampledotcom"],
            [],
        ),
    ],
)
def test_get_recipient(
    file_contents, template, expected_recipients, expected_personalisation
):
    """Each row exposes the expected recipient and personalisation values.

    Recipients are checked in their own loop so they are verified even when a
    case has no expected personalisation. Previously the recipient assertion
    sat inside the personalisation loops and was skipped for such cases.
    """
    recipients = RecipientCSV(file_contents, template=template)

    for index, expected_recipient in enumerate(expected_recipients):
        assert recipients[index].recipient == expected_recipient

    for index, row in enumerate(expected_personalisation):
        for key, value in row.items():
            assert recipients[index].personalisation.get(key) == value


@pytest.mark.parametrize(
    "file_contents,template,expected_recipients,expected_personalisation",
    [
        (
            """
                email address,test
                test@example.com,test1,red
                testatexampledotcom,test2,blue
            """,
            _sample_template("email", "((test))"),
            [(0, "test@example.com"), (1, "testatexampledotcom")],
            [
                {"emailaddress": "test@example.com", "test": "test1"},
                {"emailaddress": "testatexampledotcom", "test": "test2"},
            ],
        )
    ],
)
def test_get_recipient_respects_order(
    file_contents, template, expected_recipients, expected_personalisation
):
    """Rows keep file order: index, recipient and personalisation all line up."""
    recipients = RecipientCSV(file_contents, template=template)

    for row_number, email in expected_recipients:
        actual = (
            recipients[row_number].index,
            recipients[row_number].recipient,
            recipients[row_number].personalisation,
        )
        wanted = (row_number, email, expected_personalisation[row_number])
        assert actual == wanted


@pytest.mark.parametrize(
    "file_contents,template_type,expected,expected_missing",
    [
        ("", "sms", [], {"phone number", "name"}),
        (
            """
                phone number,name
                2348675309,test1
                2348675309,test1
                2348675309,test1
            """,
            "sms",
            ["phone number", "name"],
            set(),
        ),
        (
            """
                email address,name,colour
            """,
            "email",
            ["email address", "name", "colour"],
            set(),
        ),
        (
            """
                address_line_1, address_line_2, postcode, name
            """,
            "letter",
            ["address_line_1", "address_line_2", "postcode", "name"],
            set(),
        ),
        (
            """
                email address,colour
            """,
            "email",
            ["email address", "colour"],
            {"name"},
        ),
        (
            """
                address_line_1, address_line_2, name
            """,
            "letter",
            ["address_line_1", "address_line_2", "name"],
            set(),
        ),
        (
            """
                phone number,list,list,name,list
            """,
            "sms",
            ["phone number", "list", "name"],
            set(),
        ),
    ],
)
def test_column_headers(file_contents, template_type, expected, expected_missing):
    """Column headers are reported as expected, as are any missing headers.

    ``has_errors`` is expected to be truthy exactly when headers are missing.
    """
    template = _sample_template(template_type, "((name))")
    recipients = RecipientCSV(file_contents, template=template)

    assert recipients.column_headers == expected
    assert recipients.missing_column_headers == expected_missing

    should_have_errors = bool(expected_missing)
    assert recipients.has_errors == should_have_errors


@pytest.mark.parametrize(
    "content",
    [
        "hello",
        "hello ((name))",
    ],
)
@pytest.mark.parametrize(
    "file_contents,template_type",
    [
        pytest.param("", "sms", marks=pytest.mark.xfail),
        pytest.param("name", "sms", marks=pytest.mark.xfail),
        pytest.param("email address", "sms", marks=pytest.mark.xfail),
        pytest.param(
            "address_line_1",
            "letter",
            marks=pytest.mark.xfail,
        ),
        pytest.param(
            "address_line_1, address_line_2",
            "letter",
            marks=pytest.mark.xfail,
        ),
        pytest.param(
            "address_line_6, postcode",
            "letter",
            marks=pytest.mark.xfail,
        ),
        pytest.param(
            "address_line_1, postcode, address_line_7",
            "letter",
            marks=pytest.mark.xfail,
        ),
        ("phone number", "sms"),
        ("phone number,name", "sms"),
        ("email address", "email"),
        ("email address,name", "email"),
        ("PHONENUMBER", "sms"),
        ("email_address", "email"),
        ("address_line_1, address_line_2, postcode", "letter"),
        ("address_line_1, address_line_2, address_line_7", "letter"),
        ("address_line_1, address_line_2, address_line_3", "letter"),
        ("address_line_4, address_line_5, address_line_6", "letter"),
        (
            "address_line_1, address_line_2, address_line_3, address_line_4, address_line_5, address_line_6, postcode",
            "letter",
        ),
    ],
)
def test_recipient_column(content, file_contents, template_type):
    """``has_recipient_columns`` is truthy for each header line not marked xfail."""
    template = _sample_template(template_type, content)
    recipients = RecipientCSV(file_contents, template=template)
    assert recipients.has_recipient_columns


@pytest.mark.parametrize(
    "file_contents,template_type,rows_with_bad_recipients,rows_with_missing_data",
    [
        (
            """
                phone number,name,date
                2348675309,test1,test1
                2348675309,test1
                +44 123,test1,test1
                2348675309,test1,test1
                2348675309,test1
                +1644000000,test1,test1
                ,test1,test1
            """,
            "sms",
            {2, 5},
            {1, 4, 6},
        ),
        (
            """
                phone number,name
                2348675309,test1,test2
            """,
            "sms",
            set(),
            set(),
        ),
        (
            """
            """,
            "sms",
            set(),
            set(),
        ),
        (
            # missing postcode
            """
                address_line_1,address_line_2,address_line_3,address_line_4,address_line_5,postcode,date
                name,          building,      street,        town,          county,        SE1 7LS,today
                name,          building,      street,        town,          county,        ,        today
            """,
            "letter",
            {1},
            set(),
        ),
        (
            # not enough address fields
            """
                address_line_1, postcode, date
                name,           SE1 7LS, today
            """,
            "letter",
            {0},
            set(),
        ),
        (
            # optional address fields not filled in
            """
                address_line_1,address_line_2,address_line_3,address_line_4,address_line_5,postcode,date
                name          ,123 fake st.  ,              ,              ,              ,SE1 7LS,today
                name          ,              ,              ,              ,              ,SE1 7LS,today
            """,
            "letter",
            {1},
            set(),
        ),
        (
            # Can use any address columns
            """
                address_line_3, address_line_4, address_line_7, date
                name          , 123 fake st.,   SE1 7LS,        today
            """,
            "letter",
            set(),
            set(),
        ),
    ],
)
@pytest.mark.parametrize(
    "partial_instance",
    [
        partial(RecipientCSV),
        partial(RecipientCSV, allow_international_sms=False),
    ],
)
def test_bad_or_missing_data(
    file_contents,
    template_type,
    rows_with_bad_recipients,
    rows_with_missing_data,
    partial_instance,
):
    """Rows with bad recipients or missing data are reported by index.

    Runs with the default constructor and with ``allow_international_sms=False``.
    """
    template = _sample_template(template_type, "((date))")
    recipients = partial_instance(file_contents, template=template)

    bad_recipient_indexes = _index_rows(recipients.rows_with_bad_recipients)
    missing_data_indexes = _index_rows(recipients.rows_with_missing_data)

    assert bad_recipient_indexes == rows_with_bad_recipients
    assert missing_data_indexes == rows_with_missing_data

    # Any reported problem must also surface through has_errors
    problems_expected = rows_with_bad_recipients or rows_with_missing_data
    if problems_expected:
        assert recipients.has_errors is True


@pytest.mark.parametrize(
    "file_contents,rows_with_bad_recipients",
    [
        (
            """
            phone number
            +800000000000
            1234
            +447900123
        """,
            {0, 1, 2},
        ),
        (
            """
            phone number, country
            1-202-234-0104, USA
            +12022340104, USA
            +23051234567, Mauritius
        """,
            {2},
        ),
    ],
)
def test_international_recipients(file_contents, rows_with_bad_recipients):
    """With international SMS allowed, only the expected rows are flagged as bad."""
    recipients = RecipientCSV(
        file_contents,
        template=_sample_template("sms"),
        allow_international_sms=True,
    )
    flagged_indexes = _index_rows(recipients.rows_with_bad_recipients)
    assert flagged_indexes == rows_with_bad_recipients


def test_errors_when_too_many_rows():
    """Exceeding ``max_rows`` is an error and rows beyond the limit are ``None``."""
    csv_contents = "email address\n" + ("a@b.com\n" * 101)
    recipients = RecipientCSV(csv_contents, template=_sample_template("email"))

    # The default limit is checked first, then lowered to keep the test fast
    assert recipients.max_rows == 100_000
    recipients.max_rows = 100

    assert recipients.too_many_rows is True
    assert recipients.has_errors is True

    last_allowed_row = recipients.rows[99]
    assert last_allowed_row["email_address"].data == "a@b.com"

    # The first row past the limit is not processed
    first_row_over_limit = recipients.rows[100]
    assert first_row_over_limit is None


@pytest.mark.parametrize(
    "file_contents,template_type,guestlist,count_of_rows_with_errors",
    [
        (
            """
                phone number
                2348675309
                2348675301
                2348675302
                2348675303
            """,
            "sms",
            # Matches the first phone number, written in a different format
            ["+12348675309"],
            3,
        ),
        (
            """
                phone number
                12348675309
                2348675301
                2348675302
            """,
            "sms",
            [
                "2348675309",
                "12348675301",
                "2348675302",
                "2341231234",
                "test@example.com",
            ],
            0,
        ),
        (
            """
                email address
                IN_GUESTLIST@EXAMPLE.COM
                not_in_guestlist@example.com
            """,
            "email",
            # The email's case differs from the one in the CSV
            [
                "in_guestlist@example.com",
                "2348675309",
            ],
            1,
        ),
    ],
)
def test_recipient_guestlist(
    file_contents, template_type, guestlist, count_of_rows_with_errors
):
    """Sending is allowed only when every recipient appears on the guestlist.

    Also checks that a generator guestlist survives being read, and that an
    empty guestlist behaves as though no guestlist were set.
    """
    recipients = RecipientCSV(
        file_contents, template=_sample_template(template_type), guestlist=guestlist
    )

    if not count_of_rows_with_errors:
        assert recipients.allowed_to_send_to
    else:
        assert not recipients.allowed_to_send_to

    # Assign a generator and read it once: if the guestlist were kept as a
    # one-shot iterator, reading it here would leave it empty afterwards.
    recipients.guestlist = (
        str(fake_number) for fake_number in range(7700900888, 7700900898)
    )
    list(recipients.guestlist)
    assert not recipients.allowed_to_send_to
    assert recipients.has_errors

    # An empty guestlist, list or iterator, is treated as no guestlist at all
    recipients.guestlist = []
    assert recipients.allowed_to_send_to
    recipients.guestlist = itertools.chain()
    assert recipients.allowed_to_send_to


def test_detects_rows_which_result_in_overly_long_messages():
    """Only the row whose content exceeds ``SMS_CHAR_COUNT_LIMIT`` is in error."""
    template = SMSMessageTemplate(
        {"content": "((placeholder))", "template_type": "sms"},
        sender=None,
        prefix=None,
    )
    csv_contents = """
            phone number,placeholder
            2348675309,1
            2348675301,{one_under}
            2348675302,{exactly}
            2348675303,{one_over}
        """.format(
        one_under="a" * (SMS_CHAR_COUNT_LIMIT - 1),
        exactly="a" * SMS_CHAR_COUNT_LIMIT,
        one_over="a" * (SMS_CHAR_COUNT_LIMIT + 1),
    )
    recipients = RecipientCSV(csv_contents, template=template)

    assert _index_rows(recipients.rows_with_errors) == {3}
    assert _index_rows(recipients.rows_with_message_too_long) == {3}
    assert recipients.has_errors

    # Only the over-length row (index 3) has an error spanning multiple cells
    for row_index, spans_cells in enumerate((False, False, False, True)):
        assert recipients[row_index].has_error_spanning_multiple_cells is spans_cells


def test_detects_rows_which_result_in_empty_messages():
    """The row whose conditional placeholder is "no" is reported as empty."""
    template = SMSMessageTemplate(
        {"content": "((show??content))", "template_type": "sms"},
        sender=None,
        prefix=None,
    )
    csv_contents = """
            phone number,show
            2348675309,yes
            2348675301,no
            2348675302,yes
        """
    recipients = RecipientCSV(csv_contents, template=template)

    assert _index_rows(recipients.rows_with_errors) == {1}
    assert _index_rows(recipients.rows_with_empty_message) == {1}
    assert recipients.has_errors

    # Only the "no" row (index 1) has an error spanning multiple cells
    for row_index, spans_cells in enumerate((False, True, False)):
        assert recipients[row_index].has_error_spanning_multiple_cells is spans_cells


@pytest.mark.parametrize(
    "key, expected",
    [
        (key, expected)
        for expected, group in [
            (
                "2348675309",
                (
                    "phone number",
                    "   PHONENUMBER",
                    "phone_number",
                    "phone-number",
                    "phoneNumber",
                ),
            ),
            (
                "Jo",
                (
                    "FIRSTNAME",
                    "first name",
                    "first_name ",
                    "first-name",
                    "firstName",
                ),
            ),
            (
                "Bloggs",
                (
                    "Last    Name",
                    "LASTNAME",
                    "    last_name",
                    "last-name",
                    "lastName   ",
                ),
            ),
        ]
        for key in group
    ],
)
def test_ignores_spaces_and_case_in_placeholders(key, expected):
    """Cells can be looked up by any spacing/case/separator variant of a header."""
    csv_contents = """
            phone number,FIRSTNAME, Last Name
            2348675309, Jo, Bloggs
        """
    template = _sample_template(
        "sms", content="((phone_number)) ((First Name)) ((lastname))"
    )
    recipients = RecipientCSV(csv_contents, template=template)

    first_row = recipients[0]
    # Both .get() and item access must resolve the key variant
    assert first_row.get(key).data == expected
    assert first_row[key].data == expected
    assert first_row.recipient == "2348675309"
    assert len(first_row.items()) == 3
    assert not recipients.has_errors

    assert recipients.missing_column_headers == set()

    # Placeholders with no matching column are reported as missing
    unmatched_placeholders = {"one", "TWO", "Thirty_Three"}
    recipients.placeholders = unmatched_placeholders
    assert recipients.missing_column_headers == {"one", "TWO", "Thirty_Three"}
    assert recipients.has_errors


@pytest.mark.parametrize(
    "character, name",
    (
        (" ", "SPACE"),
        # these ones have no unicode name
        ("\n", None),  # newline
        ("\r", None),  # carriage return
        ("\t", None),  # tab
        ("\u180E", "MONGOLIAN VOWEL SEPARATOR"),
        ("\u200B", "ZERO WIDTH SPACE"),
        ("\u200C", "ZERO WIDTH NON-JOINER"),
        ("\u200D", "ZERO WIDTH JOINER"),
        ("\u2060", "WORD JOINER"),
        ("\uFEFF", "ZERO WIDTH NO-BREAK SPACE"),
        # every character above, combined
        (" \n\r\t\u000A\u000D\u180E\u200B\u200C\u200D\u2060\uFEFF", None),
    ),
)
def test_ignores_leading_whitespace_in_file(character, name):
    """Leading whitespace (visible or not) before the header row is ignored."""
    if name is not None:
        # Sanity check that the parametrized character is the one intended
        assert unicodedata.name(character) == name

    csv_contents = f"{character}emailaddress\ntest@example.com"
    recipients = RecipientCSV(csv_contents, template=_sample_template("email"))
    first_row = recipients[0]

    assert recipients.column_headers == ["emailaddress"]
    assert recipients.recipient_column_headers == ["email address"]
    assert recipients.missing_column_headers == set()
    assert recipients.placeholders == ["email address"]

    expected_address = "test@example.com"
    assert first_row.get("email address").data == expected_address
    assert first_row["email address"].data == expected_address
    assert first_row.recipient == expected_address

    assert not recipients.has_errors


def test_error_if_too_many_recipients():
    """Three rows against ``remaining_messages=2`` is reported as an error."""
    csv_contents = "phone number,\n2348675309,\n2348675309,\n2348675309,"
    recipients = RecipientCSV(
        csv_contents,
        template=_sample_template("sms"),
        remaining_messages=2,
    )
    assert recipients.has_errors
    assert recipients.more_rows_than_can_send


def test_dont_error_if_too_many_recipients_not_specified():
    """Without ``remaining_messages`` the row count alone is not an error."""
    csv_contents = "phone number,\n2348675309,\n2348675309,\n2348675309,"
    recipients = RecipientCSV(csv_contents, template=_sample_template("sms"))
    assert not recipients.has_errors
    assert not recipients.more_rows_than_can_send


@pytest.mark.parametrize(
    "index, expected_row",
    [
        (0, {"phone number": "07700 90000 1", "colour": "red"}),
        (1, {"phone_number": "07700 90000 2", "COLOUR": "green"}),
        (2, {"p h o n e  n u m b e r": "07700 90000 3", "   colour   ": "blue"}),
        pytest.param(
            3,
            {"phone number": "foo"},
            marks=pytest.mark.xfail(raises=IndexError),
        ),
        (-1, {"p h o n e  n u m b e r": "07700 90000 3", "   colour   ": "blue"}),
    ],
)
def test_recipients_can_be_accessed_by_index(index, expected_row):
    """Rows are reachable by positive or negative index.

    The keys of ``expected_row`` deliberately vary in case, underscores and
    spacing from the headers in the file. Index 3 is past the last of the
    three rows and is expected to fail with ``IndexError``.
    """
    csv_data = """
            phone number, colour
            07700 90000 1, red
            07700 90000 2, green
            07700 90000 3, blue
        """
    recipients = RecipientCSV(csv_data, template=_sample_template("sms"))

    row = recipients[index]
    for column in expected_row:
        assert row[column].data == expected_row[column]


@pytest.mark.parametrize("international_sms", [True, False])
def test_multiple_sms_recipient_columns(international_sms):
    """Repeated phone number columns are flagged as duplicates.

    The value read back for the recipient is the one from the last of the
    three phone number columns, and the cell itself has no error; the
    problem is reported at file level through ``has_errors``.
    """
    csv_data = """
            phone number, phone number, phone_number, foo
            234-867-5301, 234-867-5302, 234-867-5309, bar
        """
    recipients = RecipientCSV(
        csv_data,
        template=_sample_template("sms"),
        allow_international_sms=international_sms,
    )

    assert recipients.column_headers == ["phone number", "phone_number", "foo"]
    assert (
        recipients.column_headers_as_column_keys
        == {"phonenumber": "", "foo": ""}.keys()
    )

    row = recipients.rows[0]
    for heading in ("phone number", "phone_number"):
        assert row.get(heading).data == "234-867-5309"
    assert row.get("phone number").error is None

    duplicated = OrderedSet(["phone number", "phone_number"])
    assert recipients.duplicate_recipient_column_headers == duplicated
    assert recipients.has_errors


@pytest.mark.parametrize(
    "column_name",
    [
        "phone_number",
        "phonenumber",
        "phone number",
        "phone-number",
        "p h o n e  n u m b e r",
    ],
)
def test_multiple_sms_recipient_columns_with_missing_data(column_name):
    """Duplicate phone number columns are an error even if one has no data.

    ``column_name`` is the heading of a second phone number column which
    the single data row leaves unfilled.
    """
    csv_template = """
            names, phone number, {}
            "Joanna and Steve", 07900 900111
        """
    recipients = RecipientCSV(
        csv_template.format(column_name),
        template=_sample_template("sms"),
        allow_international_sms=True,
    )

    # An identically spelt heading does not add a new entry to the headers
    identical_heading = column_name == "phone number"
    extra_heading = [] if identical_heading else [column_name]

    assert recipients.column_headers == ["names", "phone number"] + extra_heading
    assert (
        recipients.column_headers_as_column_keys
        == {"phonenumber": "", "names": ""}.keys()
    )

    # A piece of weirdness uncovered: rows are created before the spacing in
    # column names is normalised. So with duplicate recipient columns where
    # only one holds data:
    #   - identically spelt headings give a list of the one number and None;
    #   - headings that differ in spacing style give None.
    # Without duplicate columns the phone number is found regardless of the
    # spacing, so this should not affect our users.
    row = recipients.rows[0]
    expected_phone_data = ["07900 900111", None] if identical_heading else None
    assert row["phonenumber"].data == expected_phone_data
    assert row.get("phone number").error is None

    assert recipients.duplicate_recipient_column_headers == OrderedSet(
        ["phone number"] + extra_heading
    )
    assert recipients.has_errors


def test_multiple_email_recipient_columns():
    """Two columns that are both read as the email address are an error.

    The cell looked up as "email address" carries the value from the last
    of the duplicate columns and has no error of its own; the duplication
    is reported through ``duplicate_recipient_column_headers`` and
    ``has_errors``.
    """
    recipients = RecipientCSV(
        """
            EMAILADDRESS, email_address, foo
            one@two.com,  two@three.com, bar
        """,
        template=_sample_template("email"),
    )
    email_cell = recipients.rows[0].get("email address")

    assert email_cell.data == "two@three.com"
    assert email_cell.error is None
    assert recipients.duplicate_recipient_column_headers == OrderedSet(
        ["EMAILADDRESS", "email_address"]
    )
    # Checked once only: the original asserted this twice
    assert recipients.has_errors


def test_multiple_letter_recipient_columns():
    """Repeated address line columns are reported as duplicate headers.

    The cell looked up as "addressline1" carries the value from the last
    "address line 1" column and has no error of its own; the duplication is
    reported through ``duplicate_recipient_column_headers`` and
    ``has_errors``.
    """
    recipients = RecipientCSV(
        """
            address line 1, Address Line 2, address line 1, address_line_2
            1,2,3,4
        """,
        template=_sample_template("letter"),
    )
    first_line_cell = recipients.rows[0].get("addressline1")

    assert first_line_cell.data == "3"
    assert first_line_cell.error is None
    # The list mirrors the header row; OrderedSet collapses the repeated
    # "address line 1" entry
    assert recipients.duplicate_recipient_column_headers == OrderedSet(
        ["address line 1", "Address Line 2", "address line 1", "address_line_2"]
    )
    # Checked once only: the original asserted this twice
    assert recipients.has_errors


def test_displayed_rows_when_some_rows_have_errors():
    """With ``max_errors_shown=3`` only three rows are displayed.

    Four of the five rows leave the ``name`` column, which the template
    uses as a placeholder, empty.
    """
    csv_data = """
            email address, name
            a@b.com,
            a@b.com,
            a@b.com, My Name
            a@b.com,
            a@b.com,
        """
    recipients = RecipientCSV(
        csv_data,
        template=_sample_template("email", "((name))"),
        max_errors_shown=3,
    )

    shown = list(recipients.displayed_rows)
    assert len(shown) == 3


def test_displayed_rows_when_there_are_no_rows_with_errors():
    """All four fully populated rows are displayed despite ``max_errors_shown=3``."""
    csv_data = """
            email address, name
            a@b.com, My Name
            a@b.com, My Name
            a@b.com, My Name
            a@b.com, My Name
        """
    recipients = RecipientCSV(
        csv_data,
        template=_sample_template("email", "((name))"),
        max_errors_shown=3,
    )

    shown = list(recipients.displayed_rows)
    assert len(shown) == 4


def test_multi_line_placeholders_work():
    """Line breaks inside a quoted cell are kept in the personalisation."""
    csv_data = """
            email address, data
            a@b.com, "a\nb\n\nc"
        """
    recipients = RecipientCSV(
        csv_data,
        template=_sample_template("email", "((data))"),
    )

    personalisation = recipients.rows[0].personalisation
    assert personalisation["data"] == "a\nb\n\nc"


@pytest.mark.parametrize(
    "extra_args, expected_errors, expected_bad_rows",
    [
        ({}, True, {0}),
        ({"allow_international_letters": False}, True, {0}),
        ({"allow_international_letters": True}, False, set()),
    ],
)
def test_accepts_international_addresses_when_allowed(
    extra_args, expected_errors, expected_bad_rows
):
    """The Fiji address is only accepted with ``allow_international_letters``.

    ``extra_args`` are passed straight through to ``RecipientCSV``;
    ``expected_bad_rows`` holds the indexes of the rows expected to have a
    bad recipient.
    """
    csv_data = """
            address line 1, address line 2, address line 3
            First Lastname, 123 Example St, Fiji
            First Lastname, 123 Example St, SW1A 1AA
        """
    recipients = RecipientCSV(
        csv_data,
        template=_sample_template("letter"),
        **extra_args,
    )

    assert recipients.has_errors is expected_errors

    bad_row_indexes = {row.index for row in recipients.rows_with_bad_recipients}
    assert bad_row_indexes == expected_bad_rows

    # The country is recognised in every case, so a rejection is not down to
    # the country being unknown
    first_address = recipients[0].as_postal_address
    assert first_address.country == Country("Fiji")


def test_address_validation_speed():
    """Validate 1000 randomly generated UK addresses, none being rejected.

    The intent is that this takes about a second; if it starts to get slow,
    something is inefficient. Nothing here measures the time taken, so a
    regression only shows up as a slow test run.
    """
    line_count = 1000
    postcode_letters = "ABDefgHJLNPqrstUWxyZ"

    def random_address_line():
        # Street number, then a postcode made of area, district, sector and
        # a two letter unit, in deliberately mixed case
        return "{n} Example Street, London, {a}{b} {c}{d}{e}".format(
            n=randrange(1000),
            a=choice(["n", "e", "sw", "se", "w"]),
            b=choice(range(1, 10)),
            c=choice(range(1, 10)),
            d=choice(postcode_letters),
            e=choice(postcode_letters),
        )

    header = "address line 1, address line 2, address line 3\n"
    address_lines = "\n".join(random_address_line() for _ in range(line_count))

    recipients = RecipientCSV(
        header + address_lines,
        template=_sample_template("letter"),
        allow_international_letters=False,
    )

    assert not any(row.has_bad_postal_address for row in recipients)


def test_email_validation_speed():
    """Process 1000 randomly generated email rows, none having an error.

    The template has two placeholders and a long body. Nothing here
    measures the time taken, so slowness only shows up as a slow test run.
    """
    letters = string.ascii_letters
    data_rows = [
        "{a}{b}@example-{n}.com,Example,Thursday".format(
            n=randrange(1000),
            a=choice(letters),
            b=choice(letters),
        )
        for _ in range(1000)
    ]

    recipients = RecipientCSV(
        "email address,name,day\n" + "\n".join(data_rows),
        template=_sample_template(
            "email",
            content=f"""
                hello ((name)) today is ((day))
                here’s the letter ‘a’ 1000 times:
                {'a' * 1000}
            """,
        ),
    )

    assert not any(row.has_error for row in recipients)


@pytest.mark.parametrize("should_validate", (True, False))
def test_recipient_csv_checks_should_validate_flag(should_validate):
    """The validation hooks are called only when ``should_validate`` is true.

    Both ``template.is_message_empty`` and
    ``RecipientCSV._get_error_for_field`` are replaced with mocks, and
    whether each was called must match the flag.
    """
    sms_template = _sample_template("sms")
    sms_template.is_message_empty = Mock(return_value=False)

    recipients = RecipientCSV(
        """phone number,name
        2348675309, test1
        +447700 900 460,test2""",
        template=sms_template,
        should_validate=should_validate,
    )
    field_error_mock = Mock(return_value=None)
    recipients._get_error_for_field = field_error_mock

    # Consume every row so that any per-row validation has run
    for _ in recipients.get_rows():
        pass

    assert sms_template.is_message_empty.called is should_validate
    assert field_error_mock.called is should_validate