app/dao/notifications_dao.py

from datetime import datetime, timedelta
from itertools import groupby
from operator import attrgetter

from botocore.exceptions import ClientError
from flask import current_app
from notifications_utils.international_billing_rates import (
    INTERNATIONAL_BILLING_RATES,
)
from notifications_utils.recipients import (
    InvalidEmailError,
    try_validate_and_format_phone_number,
    validate_and_format_email_address,
)
from notifications_utils.timezones import convert_bst_to_utc, convert_utc_to_bst
from sqlalchemy import and_, asc, desc, func, or_, union
from sqlalchemy.orm import joinedload
from sqlalchemy.orm.exc import NoResultFound
from sqlalchemy.sql import functions
from sqlalchemy.sql.expression import case
from werkzeug.datastructures import MultiDict

from app import create_uuid, db, statsd_client
from app.dao.dao_utils import autocommit
from app.letters.utils import LetterPDFNotFound, find_letter_pdf_in_s3
from app.models import (
    EMAIL_TYPE,
    KEY_TYPE_NORMAL,
    KEY_TYPE_TEST,
    LETTER_TYPE,
    NOTIFICATION_CREATED,
    NOTIFICATION_DELIVERED,
    NOTIFICATION_PENDING,
    NOTIFICATION_PENDING_VIRUS_CHECK,
    NOTIFICATION_PERMANENT_FAILURE,
    NOTIFICATION_SENDING,
    NOTIFICATION_SENT,
    NOTIFICATION_STATUS_TYPES_COMPLETED,
    NOTIFICATION_TEMPORARY_FAILURE,
    SMS_TYPE,
    FactNotificationStatus,
    Notification,
    NotificationHistory,
    ProviderDetails,
)
from app.utils import (
    escape_special_characters,
    get_london_midnight_in_utc,
    midnight_n_days_ago,
)


def dao_get_last_date_template_was_used(template_id, service_id):
    last_date_from_notifications = db.session.query(
        functions.max(Notification.created_at)
    ).filter(
        Notification.service_id == service_id,
        Notification.template_id == template_id,
        Notification.key_type != KEY_TYPE_TEST
    ).scalar()

    if last_date_from_notifications:
        return last_date_from_notifications

    last_date = db.session.query(
        functions.max(FactNotificationStatus.bst_date)
    ).filter(
        FactNotificationStatus.template_id == template_id,
        FactNotificationStatus.key_type != KEY_TYPE_TEST
    ).scalar()

    return last_date


@autocommit
def dao_create_notification(notification):
    if not notification.id:
        # need to populate defaulted fields before we create the notification history object
        notification.id = create_uuid()
    if not notification.status:
        notification.status = NOTIFICATION_CREATED

    db.session.add(notification)


def country_records_delivery(phone_prefix):
    dlr = INTERNATIONAL_BILLING_RATES[phone_prefix]['attributes']['dlr']
    return dlr and dlr.lower() == 'yes'


def _update_notification_status(notification, status, detailed_status_code=None):
    # status = _decide_permanent_temporary_failure(
    #     status=status, notification=notification, detailed_status_code=detailed_status_code
    # )
    # notification.status = status
    # dao_update_notification(notification)
    return notification


@autocommit
def update_notification_status_by_id(notification_id, status, sent_by=None, detailed_status_code=None):
    notification = Notification.query.with_for_update().filter(Notification.id == notification_id).first()

    if not notification:
        current_app.logger.info('notification not found for id {} (update to status {})'.format(
            notification_id,
            status
        ))
        return None

    if notification.status not in {
        NOTIFICATION_CREATED,
        NOTIFICATION_SENDING,
        NOTIFICATION_PENDING,
        NOTIFICATION_SENT,
        NOTIFICATION_PENDING_VIRUS_CHECK
    }:
        _duplicate_update_warning(notification, status)
        return None

    if (
        notification.notification_type == SMS_TYPE
        and notification.international
        and not country_records_delivery(notification.phone_prefix)
    ):
        return None
    if not notification.sent_by and sent_by:
        notification.sent_by = sent_by
    return _update_notification_status(
        notification=notification,
        status=status,
        detailed_status_code=detailed_status_code
    )


@autocommit
def update_notification_status_by_reference(reference, status):
    # this is used to update letters and emails
    notification = Notification.query.filter(Notification.reference == reference).first()

    if not notification:
        current_app.logger.error('notification not found for reference {} (update to {})'.format(reference, status))
        return None

    if notification.status not in {
        NOTIFICATION_SENDING,
        NOTIFICATION_PENDING
    }:
        _duplicate_update_warning(notification, status)
        return None

    return _update_notification_status(
        notification=notification,
        status=status
    )


@autocommit
def dao_update_notification(notification):
    notification.updated_at = datetime.utcnow()
    db.session.add(notification)


def get_notifications_for_job(service_id, job_id, filter_dict=None, page=1, page_size=None):
    if page_size is None:
        page_size = current_app.config['PAGE_SIZE']
    query = Notification.query.filter_by(service_id=service_id, job_id=job_id)
    query = _filter_query(query, filter_dict)
    return query.order_by(asc(Notification.job_row_number)).paginate(
        page=page,
        per_page=page_size
    )


def dao_get_notification_count_for_job_id(*, job_id):
    return Notification.query.filter_by(job_id=job_id).count()


def get_notification_with_personalisation(service_id, notification_id, key_type):
    filter_dict = {'service_id': service_id, 'id': notification_id}
    if key_type:
        filter_dict['key_type'] = key_type

    return Notification.query.filter_by(**filter_dict).options(joinedload('template')).one()


def get_notification_by_id(notification_id, service_id=None, _raise=False):
    filters = [Notification.id == notification_id]

    if service_id:
        filters.append(Notification.service_id == service_id)

    query = Notification.query.filter(*filters)

    return query.one() if _raise else query.first()


def get_notifications_for_service(
        service_id,
        filter_dict=None,
        page=1,
        page_size=None,
        count_pages=True,
        limit_days=None,
        key_type=None,
        personalisation=False,
        include_jobs=False,
        include_from_test_key=False,
        older_than=None,
        client_reference=None,
        include_one_off=True,
        error_out=True
):
    if page_size is None:
        page_size = current_app.config['PAGE_SIZE']

    filters = [Notification.service_id == service_id]

    if limit_days is not None:
        filters.append(Notification.created_at >= midnight_n_days_ago(limit_days))

    if older_than is not None:
        older_than_created_at = db.session.query(
            Notification.created_at).filter(Notification.id == older_than).as_scalar()
        filters.append(Notification.created_at < older_than_created_at)

    if not include_jobs:
        filters.append(Notification.job_id == None)  # noqa

    if not include_one_off:
        filters.append(Notification.created_by_id == None)  # noqa

    if key_type is not None:
        filters.append(Notification.key_type == key_type)
    elif not include_from_test_key:
        filters.append(Notification.key_type != KEY_TYPE_TEST)

    if client_reference is not None:
        filters.append(Notification.client_reference == client_reference)

    query = Notification.query.filter(*filters)
    query = _filter_query(query, filter_dict)
    if personalisation:
        query = query.options(
            joinedload('template')
        )

    return query.order_by(desc(Notification.created_at)).paginate(
        page=page,
        per_page=page_size,
        count=count_pages,
        error_out=error_out,
    )


def _filter_query(query, filter_dict=None):
    if filter_dict is None:
        return query

    multidict = MultiDict(filter_dict)

    # filter by status
    statuses = multidict.getlist('status')
    if statuses:
        statuses = Notification.substitute_status(statuses)
        query = query.filter(Notification.status.in_(statuses))

    # filter by template
    template_types = multidict.getlist('template_type')
    if template_types:
        query = query.filter(Notification.notification_type.in_(template_types))

    return query


@autocommit
def insert_notification_history_delete_notifications(
    notification_type, service_id, timestamp_to_delete_backwards_from, qry_limit=50000
):
    """
    Delete up to 50,000 notifications that are past retention for a notification type and service.


    Steps are as follows:

    Create a temporary notifications table
    Populate that table with up to 50k notifications that are to be deleted. (Note: no specified order)
    Insert everything in the temp table into notification history
    Delete from notifications if notification id is in the temp table
    Drop the temp table (automatically when the transaction commits)

    Temporary tables are in a separate postgres schema, and only visible to the current session (db connection,
    in a celery task there's one connection per thread.)
    """
    # Setting default query limit to 50,000 which take about 48 seconds on current table size
    # 10, 000 took 11s and 100,000 took 1 min 30 seconds.
    select_into_temp_table = """
         CREATE TEMP TABLE NOTIFICATION_ARCHIVE ON COMMIT DROP AS
         SELECT id, job_id, job_row_number, service_id, template_id, template_version, api_key_id,
             key_type, notification_type, created_at, sent_at, sent_by, updated_at, reference, billable_units,
             client_reference, international, phone_prefix, rate_multiplier, notification_status,
              created_by_id, postage, document_download_count
          FROM notifications
        WHERE service_id = :service_id
          AND notification_type = :notification_type
          AND created_at < :timestamp_to_delete_backwards_from
          AND key_type in ('normal', 'team')
        limit :qry_limit
        """
    select_into_temp_table_for_letters = """
         CREATE TEMP TABLE NOTIFICATION_ARCHIVE ON COMMIT DROP AS
         SELECT id, job_id, job_row_number, service_id, template_id, template_version, api_key_id,
             key_type, notification_type, created_at, sent_at, sent_by, updated_at, reference, billable_units,
             client_reference, international, phone_prefix, rate_multiplier, notification_status,
              created_by_id, postage, document_download_count
          FROM notifications
        WHERE service_id = :service_id
          AND notification_type = :notification_type
          AND created_at < :timestamp_to_delete_backwards_from
          AND notification_status NOT IN ('pending-virus-check', 'created', 'sending')
          AND key_type in ('normal', 'team')
        limit :qry_limit
        """
    # Insert into NotificationHistory if the row already exists do nothing.
    insert_query = """
        insert into notification_history
         SELECT * from NOTIFICATION_ARCHIVE
          ON CONFLICT ON CONSTRAINT notification_history_pkey
          DO NOTHING
    """
    delete_query = """
        DELETE FROM notifications
        where id in (select id from NOTIFICATION_ARCHIVE)
    """
    input_params = {
        "service_id": service_id,
        "notification_type": notification_type,
        "timestamp_to_delete_backwards_from": timestamp_to_delete_backwards_from,
        "qry_limit": qry_limit
    }

    select_to_use = select_into_temp_table_for_letters if notification_type == 'letter' else select_into_temp_table
    db.session.execute(select_to_use, input_params)

    result = db.session.execute("select count(*) from NOTIFICATION_ARCHIVE").fetchone()[0]

    db.session.execute(insert_query)

    db.session.execute(delete_query)

    return result


def move_notifications_to_notification_history(
    notification_type,
    service_id,
    timestamp_to_delete_backwards_from,
    qry_limit=50000
):
    deleted = 0
    if notification_type == LETTER_TYPE:
        _delete_letters_from_s3(
            notification_type, service_id, timestamp_to_delete_backwards_from, qry_limit
        )
    delete_count_per_call = 1
    while delete_count_per_call > 0:
        delete_count_per_call = insert_notification_history_delete_notifications(
            notification_type=notification_type,
            service_id=service_id,
            timestamp_to_delete_backwards_from=timestamp_to_delete_backwards_from,
            qry_limit=qry_limit
        )
        deleted += delete_count_per_call

    # Deleting test Notifications, test notifications are not persisted to NotificationHistory
    Notification.query.filter(
        Notification.notification_type == notification_type,
        Notification.service_id == service_id,
        Notification.created_at < timestamp_to_delete_backwards_from,
        Notification.key_type == KEY_TYPE_TEST
    ).delete(synchronize_session=False)
    db.session.commit()

    return deleted


def _delete_letters_from_s3(
        notification_type, service_id, date_to_delete_from, query_limit
):
    letters_to_delete_from_s3 = db.session.query(
        Notification
    ).filter(
        Notification.notification_type == notification_type,
        Notification.created_at < date_to_delete_from,
        Notification.service_id == service_id,
        # although letters in non completed statuses do have PDFs in s3, they do not exist in the
        # production-letters-pdf bucket as they never made it that far so we do not try and delete
        # them from it
        Notification.status.in_(NOTIFICATION_STATUS_TYPES_COMPLETED)
    ).limit(query_limit).all()
    for letter in letters_to_delete_from_s3:
        try:
            letter_pdf = find_letter_pdf_in_s3(letter)
            letter_pdf.delete()
        except ClientError:
            current_app.logger.exception(
                "Error deleting S3 object for letter: {}".format(letter.id))
        except LetterPDFNotFound:
            current_app.logger.warning(
                "No S3 object to delete for letter: {}".format(letter.id))


@autocommit
def dao_delete_notifications_by_id(notification_id):
    db.session.query(Notification).filter(
        Notification.id == notification_id
    ).delete(synchronize_session='fetch')


def dao_timeout_notifications(cutoff_time, limit=100000):
    """
    Set email and SMS notifications (only) to "temporary-failure" status
    if they're still sending from before the specified cutoff_time.
    """
    updated_at = datetime.utcnow()
    current_statuses = [NOTIFICATION_SENDING, NOTIFICATION_PENDING]
    new_status = NOTIFICATION_TEMPORARY_FAILURE

    notifications = Notification.query.filter(
        Notification.created_at < cutoff_time,
        Notification.status.in_(current_statuses),
        Notification.notification_type.in_([SMS_TYPE, EMAIL_TYPE])
    ).limit(limit).all()

    Notification.query.filter(
        Notification.id.in_([n.id for n in notifications]),
    ).update(
        {'status': new_status, 'updated_at': updated_at},
        synchronize_session=False
    )

    db.session.commit()
    return notifications


def is_delivery_slow_for_providers(
        created_at,
        threshold,
        delivery_time,
):
    """
    Returns a dict of providers and whether they are currently slow or not. eg:
    {
        'mmg': True,
        'firetext': False
    }
    """
    slow_notification_counts = db.session.query(
        ProviderDetails.identifier,
        case(
            [(
                Notification.status == NOTIFICATION_DELIVERED,
                (Notification.updated_at - Notification.sent_at) >= delivery_time
            )],
            else_=(datetime.utcnow() - Notification.sent_at) >= delivery_time
        ).label("slow"),
        func.count().label('count')
    ).select_from(
        ProviderDetails
    ).outerjoin(
        Notification, and_(
            Notification.notification_type == SMS_TYPE,
            Notification.sent_by == ProviderDetails.identifier,
            Notification.created_at >= created_at,
            Notification.sent_at.isnot(None),
            Notification.status.in_([NOTIFICATION_DELIVERED, NOTIFICATION_PENDING, NOTIFICATION_SENDING]),
            Notification.key_type != KEY_TYPE_TEST
        )
    ).filter(
        ProviderDetails.notification_type == 'sms',
        ProviderDetails.active
    ).order_by(
        ProviderDetails.identifier
    ).group_by(
        ProviderDetails.identifier,
        "slow"
    )

    slow_providers = {}
    for provider, rows in groupby(slow_notification_counts, key=attrgetter('identifier')):
        rows = list(rows)
        total_notifications = sum(row.count for row in rows)
        slow_notifications = sum(row.count for row in rows if row.slow)

        slow_providers[provider] = (slow_notifications / total_notifications >= threshold)
        statsd_client.gauge(f'slow-delivery.{provider}.ratio', slow_notifications / total_notifications)

    return slow_providers


@autocommit
def dao_update_notifications_by_reference(references, update_dict):
    updated_count = Notification.query.filter(
        Notification.reference.in_(references)
    ).update(
        update_dict,
        synchronize_session=False
    )

    updated_history_count = 0
    if updated_count != len(references):
        updated_history_count = NotificationHistory.query.filter(
            NotificationHistory.reference.in_(references)
        ).update(
            update_dict,
            synchronize_session=False
        )

    return updated_count, updated_history_count


def dao_get_notifications_by_recipient_or_reference(
    service_id,
    search_term,
    notification_type=None,
    statuses=None,
    page=1,
    page_size=None,
    error_out=True,
):

    if notification_type == SMS_TYPE:
        normalised = try_validate_and_format_phone_number(search_term)

        for character in {'(', ')', ' ', '-'}:
            normalised = normalised.replace(character, '')

        normalised = normalised.lstrip('+0')

    elif notification_type == EMAIL_TYPE:
        try:
            normalised = validate_and_format_email_address(search_term)
        except InvalidEmailError:
            normalised = search_term.lower()

    elif notification_type in {LETTER_TYPE, None}:
        # For letters, we store the address without spaces, so we need
        # to removes spaces from the search term to match. We also do
        # this when a notification type isn’t provided (this will
        # happen if a user doesn’t have permission to see the dashboard)
        # because email addresses and phone numbers will never be stored
        # with spaces either.
        normalised = ''.join(search_term.split()).lower()

    else:
        raise TypeError(
            f'Notification type must be {EMAIL_TYPE}, {SMS_TYPE}, {LETTER_TYPE} or None'
        )

    normalised = escape_special_characters(normalised)
    search_term = escape_special_characters(search_term)

    filters = [
        Notification.service_id == service_id,
        or_(
            Notification.normalised_to.like("%{}%".format(normalised)),
            Notification.client_reference.ilike("%{}%".format(search_term)),
        ),
        Notification.key_type != KEY_TYPE_TEST,
    ]

    if statuses:
        filters.append(Notification.status.in_(statuses))
    if notification_type:
        filters.append(Notification.notification_type == notification_type)

    results = db.session.query(Notification)\
        .filter(*filters)\
        .order_by(desc(Notification.created_at))\
        .paginate(page=page, per_page=page_size, count=False, error_out=error_out)
    return results


def dao_get_notification_by_reference(reference):
    return Notification.query.filter(
        Notification.reference == reference
    ).one()


def dao_get_notification_or_history_by_reference(reference):
    try:
        # This try except is necessary because in test keys and research mode does not create notification history.
        # Otherwise we could just search for the NotificationHistory object
        return Notification.query.filter(
            Notification.reference == reference
        ).one()
    except NoResultFound:
        return NotificationHistory.query.filter(
            NotificationHistory.reference == reference
        ).one()


def dao_get_notifications_processing_time_stats(start_date, end_date):
    """
    For a given time range, returns the number of notifications sent and the number of
    those notifications that we processed within 10 seconds

    SELECT
    count(notifications),
    coalesce(sum(CASE WHEN sent_at - created_at <= interval '10 seconds' THEN 1 ELSE 0 END), 0)
    FROM notifications
    WHERE
    created_at > 'START DATE' AND
    created_at < 'END DATE' AND
    api_key_id IS NOT NULL AND
    key_type != 'test' AND
    notification_type != 'letter';
    """
    under_10_secs = Notification.sent_at - Notification.created_at <= timedelta(seconds=10)
    sum_column = functions.coalesce(functions.sum(
        case(
            [
                (under_10_secs, 1)
            ],
            else_=0
        )
    ), 0)

    return db.session.query(
        func.count(Notification.id).label('messages_total'),
        sum_column.label('messages_within_10_secs')
    ).filter(
        Notification.created_at >= start_date,
        Notification.created_at < end_date,
        Notification.api_key_id.isnot(None),
        Notification.key_type != KEY_TYPE_TEST,
        Notification.notification_type != LETTER_TYPE
    ).one()


def dao_get_last_notification_added_for_job_id(job_id):
    last_notification_added = Notification.query.filter(
        Notification.job_id == job_id
    ).order_by(
        Notification.job_row_number.desc()
    ).first()

    return last_notification_added


def notifications_not_yet_sent(should_be_sending_after_seconds, notification_type):
    older_than_date = datetime.utcnow() - timedelta(seconds=should_be_sending_after_seconds)

    notifications = Notification.query.filter(
        Notification.created_at <= older_than_date,
        Notification.notification_type == notification_type,
        Notification.status == NOTIFICATION_CREATED
    ).all()
    return notifications


def dao_get_letters_to_be_printed(print_run_deadline, postage, query_limit=10000):
    """
    Return all letters created before the print run deadline that have not yet been sent. This yields in batches of 10k
    to prevent the query taking too long and eating up too much memory. As each 10k batch is yielded, the
    get_key_and_size_of_letters_to_be_sent_to_print function will go and fetch the s3 data, andhese  start sending off
    tasks to the notify-ftp app to send them.

    CAUTION! Modify this query with caution. Modifying filters etc is fine, but if we join onto another table, then
    there may be undefined behaviour. Essentially we need each ORM object returned for each row to be unique,
    and we should avoid modifying state of returned objects.

    For more reading:
    https://docs.sqlalchemy.org/en/13/orm/query.html?highlight=yield_per#sqlalchemy.orm.query.Query.yield_per
    https://www.mail-archive.com/sqlalchemy@googlegroups.com/msg12443.html
    """
    notifications = Notification.query.filter(
        Notification.created_at < convert_bst_to_utc(print_run_deadline),
        Notification.notification_type == LETTER_TYPE,
        Notification.status == NOTIFICATION_CREATED,
        Notification.key_type == KEY_TYPE_NORMAL,
        Notification.postage == postage,
        Notification.billable_units > 0
    ).order_by(
        Notification.service_id,
        Notification.created_at
    ).yield_per(query_limit)
    return notifications


def dao_get_letters_and_sheets_volume_by_postage(print_run_deadline):
    notifications = db.session.query(
        func.count(Notification.id).label('letters_count'),
        func.sum(Notification.billable_units).label('sheets_count'),
        Notification.postage
    ).filter(
        Notification.created_at < convert_bst_to_utc(print_run_deadline),
        Notification.notification_type == LETTER_TYPE,
        Notification.status == NOTIFICATION_CREATED,
        Notification.key_type == KEY_TYPE_NORMAL,
        Notification.billable_units > 0
    ).group_by(
        Notification.postage
    ).order_by(
        Notification.postage
    ).all()
    return notifications


def dao_old_letters_with_created_status():
    yesterday_bst = convert_utc_to_bst(datetime.utcnow()) - timedelta(days=1)
    last_processing_deadline = yesterday_bst.replace(hour=17, minute=30, second=0, microsecond=0)

    notifications = Notification.query.filter(
        Notification.created_at < convert_bst_to_utc(last_processing_deadline),
        Notification.notification_type == LETTER_TYPE,
        Notification.status == NOTIFICATION_CREATED
    ).order_by(
        Notification.created_at
    ).all()
    return notifications


def letters_missing_from_sending_bucket(seconds_to_subtract):
    older_than_date = datetime.utcnow() - timedelta(seconds=seconds_to_subtract)
    # We expect letters to have a `created` status, updated_at timestamp and billable units greater than zero.
    notifications = Notification.query.filter(
        Notification.billable_units == 0,
        Notification.updated_at == None,  # noqa
        Notification.status == NOTIFICATION_CREATED,
        Notification.created_at <= older_than_date,
        Notification.notification_type == LETTER_TYPE,
        Notification.key_type == KEY_TYPE_NORMAL
    ).order_by(
        Notification.created_at
    ).all()

    return notifications


def dao_precompiled_letters_still_pending_virus_check():
    ninety_minutes_ago = datetime.utcnow() - timedelta(seconds=5400)

    notifications = Notification.query.filter(
        Notification.created_at < ninety_minutes_ago,
        Notification.status == NOTIFICATION_PENDING_VIRUS_CHECK
    ).order_by(
        Notification.created_at
    ).all()
    return notifications


def _duplicate_update_warning(notification, status):
    current_app.logger.info(
        (
            'Duplicate callback received for service {service_id}. '
            'Notification ID {id} with type {type} sent by {sent_by}. '
            'New status was {new_status}, current status is {old_status}. '
            'This happened {time_diff} after being first set.'
        ).format(
            id=notification.id,
            old_status=notification.status,
            new_status=status,
            time_diff=datetime.utcnow() - (notification.updated_at or notification.created_at),
            type=notification.notification_type,
            sent_by=notification.sent_by,
            service_id=notification.service_id
        )
    )


def get_service_ids_with_notifications_before(notification_type, timestamp):
    return {
        row.service_id
        for row in db.session.query(
            Notification.service_id
        ).filter(
            Notification.notification_type == notification_type,
            Notification.created_at < timestamp
        ).distinct()
    }


def get_service_ids_with_notifications_on_date(notification_type, date):
    start_date = get_london_midnight_in_utc(date)
    end_date = get_london_midnight_in_utc(date + timedelta(days=1))

    notification_table_query = db.session.query(
        Notification.service_id.label('service_id')
    ).filter(
        Notification.notification_type == notification_type,
        # using >= + < is much more efficient than date(created_at)
        Notification.created_at >= start_date,
        Notification.created_at < end_date,
    )

    # Looking at this table is more efficient for historical notifications,
    # provided the task to populate it has run before they were archived.
    ft_status_table_query = db.session.query(
        FactNotificationStatus.service_id.label('service_id')
    ).filter(
        FactNotificationStatus.notification_type == notification_type,
        FactNotificationStatus.bst_date == date,
    )

    return {
        row.service_id for row in db.session.query(union(
            notification_table_query, ft_status_table_query
        ).subquery()).distinct()
    }
-												Run auto-correct on app/ and tests/

											
										
										
											2021-03-10 13:55:06 +00:00
+								from datetime import datetime, timedelta
-												randomly choose from providers based on priority

todo: make sure if they don't add up to 100 we do something sensible,
especially if they're both 0.

											
										
										
											2019-10-14 15:01:08 +01:00
+								from itertools import groupby
 								from operator import attrgetter
-												[WIP] On create of notification. Upsert record for template stats
recording usages of template by day.

											
										
										
											2016-03-31 15:57:50 +01:00
-												Use correct exception for boto3

We use boto3 for our interaction with s3. Therefore if an expection is
thrown it will be thrown from the botocore library (which boto3 is built
on top of).

I have copied
app/aws/s3.py::file_exists for an example of this exception catching.

											
										
										
											2020-02-12 15:28:46 +00:00
+								from botocore.exceptions import ClientError
-												Fetch endpoints for notifications

- includes check on token type to ensure clients can perform admin style fetches

											
										
										
											2016-03-01 13:30:10 +00:00
+								from flask import current_app
-												Run auto-correct on app/ and tests/

											
										
										
											2021-03-10 13:55:06 +00:00
+								from notifications_utils.international_billing_rates import (
 								    INTERNATIONAL_BILLING_RATES,
 								)
-												Search notification against normalised recipient with filter for status

											
										
										
											2017-05-24 14:24:57 +01:00
+								from notifications_utils.recipients import (
-												Don’t 500 when searching with bad email address

In the future we might want to validate email addresses before
attempting to search by them. But for a first pass we can just return
no results when a user types in something that isn’t an email address
or phone number.

It definitely better than returning a 500.

											
										
										
											2017-05-30 14:40:27 +01:00
+								    InvalidEmailError,
-												Run auto-correct on app/ and tests/

											
										
										
											2021-03-10 13:55:06 +00:00
+								    try_validate_and_format_phone_number,
 								    validate_and_format_email_address,
-												Search notification against normalised recipient with filter for status

											
										
										
											2017-05-24 14:24:57 +01:00
+								)
-												Add scheduled task to find old letters which still have 'created' status

Added a scheduled task to run once a day and check if there were any
letters from before 17.30 that still have a status of 'created'. This
logs an exception instead of trying to fix the error because the fix
will be different depending on which bucket the letter is in.

											
										
										
											2019-06-11 15:13:06 +01:00
+								from notifications_utils.timezones import convert_bst_to_utc, convert_utc_to_bst
-												Fix out-of-date rows in ft_notification_status

This can happen in the following scenario (primarily for letters):

1. A service has a mixture of "delivered" and "sending" letters,
which the status task aggregates into two rows:

  sending | 123
  delivered | 456

2. After the 7 day retention has passed, only the "delivered" letters
will be archived [^1].

3. The status task now looks at the history table [^2], which means
it only sees the "delivered" letters.

4. The "sending" letters are eventually "delivered" and archived (before
the 10 day aggregation cutoff).

5. But the status aggregation task doesn't run.

This commit fixes (5).

[^1]: https://github.com/alphagov/notifications-api/pull/3063
[^2]: https://github.com/alphagov/notifications-api/blob/f87ebb094d9c2037ce6a4c4a38bab87daa868be3/app/dao/fact_notification_status_dao.py#L51

											
										
										
											2022-05-10 11:14:59 +01:00
+								from sqlalchemy import and_, asc, desc, func, or_, union
-												add actual_template relationship to notification

also renamed the function to make it apparent that it'll join and grab personalisation

											
										
										
											2016-08-09 13:07:48 +01:00
+								from sqlalchemy.orm import joinedload
-												Prepare to stop inserting NotificationHistory at the time of inserting a notificaiton.
Need to remove foreign key to complaints.
Make sure if getting Notification.id we look to both tables.

											
										
										
											2019-05-21 16:08:18 +01:00
+								from sqlalchemy.orm.exc import NoResultFound
-												Add query to get processing time stats for performance platform

We are only interested in API notifications, not including test
messages. Letters are not included.

											
										
										
											2017-08-29 16:35:30 +01:00
+								from sqlalchemy.sql import functions
-												Removed unused method

											
										
										
											2018-12-10 16:27:59 +00:00
+								from sqlalchemy.sql.expression import case
 								from werkzeug.datastructures import MultiDict
-												[WIP] On create of notification. Upsert record for template stats
recording usages of template by day.

											
										
										
											2016-03-31 15:57:50 +01:00
-												Replace log with StatsD gauge for slow delivery

A gauge is more useful as we can visualise it and combine it with
other stats - we already have other stats for the total number of
notifications sent by provider, and we can extrapolate the number
of slow notifications using this, if needed.

We also still have logs to say the task is running, as well as a
log in the calling code when we actually make a switch [1], so
we're not losing anything by removing the log here.

[1]: https://github.com/alphagov/notifications-api/blob/a9306c45573402495d68f6948a9eb3d6ffbac28c/app/celery/scheduled_tasks.py#L117

											
										
										
											2021-12-14 12:26:00 +00:00
+								from app import create_uuid, db, statsd_client
-												Rename database management functions.

Rename @transactional to @autocommit.
Rename nested_transaction to tranaction.

											
										
										
											2021-04-14 07:11:01 +01:00
+								from app.dao.dao_utils import autocommit
-												Reduce extra S3 ops when working with letter PDFs

Previously we did some unnecessary work:

- Collate task. This had one S3 request to get a summary of the object,
which was then used in another request to get the full object. We only
need the size of the object, which is included in the summary [1].

- Archive task. This had one S3 request to get a summary of the object,
which was then used to make another request to delete it. We still need
both requests, but we can remove the S3.Object in the middle.

[1]: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#objectsummary

											
										
										
											2021-03-16 11:57:33 +00:00
+								from app.letters.utils import LetterPDFNotFound, find_letter_pdf_in_s3
-												Filtering added and tests working.

											
										
										
											2016-03-21 12:37:34 +00:00
+								from app.models import (
-												Run auto-correct on app/ and tests/

											
										
										
											2021-03-10 13:55:06 +00:00
+								    EMAIL_TYPE,
-												Add an alert when a letter is created but doesn't have a file in S3 for sending. We can tell this is the case because there is no updated_at and billable units are still 0.

At this point we are just creating a zendesk ticket - perhaps we can just call the create_letter_pdf task.

											
										
										
											2019-11-13 16:39:59 +00:00
+								    KEY_TYPE_NORMAL,
-												Changes after review. Style changes to ensure consistency with the rest of the code base to ensure good maintainability in future.

											
										
										
											2017-10-05 16:29:11 +01:00
+								    KEY_TYPE_TEST,
 								    LETTER_TYPE,
-												Update the timeout_notifications scheduled tasks.
We found that if the notifications were in created or pending they are not purged from notifications.
- New bulk update method to set all notificaitons with:
  -  a status = created|sending|pending to temporary-failure
  - and is older then today minus SENDING_NOTIFICATIONS_TIMEOUT_PERIOD (in seconds)
- the scheduled task to timeout notifications use the new bulk update query.
- the task will be more efficient

											
										
										
											2016-09-13 16:42:53 +01:00
+								    NOTIFICATION_CREATED,
-												Add dao method to get count of slow delivery notifications by provider

											
										
										
											2017-02-13 14:27:32 +00:00
+								    NOTIFICATION_DELIVERED,
-												Update the timeout_notifications scheduled tasks.
We found that if the notifications were in created or pending they are not purged from notifications.
- New bulk update method to set all notificaitons with:
  -  a status = created|sending|pending to temporary-failure
  - and is older then today minus SENDING_NOTIFICATIONS_TIMEOUT_PERIOD (in seconds)
- the scheduled task to timeout notifications use the new bulk update query.
- the task will be more efficient

											
										
										
											2016-09-13 16:42:53 +01:00
+								    NOTIFICATION_PENDING,
-												Update update_notification_status_by_id DAO function

Replaced `.with_for_lockmode()`, which is now deprecated, with
`.with_for_update() - https://docs.sqlalchemy.org/en/latest/orm/query.html#sqlalchemy.orm.query.Query.with_lockmode

The function should update any statuses that are not 'final', so added
`pending-virus-check` to the list of statuses that the function can
update.

											
										
										
											2018-11-13 14:20:24 +00:00
+								    NOTIFICATION_PENDING_VIRUS_CHECK,
-												don't include letters when timing out/deleting old notifications

also refactored those functions, since they were pretty gnarly and
repeated themselves a lot

											
										
										
											2017-04-19 11:34:00 +01:00
+								    NOTIFICATION_PERMANENT_FAILURE,
-												Run auto-correct on app/ and tests/

											
										
										
											2021-03-10 13:55:06 +00:00
+								    NOTIFICATION_SENDING,
-												Notification_type is a required parameter, admin app always passes it in.
Normalise for notificaiton type.
Throw InvalidRequest exception is the notification type is invalid.

											
										
										
											2018-03-07 18:13:40 +00:00
+								    NOTIFICATION_SENT,
-												Do not delete letters if not in final state

A few weeks ago, we deleted some pdf letters that had reached their
retention period. However, these letters were in the 'created' state so
it's very arguable that we should not have deleted them because we were
expecting to resend them and were unable to. Part of the reason for this
is that we marked the letters back to `created` as the status but we did
not nullify the `sent_at` timestamp, meaning the check on
https://github.com/alphagov/notifications-api/blob/ebb43082d51b9f27b17190abb0537a710a544408/app/dao/notifications_dao.py#L346
did not catch it. Regardless of that check, which controls whether the
files were removed from S3, they were also archived into the
`notification_history` table as by default.

This commit does changes our code such that letters that are not in
their final state do not go through our retention process. This could
mean they violate their retention policy but that is likely the lesser
of two evils (the other being we delete them and are unable to resend
them).

Note, `sending` letters have been included in those not to be removed
because there is a risk that we give the letter to DVLA and put it in
`sending` but then they come back to us later telling us they've had
problems and require us to resend.

											
										
										
											2020-12-16 10:50:11 +00:00
+								    NOTIFICATION_STATUS_TYPES_COMPLETED,
-												Run auto-correct on app/ and tests/

											
										
										
											2021-03-10 13:55:06 +00:00
+								    NOTIFICATION_TEMPORARY_FAILURE,
-												Notification_type is a required parameter, admin app always passes it in.
Normalise for notificaiton type.
Throw InvalidRequest exception is the notification type is invalid.

											
										
										
											2018-03-07 18:13:40 +00:00
+								    SMS_TYPE,
-												Run auto-correct on app/ and tests/

											
										
										
											2021-03-10 13:55:06 +00:00
+								    FactNotificationStatus,
 								    Notification,
 								    NotificationHistory,
 								    ProviderDetails,
-												Get the sms sender from the notificaiton_sms_sender mapping table if that does not exist get the default sms sender to pass on to the sms provider.

											
										
										
											2017-10-30 14:55:44 +00:00
+								)
-												Fix getting service IDs for status aggregation

Addresses [1].

Previously the query would always use UTC midnight, even after we
had switched to BST (+1h). We store timestamps as naive UTC in our
DB - without a timezone - but we want the query to work in terms
of GMT / BST so we adjust for that - BST midnight is 11PM in UTC.

[1]: https://github.com/alphagov/notifications-api/pull/3437#discussion_r791998690

											
										
										
											2022-02-10 10:37:32 +00:00
+								from app.utils import (
 								    escape_special_characters,
 								    get_london_midnight_in_utc,
 								    midnight_n_days_ago,
 								)
-												[WIP] Added dao method and rest endpoint for getting template
statistics by service.

Some cosmetic changes to imports.

Added fix for job rest not correctly returning errors.

											
										
										
											2016-04-04 12:21:38 +01:00
-												[WIP] New model class and dao for notification. This will be used for
recording status and outcome of sending notifications.

											
										
										
											2016-02-09 12:01:17 +00:00
-												Simplify the code in the query.

The date in the notifications table should always be the most recent date for the template.
Removed the template_type param for the query as well.
Simplified the tests.

											
										
										
											2020-02-05 16:43:17 +00:00
+								def dao_get_last_date_template_was_used(template_id, service_id):
-												Added a new endpoint to return the last used date for a template.

The existing endpoint returned a whole notification for the last time the template was used. But this only takes into account data in the last week. This new methods allows us to be specific about when the template was last used if ever but looking into the ft_notification_status table as well.

											
										
										
											2020-02-05 13:03:54 +00:00
+								    last_date_from_notifications = db.session.query(
 								        functions.max(Notification.created_at)
 								    ).filter(
 								        Notification.service_id == service_id,
 								        Notification.template_id == template_id,
 								        Notification.key_type != KEY_TYPE_TEST
 								    ).scalar()
-												Simplify the code in the query.

The date in the notifications table should always be the most recent date for the template.
Removed the template_type param for the query as well.
Simplified the tests.

											
										
										
											2020-02-05 16:43:17 +00:00
+								    if last_date_from_notifications:
-												Added a new endpoint to return the last used date for a template.

The existing endpoint returned a whole notification for the last time the template was used. But this only takes into account data in the last week. This new methods allows us to be specific about when the template was last used if ever but looking into the ft_notification_status table as well.

											
										
										
											2020-02-05 13:03:54 +00:00
+								        return last_date_from_notifications
-												Simplify the code in the query.

The date in the notifications table should always be the most recent date for the template.
Removed the template_type param for the query as well.
Simplified the tests.

											
										
										
											2020-02-05 16:43:17 +00:00
 								    last_date = db.session.query(
 								        functions.max(FactNotificationStatus.bst_date)
 								    ).filter(
 								        FactNotificationStatus.template_id == template_id,
 								        FactNotificationStatus.key_type != KEY_TYPE_TEST
 								    ).scalar()
 								    return last_date
-												Added a new endpoint to return the last used date for a template.

The existing endpoint returned a whole notification for the last time the template was used. But this only takes into account data in the last week. This new methods allows us to be specific about when the template was last used if ever but looking into the ft_notification_status table as well.

											
										
										
											2020-02-05 13:03:54 +00:00
-												Rename database management functions.

Rename @transactional to @autocommit.
Rename nested_transaction to tranaction.

											
										
										
											2021-04-14 07:11:01 +01:00
+								@autocommit
-												Removed contented updates the notifications stats table

- As before this is now driven from the notifications history table

- Removed from updates and create
- Signatures changes to removed unused params hits many files
- Also potential issue around rate limiting - we used to get the number sent per day from the stats table - which was a single row lookup, now we have to count this. This applies to EVERY API CALL. Probably not a good thing and should be addressed urgently.

											
										
										
											2016-08-25 11:55:38 +01:00
+								def dao_create_notification(notification):
-												update notification_history table from notification_dao create/update functions

please ensure that any changes to notifications table happen through either dao_create_notification or dao_update_notification.
changed the notification status update triggered by the provider callbacks to ensure that sets updated_by and can update the history table.
also re-added the character_count so we can reconstruct billing data if needed.

											
										
										
											2016-07-11 16:48:32 +01:00
+								    if not notification.id:
 								        # need to populate defaulted fields before we create the notification history object
-												- Refactor version 1 of post notificaitons to use the common persist_notificaiton and send_notification_to_queue methods.
- It would be nice to refactor the send_sms and send_email tasks to use these common functions as well, that way I can get rid of the new Notifications.from_v2_api_request method.
- Still not happy with the format of the errors. Would like to find a happy place, where the message is descript enough that we do not need external documentation to explain the error. Perhaps we still only need documentation to explain the trial mode concept.

											
										
										
											2016-10-28 17:10:00 +01:00
+								        notification.id = create_uuid()
-												update notification_history table from notification_dao create/update functions

please ensure that any changes to notifications table happen through either dao_create_notification or dao_update_notification.
changed the notification status update triggered by the provider callbacks to ensure that sets updated_by and can update the history table.
also re-added the character_count so we can reconstruct billing data if needed.

											
										
										
											2016-07-11 16:48:32 +01:00
+								    if not notification.status:
-												don't include letters when timing out/deleting old notifications

also refactored those functions, since they were pretty gnarly and
repeated themselves a lot

											
										
										
											2017-04-19 11:34:00 +01:00
+								        notification.status = NOTIFICATION_CREATED
-												update notification_history table from notification_dao create/update functions

please ensure that any changes to notifications table happen through either dao_create_notification or dao_update_notification.
changed the notification status update triggered by the provider callbacks to ensure that sets updated_by and can update the history table.
also re-added the character_count so we can reconstruct billing data if needed.

											
										
										
											2016-07-11 16:48:32 +01:00
-												[WIP] Added dao method and rest endpoint for getting template
statistics by service.

Some cosmetic changes to imports.

Added fix for job rest not correctly returning errors.

											
										
										
											2016-04-04 12:21:38 +01:00
+								    db.session.add(notification)
-												When creating a notification don't create a history row if:
1) research mode service
2) test mode key

Stop test data getting into history and subsequently into stats and so on.

											
										
										
											2016-12-19 13:57:06 +00:00
-												update delivery receipts for countries that return them

some countries don't return delivery receipts
some countries return delivery receipts when they reach the carrier

these countries, we should keep the notifications in sent (aka
sent_internatinally) for. However, for countries that have normal
delivery receipts, we should update them as we do for UK numbers

											
										
										
											2017-05-12 14:59:14 +01:00
+								def country_records_delivery(phone_prefix):
-												fix 500 when receiving delivery receipt from some international nums

if the international_billing_rates.yml has `dlr: null`, that means we
don't know what delivery receipts they provide - they might not provide
any. So if we do get an update, we don't know for sure that the message
was actually delivered - lets not update it.

											
										
										
											2018-01-29 11:41:46 +00:00
+								    dlr = INTERNATIONAL_BILLING_RATES[phone_prefix]['attributes']['dlr']
 								    return dlr and dlr.lower() == 'yes'
-												update delivery receipts for countries that return them

some countries don't return delivery receipts
some countries return delivery receipts when they reach the carrier

these countries, we should keep the notifications in sent (aka
sent_internatinally) for. However, for countries that have normal
delivery receipts, we should update them as we do for UK numbers

											
										
										
											2017-05-12 14:59:14 +01:00
-												Change function and variable names for readability and consistency

											
										
										
											2020-06-01 11:45:35 +01:00
+								def _update_notification_status(notification, status, detailed_status_code=None):
-												Get tests passing locally

When we cloned the repository and started making modifications, we
didn't initially keep tests in step. This commit tries to get us to a
clean test run by skipping tests that are failing and removing some
that we no longer expect to use (MMG, Firetext), with the intention that
we will come back in future and update or remove them as appropriate.

To find all tests skipped, search for `@pytest.mark.skip(reason="Needs
updating for TTS:`. There will be a brief description of the work that
needs to be done to get them passing, if known. Delete that line to make
them run in a standard test run (`make test`).

											
										
										
											2022-07-05 11:27:15 -07:00
+								    # status = _decide_permanent_temporary_failure(
 								    #     status=status, notification=notification, detailed_status_code=detailed_status_code
 								    # )
 								    # notification.status = status
 								    # dao_update_notification(notification)
-												Change DAO to return the notification from an update, not Boolean

- tests reflect this

											
										
										
											2016-09-13 12:29:40 +01:00
+								    return notification
-												Endpoint to allow SES updates to occur

- update notification with delivery state

											
										
										
											2016-03-10 17:29:17 +00:00
-												Rename database management functions.

Rename @transactional to @autocommit.
Rename nested_transaction to tranaction.

											
										
										
											2021-04-14 07:11:01 +01:00
+								@autocommit
-												Change function and variable names for readability and consistency

											
										
										
											2020-06-01 11:45:35 +01:00
+								def update_notification_status_by_id(notification_id, status, sent_by=None, detailed_status_code=None):
-												log more info when we receive multiple delivery callbacks for one notification

Previously, we logged a warning containing the notification reference
and new status. However it wasn't a great message - this new one
includes the notification id, the old status, the time difference and
more.

This separates out logs for callbacks for notifications we don't know
(error level) and duplicates (info level).

											
										
										
											2018-12-20 16:01:39 +00:00
+								    notification = Notification.query.with_for_update().filter(Notification.id == notification_id).first()
-												Make update notification status transcational.
Refactor methods for reusability.

											
										
										
											2016-05-27 12:09:36 +01:00
-												update delivery receipts for countries that return them

some countries don't return delivery receipts
some countries return delivery receipts when they reach the carrier

these countries, we should keep the notifications in sent (aka
sent_internatinally) for. However, for countries that have normal
delivery receipts, we should update them as we do for UK numbers

											
										
										
											2017-05-12 14:59:14 +01:00
+								    if not notification:
-												downgrade error to info for notification not found for callback

When we get a callback from SES, we identify the notification by the
SES reference that we set on the notification after sending. When we
wrote the log message, we assumed that we'd always have a notification
for every callback, so if one couldn't be found we would raise an error
log. This isn't the case for a few reasons:

* We might receive a callback before the sender worker has persisted
  the reference to the database.
* We might have deleted the notification, especially if the service has
  a short data retention period
* We sometimes receive callbacks for references that we have no record
  of whatsoever (this is quite alarming but we have no way of knowing
  why this happens)

The error logs were happening pretty frequently, and we don't have a
real way to solve them at the moment, so lets cut down on noise and
downgrade them to info level for now.

											
										
										
											2019-01-29 15:55:31 +00:00
+								        current_app.logger.info('notification not found for id {} (update to status {})'.format(
-												log more info when we receive multiple delivery callbacks for one notification

Previously, we logged a warning containing the notification reference
and new status. However it wasn't a great message - this new one
includes the notification id, the old status, the time difference and
more.

This separates out logs for callbacks for notifications we don't know
(error level) and duplicates (info level).

											
										
										
											2018-12-20 16:01:39 +00:00
+								            notification_id,
 								            status
 								        ))
 								        return None
 								    if notification.status not in {
 								        NOTIFICATION_CREATED,
 								        NOTIFICATION_SENDING,
 								        NOTIFICATION_PENDING,
 								        NOTIFICATION_SENT,
 								        NOTIFICATION_PENDING_VIRUS_CHECK
 								    }:
 								        _duplicate_update_warning(notification, status)
-												update delivery receipts for countries that return them

some countries don't return delivery receipts
some countries return delivery receipts when they reach the carrier

these countries, we should keep the notifications in sent (aka
sent_internatinally) for. However, for countries that have normal
delivery receipts, we should update them as we do for UK numbers

											
										
										
											2017-05-12 14:59:14 +01:00
+								        return None
-												Allow international letters to be cancelled

Our code was assuming that any notifications with `international` set to
`True` were text messages. It was then trying to look up delivery
information for a notification which wasn’t sent to a phone number,
causing an exception.

											
										
										
											2020-09-09 10:55:55 +01:00
+								    if (
-												Use constant for notification type

Co-authored-by: Katie Smith <klssmith@users.noreply.github.com>
											
										
										
											2020-09-09 11:12:06 +01:00
+								        notification.notification_type == SMS_TYPE
-												Allow international letters to be cancelled

Our code was assuming that any notifications with `international` set to
`True` were text messages. It was then trying to look up delivery
information for a notification which wasn’t sent to a phone number,
causing an exception.

											
										
										
											2020-09-09 10:55:55 +01:00
+								        and notification.international
 								        and not country_records_delivery(notification.phone_prefix)
 								    ):
-												Change DAO to return the notification from an update, not Boolean

- tests reflect this

											
										
										
											2016-09-13 12:29:40 +01:00
+								        return None
-												Some minor refactoring.
- Updated notifications_dao.update_notification_status_by_id with an optional parameter to set the sent_by, this will eliminate a separate update to notifcaitons.
- Added the callback url to the log message, that way we can see if it's the same url failing.
- Stop sending the status callbacks for PENDING status.

											
										
										
											2018-10-24 11:24:53 +01:00
+								    if not notification.sent_by and sent_by:
 								        notification.sent_by = sent_by
-												Reorder updates to the notification / stats tables to match create.

											
										
										
											2016-05-31 10:17:15 +01:00
+								    return _update_notification_status(
 								        notification=notification,
-												Use firetext response code to see if temporary or permanent failure if available

											
										
										
											2020-04-14 15:55:17 +01:00
+								        status=status,
-												Change function and variable names for readability and consistency

											
										
										
											2020-06-01 11:45:35 +01:00
+								        detailed_status_code=detailed_status_code
-												Reorder updates to the notification / stats tables to match create.

											
										
										
											2016-05-31 10:17:15 +01:00
+								    )
-												Adds notification stats update into the callback process

- when a provider callback occurs and we update the status of the notification, also update the statistics table

Adds:
- Mapping object to the clients to handle mapping to various states from the response codes, this replaces the map.
- query lookup in the DAO to get the query based on response type / template type

Tests around rest class and dao to check correct updating of stats

Missing:
- multiple client callbacks will keep incrementing the counts of success/failure. This edge case needs to be handle in a future story.

											
										
										
											2016-03-21 13:24:37 +00:00
-												Rename database management functions.

Rename @transactional to @autocommit.
Rename nested_transaction to tranaction.

											
										
										
											2021-04-14 07:11:01 +01:00
+								@autocommit
-												Removed contented updates the notifications stats table

- As before this is now driven from the notifications history table

- Removed from updates and create
- Signatures changes to removed unused params hits many files
- Also potential issue around rate limiting - we used to get the number sent per day from the stats table - which was a single row lookup, now we have to count this. This applies to EVERY API CALL. Probably not a good thing and should be addressed urgently.

											
										
										
											2016-08-25 11:55:38 +01:00
+								def update_notification_status_by_reference(reference, status):
-												log more info when we receive multiple delivery callbacks for one notification

Previously, we logged a warning containing the notification reference
and new status. However it wasn't a great message - this new one
includes the notification id, the old status, the time difference and
more.

This separates out logs for callbacks for notifications we don't know
(error level) and duplicates (info level).

											
										
										
											2018-12-20 16:01:39 +00:00
+								    # this is used to update letters and emails
 								    notification = Notification.query.filter(Notification.reference == reference).first()
 								    if not notification:
 								        current_app.logger.error('notification not found for reference {} (update to {})'.format(reference, status))
 								        return None
 								    if notification.status not in {
 								        NOTIFICATION_SENDING,
-												make sure log line works if notification still in created

											
										
										
											2018-12-28 14:29:59 +00:00
+								        NOTIFICATION_PENDING
-												log more info when we receive multiple delivery callbacks for one notification

Previously, we logged a warning containing the notification reference
and new status. However it wasn't a great message - this new one
includes the notification id, the old status, the time difference and
more.

This separates out logs for callbacks for notifications we don't know
(error level) and duplicates (info level).

											
										
										
											2018-12-20 16:01:39 +00:00
+								    }:
 								        _duplicate_update_warning(notification, status)
-												Change DAO to return the notification from an update, not Boolean

- tests reflect this

											
										
										
											2016-09-13 12:29:40 +01:00
+								        return None
-												Make update notification status transcational.
Refactor methods for reusability.

											
										
										
											2016-05-27 12:09:36 +01:00
-												Reorder updates to the notification / stats tables to match create.

											
										
										
											2016-05-31 10:17:15 +01:00
+								    return _update_notification_status(
 								        notification=notification,
-												Removed contented updates the notifications stats table

- As before this is now driven from the notifications history table

- Removed from updates and create
- Signatures changes to removed unused params hits many files
- Also potential issue around rate limiting - we used to get the number sent per day from the stats table - which was a single row lookup, now we have to count this. This applies to EVERY API CALL. Probably not a good thing and should be addressed urgently.

											
										
										
											2016-08-25 11:55:38 +01:00
+								        status=status
-												Reorder updates to the notification / stats tables to match create.

											
										
										
											2016-05-31 10:17:15 +01:00
+								    )
-												Ad a reference to the model

- used if 3rd party needs to record an ID for reconciliation purposes

											
										
										
											2016-03-11 09:40:35 +00:00
-												Rename database management functions.

Rename @transactional to @autocommit.
Rename nested_transaction to tranaction.

											
										
										
											2021-04-14 07:11:01 +01:00
+								@autocommit
-												Firetext does not have a status code for temporary-failure.
In order to set a message as temporary-failure, we check if it is in pending status first.
Otherwise a delivery receipt for failure is set to permanent failure.

											
										
										
											2016-05-26 16:46:00 +01:00
+								def dao_update_notification(notification):
 								    notification.updated_at = datetime.utcnow()
 								    db.session.add(notification)
-												Added page_size parameter for notifications api. All tests passing.

Add page_size and total parameters to all calls for notifications.

											
										
										
											2016-04-19 10:52:52 +01:00
+								def get_notifications_for_job(service_id, job_id, filter_dict=None, page=1, page_size=None):
 								    if page_size is None:
 								        page_size = current_app.config['PAGE_SIZE']
-												Filtering added and tests working.

											
										
										
											2016-03-21 12:37:34 +00:00
+								    query = Notification.query.filter_by(service_id=service_id, job_id=job_id)
-												Update job.notifications_delivered and job.notifications_failed count when updating the status of a notification.

											
										
										
											2016-05-24 11:31:44 +01:00
+								    query = _filter_query(query, filter_dict)
-												Added job row number to the notification for csv jobs. All tests passing.

											
										
										
											2016-05-19 10:46:03 +01:00
+								    return query.order_by(asc(Notification.job_row_number)).paginate(
-												Capture the count of sent notifications for a job

											
										
										
											2016-03-04 14:25:28 +00:00
+								        page=page,
-												Added page_size parameter for notifications api. All tests passing.

Add page_size and total parameters to all calls for notifications.

											
										
										
											2016-04-19 10:52:52 +01:00
+								        per_page=page_size
-												Capture the count of sent notifications for a job

											
										
										
											2016-03-04 14:25:28 +00:00
+								    )
-												Fixed up the get_notitication endpoint

- returns a notification

											
										
										
											2016-02-16 11:22:44 +00:00
-												Update the new endpoint to return a 404 if the job or service id are not found.

All our endpoint should perform a check that the params are valid - this is an easy whay to check that and is standard for our endpoints.
I reverted the query to just filter by job id.

											
										
										
											2019-10-03 14:58:49 +01:00
+								def dao_get_notification_count_for_job_id(*, job_id):
 								    return Notification.query.filter_by(job_id=job_id).count()
-												Return count of notifications in the database for a job

When we cancel a job, we need to check if all notifications are
already in the database. So far, we were querying for all
notification objects in the database and counting them in
admin app, which runs into pagination problems for large jobs,
and could time out for very large jobs.

											
										
										
											2019-09-24 16:52:18 +01:00
-												add actual_template relationship to notification

also renamed the function to make it apparent that it'll join and grab personalisation

											
										
										
											2016-08-09 13:07:48 +01:00
+								def get_notification_with_personalisation(service_id, notification_id, key_type):
-												add filters to GET /notifications endpoints to only return for provided key_type

if api_key used to access endpoint is type team, endpoints only return that type -
will 404 if you provide a different ID. Same applies for normal (normal api keys
cannot see team notifications)
also, for convenience, set sample_notification to supply key_type of KEY_TYPE_NORMAL
by default

											
										
										
											2016-06-30 18:43:15 +01:00
+								    filter_dict = {'service_id': service_id, 'id': notification_id}
 								    if key_type:
 								        filter_dict['key_type'] = key_type
-												Update notification queries with new TemplateHistory join

Changes the queries to use `Notification.template` relationship instead
of `.template_history` and TemplateHistory model insteaad of Template.

											
										
										
											2017-11-09 14:25:47 +00:00
+								    return Notification.query.filter_by(**filter_dict).options(joinedload('template')).one()
-												Fetch endpoints for notifications

- includes check on token type to ensure clients can perform admin style fetches

											
										
										
											2016-03-01 13:30:10 +00:00
-												Update get_notification_by_id to take an optional service_id

It can be useful to get a notification by id while checking that the
notification belongs to a given service. This changes the
get_notification_by_id DAO function to optionally also filter by
service_id so that we can check this.

											
										
										
											2018-11-15 10:55:29 +00:00
+								def get_notification_by_id(notification_id, service_id=None, _raise=False):
 								    filters = [Notification.id == notification_id]
 								    if service_id:
 								        filters.append(Notification.service_id == service_id)
 								    query = Notification.query.filter(*filters)
 								    return query.one() if _raise else query.first()
-												Updates to fire text integration:

- client updated to raise errors with fire text error codes/messages

New endpoint
- /notifications/sms/firetext
For delivery notifications to be sent to.

											
										
										
											2016-03-10 15:40:41 +00:00
-												Make indentation more sensible

Starting arguments on their own line and putting the closing parenthesis
on it’s own line because any subsequent changes to the arguments diff
cleanly (ie without touching any other lines).

											
										
										
											2016-09-23 09:43:25 +01:00
+								def get_notifications_for_service(
-												If the notifications that are being deleted are letters then we need to delete the letter from s3 as well.

											
										
										
											2018-08-08 16:20:25 +01:00
+								        service_id,
 								        filter_dict=None,
 								        page=1,
 								        page_size=None,
-												Don't return pagination links for API Message log requests

Flask-SQLAlchemy paginate function issues a separate query to get
the total count of rows for a given filter. This query (with
filters used by the API integration Message log page) is slow for
services with large number of notifications.

Since Message log page doesn't actually allow users to paginate
through the response (it only shows the last 50 messages) we can
use limit instead of paginate, which requires passing in another
flag from admin to the dao method.

`count` flag has been added to `paginate` in March 2018, however
there was no release of flask-sqlalchemy since then, so we need
to pull the dev version of the package from Github.

											
										
										
											2019-01-07 17:12:00 +00:00
+								        count_pages=True,
-												If the notifications that are being deleted are letters then we need to delete the letter from s3 as well.

											
										
										
											2018-08-08 16:20:25 +01:00
+								        limit_days=None,
 								        key_type=None,
 								        personalisation=False,
 								        include_jobs=False,
 								        include_from_test_key=False,
 								        older_than=None,
 								        client_reference=None,
-												Optimise queries run for creating pagination links

We have been running in to the problem in
https://github.com/pallets/flask-sqlalchemy/issues/518 where
our page loads very slow when viewing a single page of notifications
for a service in the admin app. Tracing this back and using SQL
explain analyze I can see that getting the notifications takes about
a second but the second query to count how many notifications there
are (to work out if there is a next page of pagination) can take up
to 100 seconds.

As suggested in that issue, we do the pagination ourselves.
Our pagination doesn't need us to know exactly how many notifications
there are, just whether there are any on the next page and that can
be done without running the slow query to count how many
notifications in total by using `count_pages=False`.

											
										
										
											2021-12-03 17:07:03 +00:00
+								        include_one_off=True,
 								        error_out=True
-												Make indentation more sensible

Starting arguments on their own line and putting the closing parenthesis
on it’s own line because any subsequent changes to the arguments diff
cleanly (ie without touching any other lines).

											
										
										
											2016-09-23 09:43:25 +01:00
+								):
-												Added page_size parameter for notifications api. All tests passing.

Add page_size and total parameters to all calls for notifications.

											
										
										
											2016-04-19 10:52:52 +01:00
+								    if page_size is None:
 								        page_size = current_app.config['PAGE_SIZE']
-												Ensure that querying for notifications does not return CSV data unless:

- It's explicitly asked for AND
- you are using a LIVE key

Team and Test keys CANNOT retrieve CSV/Job derived notifications

											
										
										
											2016-09-15 15:59:02 +01:00
-												Added limit_days filter to get notifications dao.

											
										
										
											2016-04-28 16:10:35 +01:00
+								    filters = [Notification.service_id == service_id]
 								    if limit_days is not None:
-												rename days_ago to midnight_n_days_ago

also add some more timezone boundary tests and minor code cleanup

											
										
										
											2018-04-30 11:50:56 +01:00
+								        filters.append(Notification.created_at >= midnight_n_days_ago(limit_days))
-												Added limit_days filter to get notifications dao.

											
										
										
											2016-04-28 16:10:35 +01:00
-												Create 'v2' get notifications route

Return multiple notifications for a service.
Choosing a page_size or a page_number is no longer allowed.
Instead, there is a `next` link included with will return the
next {default_page_size} notifications in the sequence.

Query parameters accepted are:

 - template_type: filter by specific template types
 - status: filter by specific statuses
 - older_than: return a chronological list of notifications older
   than this one. The notification with the id that is passed in
   is _not_ returned.

Note that both `template_type` and `status` can accept multiple
parameters.  Thus it is possible to call
`/v2/notifications?status=created&status=sending&status=delivered`

											
										
										
											2016-11-23 11:44:38 +00:00
+								    if older_than is not None:
 								        older_than_created_at = db.session.query(
 								            Notification.created_at).filter(Notification.id == older_than).as_scalar()
 								        filters.append(Notification.created_at < older_than_created_at)
-												Add one_off filter when getting all notifications for a service

Added the option to filter by one_off messages to the DAO function
`get_notifications_for_service`. Previously, one-off notifications
were not returned - this has changed so that the default is for
one-off notifications to be returned. Also simplified the `include_jobs`
filter for this function.

The DAO function gets used in 3 places - for the V1 and V2 API endpoints,
which will now start to return one-off messages. It also gets used by
the admin app which needs to pass in `include_one_off=False` to the
`get_all_notifications_for_service` where we don't want one-off
notifications to show, such as the API message log page.

											
										
										
											2018-07-18 10:54:20 +01:00
+								    if not include_jobs:
 								        filters.append(Notification.job_id == None)  # noqa
 								    if not include_one_off:
 								        filters.append(Notification.created_by_id == None)  # noqa
-												Ensure that querying for notifications does not return CSV data unless:

- It's explicitly asked for AND
- you are using a LIVE key

Team and Test keys CANNOT retrieve CSV/Job derived notifications

											
										
										
											2016-09-15 15:59:02 +01:00
-												add filters to GET /notifications endpoints to only return for provided key_type

if api_key used to access endpoint is type team, endpoints only return that type -
will 404 if you provide a different ID. Same applies for normal (normal api keys
cannot see team notifications)
also, for convenience, set sample_notification to supply key_type of KEY_TYPE_NORMAL
by default

											
										
										
											2016-06-30 18:43:15 +01:00
+								    if key_type is not None:
 								        filters.append(Notification.key_type == key_type)
-												Make DAO optionally return test key notifications

Developers need visibility of what their integration is doing within
the app. This includes notifications sent with a test key.

This commit adds an optional, defaults-to-false parameter to include
notifications sent from a test API key when getting notifications.

											
										
										
											2016-09-23 10:27:10 +01:00
+								    elif not include_from_test_key:
-												Exclude test key notifications from:

- get all notifications by service
- template usage
- most recently used templates

Ensures that the dashboard shows no test key data. Supplements: https://github.com/alphagov/notifications-api/pull/677 which excludes CSV data. This branches from that so is dependant.

											
										
										
											2016-09-16 13:47:09 +01:00
+								        filters.append(Notification.key_type != KEY_TYPE_TEST)
-												Retrieve notifications by reference:
* Use get_notifications instead (references may not be unique)

											
										
										
											2016-12-12 18:04:20 +00:00
+								    if client_reference is not None:
 								        filters.append(Notification.client_reference == client_reference)
-												Added limit_days filter to get notifications dao.

											
										
										
											2016-04-28 16:10:35 +01:00
+								    query = Notification.query.filter(*filters)
-												Update job.notifications_delivered and job.notifications_failed count when updating the status of a notification.

											
										
										
											2016-05-24 11:31:44 +01:00
+								    query = _filter_query(query, filter_dict)
-												add actual_template relationship to notification

also renamed the function to make it apparent that it'll join and grab personalisation

											
										
										
											2016-08-09 13:07:48 +01:00
+								    if personalisation:
-												update schema to use template_history for accurate template details

only in the public notification endpoint so far for fear of breaking
things - in an ideal world i'd remove the template relationship
from models entirely and replace that with actual_template

											
										
										
											2016-08-09 16:53:09 +01:00
+								        query = query.options(
-												Update notification queries with new TemplateHistory join

Changes the queries to use `Notification.template` relationship instead
of `.template_history` and TemplateHistory model insteaad of Template.

											
										
										
											2017-11-09 14:25:47 +00:00
+								            joinedload('template')
-												add actual_template relationship to notification

also renamed the function to make it apparent that it'll join and grab personalisation

											
										
										
											2016-08-09 13:07:48 +01:00
+								        )
-												Ensure that querying for notifications does not return CSV data unless:

- It's explicitly asked for AND
- you are using a LIVE key

Team and Test keys CANNOT retrieve CSV/Job derived notifications

											
										
										
											2016-09-15 15:59:02 +01:00
-												Purge functional tests command added with passing test.

Code quality fix.

											
										
										
											2016-05-06 11:07:11 +01:00
+								    return query.order_by(desc(Notification.created_at)).paginate(
-												Fetch endpoints for notifications

- includes check on token type to ensure clients can perform admin style fetches

											
										
										
											2016-03-01 13:30:10 +00:00
+								        page=page,
-												Don't return pagination links for API Message log requests

Flask-SQLAlchemy paginate function issues a separate query to get
the total count of rows for a given filter. This query (with
filters used by the API integration Message log page) is slow for
services with large number of notifications.

Since Message log page doesn't actually allow users to paginate
through the response (it only shows the last 50 messages) we can
use limit instead of paginate, which requires passing in another
flag from admin to the dao method.

`count` flag has been added to `paginate` in March 2018, however
there was no release of flask-sqlalchemy since then, so we need
to pull the dev version of the package from Github.

											
										
										
											2019-01-07 17:12:00 +00:00
+								        per_page=page_size,
-												Optimise queries run for creating pagination links

We have been running in to the problem in
https://github.com/pallets/flask-sqlalchemy/issues/518 where
our page loads very slow when viewing a single page of notifications
for a service in the admin app. Tracing this back and using SQL
explain analyze I can see that getting the notifications takes about
a second but the second query to count how many notifications there
are (to work out if there is a next page of pagination) can take up
to 100 seconds.

As suggested in that issue, we do the pagination ourselves.
Our pagination doesn't need us to know exactly how many notifications
there are, just whether there are any on the next page and that can
be done without running the slow query to count how many
notifications in total by using `count_pages=False`.

											
										
										
											2021-12-03 17:07:03 +00:00
+								        count=count_pages,
 								        error_out=error_out,
-												Fetch endpoints for notifications

- includes check on token type to ensure clients can perform admin style fetches

											
										
										
											2016-03-01 13:30:10 +00:00
+								    )
-												Filtering added and tests working.

											
										
										
											2016-03-21 12:37:34 +00:00
-												Update job.notifications_delivered and job.notifications_failed count when updating the status of a notification.

											
										
										
											2016-05-24 11:31:44 +01:00
+								def _filter_query(query, filter_dict=None):
-												Added functionality to allow filtering by multiple arguments.

Removed commented out code.

											
										
										
											2016-04-04 13:13:29 +01:00
+								    if filter_dict is None:
-												Simplify `_filter_query()` function

											
										
										
											2016-11-22 17:30:03 +00:00
+								        return query
 								    multidict = MultiDict(filter_dict)
 								    # filter by status
 								    statuses = multidict.getlist('status')
-												Added functionality to allow filtering by multiple arguments.

Removed commented out code.

											
										
										
											2016-04-04 13:13:29 +01:00
+								    if statuses:
-												Test returning notifications by "?status=failed"

Check that all failure states are returned by asking for
notifications of type "failure".

											
										
										
											2016-11-25 14:55:45 +00:00
+								        statuses = Notification.substitute_status(statuses)
-												Added functionality to allow filtering by multiple arguments.

Removed commented out code.

											
										
										
											2016-04-04 13:13:29 +01:00
+								        query = query.filter(Notification.status.in_(statuses))
-												Simplify `_filter_query()` function

											
										
										
											2016-11-22 17:30:03 +00:00
 								    # filter by template
 								    template_types = multidict.getlist('template_type')
-												Added functionality to allow filtering by multiple arguments.

Removed commented out code.

											
										
										
											2016-04-04 13:13:29 +01:00
+								    if template_types:
-												Remove the join to TemplateHistory.
We are adding an index to Notifications to optimize the get_notifications_for_service. We need to build the index concurrently which can not be run inside a transaction block so the index will need to be run on the db directly.

CREATE INDEX CONCURRENTLY ix_notifications_service_created_at ON notifications (service_id, created_at);
DROP INDEX CONCURRENTLY ix_notifications_service_created_at

											
										
										
											2018-12-11 14:57:10 +00:00
+								        query = query.filter(Notification.notification_type.in_(template_types))
-												Simplify `_filter_query()` function

											
										
										
											2016-11-22 17:30:03 +00:00
-												Fetch endpoints for notifications

- includes check on token type to ensure clients can perform admin style fetches

											
										
										
											2016-03-01 13:30:10 +00:00
+								    return query
-												Scheduled tasks to clean up the database

- tasks run hourly
- uses celery beat to schedule the tasks

4 new tasks
- delete verify codes (after 1 day)
- delete invitations (after 1 day)
- delete successful notifications  (after 1 day)
- delete failed notifications (after 7 days)

Delete methods in the DAO classes

											
										
										
											2016-03-09 17:46:01 +00:00
-												Rename database management functions.

Rename @transactional to @autocommit.
Rename nested_transaction to tranaction.

											
										
										
											2021-04-14 07:11:01 +01:00
+								@autocommit
-												Transaction to insert history and delete notifications.

Need to test the performance of this function, then we can call it from the task.
- Create a temporary table to insert ids of the desired rows, limit by 10K (might be too low).
- Insert into NotifcationHistory select from notification where id in temp table
- Delete from Notifications where id in temp table
- drop temp table

We should be able to iterate of this. The query stats for the query to create the temp table are very good, 17ms.

											
										
										
											2020-03-23 15:53:53 +00:00
+								def insert_notification_history_delete_notifications(
-												Refactor delete_notifications_older_than_retention_by_type to use a new strategy.

The insert_notification_history_delete_notifications function uses a temp table to store the data to insert and delete. This will save extra queries while performing the insert and delete operations.
The function is written in such a way that if the task is stop while processing when it's started up again it will just pick up where it left off.

I've made a decision to delete all test data in one query, I don't anticipate a problem with that.

The performance of this might also be better than last nights test because we are inserting everything we need for the NotificationHistory insert, so we don't need the join to Notifications to perform the insert.

											
										
										
											2020-03-24 12:21:28 +00:00
+								    notification_type, service_id, timestamp_to_delete_backwards_from, qry_limit=50000
-												Transaction to insert history and delete notifications.

Need to test the performance of this function, then we can call it from the task.
- Create a temporary table to insert ids of the desired rows, limit by 10K (might be too low).
- Insert into NotifcationHistory select from notification where id in temp table
- Delete from Notifications where id in temp table
- drop temp table

We should be able to iterate of this. The query stats for the query to create the temp table are very good, 17ms.

											
										
										
											2020-03-23 15:53:53 +00:00
+								):
-												make delete notification tasks parallel by notification type

we used to do this until apr 2020. Let's try doing it again.
Back then, we had problems with timing. We did two things in spring
2020:

We moved to using an intermediary temp table [1]
We stopped the tasks being parallelised [2]

However, it turned out the real time saving was from changing what
services we delete for [3]. The task was actually CPU-bound rather than
DB-bound, so that's probably why having the tasks in parallel wasn't
helping, since they were all competing for the same CPU. It's worth
trying the parallel steps again now that we're no longer CPU bound.

Note: Temporary tables are in their own postgres schema, and are only
viewable by the current session (session == connection. Each celery
worker process has its own db connection). We don't need to worry about
separate workers both trying to use the same table at once.

I've also added a "DROP ON COMMIT" directive to the table definition
just to ensure it doesn't persist past the task even if there's an
exception. (This also drops on rollback).

Cronitor looks at the three functions separately so we don't need to worry
about the main task taking milliseconds where it used to take hours as
it isn't monitored itself.

I've also removed some unnecessary redundant exception logs.

[1] https://github.com/alphagov/notifications-api/pull/2767
[2] https://github.com/alphagov/notifications-api/pull/2798
[3] https://github.com/alphagov/notifications-api/pull/3381

											
										
										
											2021-12-01 14:28:08 +00:00
+								    """
 								    Delete up to 50,000 notifications that are past retention for a notification type and service.
 								    Steps are as follows:
 								    Create a temporary notifications table
 								    Populate that table with up to 50k notifications that are to be deleted. (Note: no specified order)
 								    Insert everything in the temp table into notification history
 								    Delete from notifications if notification id is in the temp table
 								    Drop the temp table (automatically when the transaction commits)
 								    Temporary tables are in a separate postgres schema, and only visible to the current session (db connection,
 								    in a celery task there's one connection per thread.)
 								    """
-												Refactor delete_notifications_older_than_retention_by_type to use a new strategy.

The insert_notification_history_delete_notifications function uses a temp table to store the data to insert and delete. This will save extra queries while performing the insert and delete operations.
The function is written in such a way that if the task is stop while processing when it's started up again it will just pick up where it left off.

I've made a decision to delete all test data in one query, I don't anticipate a problem with that.

The performance of this might also be better than last nights test because we are inserting everything we need for the NotificationHistory insert, so we don't need the join to Notifications to perform the insert.

											
										
										
											2020-03-24 12:21:28 +00:00
+								    # Setting default query limit to 50,000 which take about 48 seconds on current table size
 								    # 10, 000 took 11s and 100,000 took 1 min 30 seconds.
-												Transaction to insert history and delete notifications.

Need to test the performance of this function, then we can call it from the task.
- Create a temporary table to insert ids of the desired rows, limit by 10K (might be too low).
- Insert into NotifcationHistory select from notification where id in temp table
- Delete from Notifications where id in temp table
- drop temp table

We should be able to iterate of this. The query stats for the query to create the temp table are very good, 17ms.

											
										
										
											2020-03-23 15:53:53 +00:00
+								    select_into_temp_table = """
-												make delete notification tasks parallel by notification type

we used to do this until apr 2020. Let's try doing it again.
Back then, we had problems with timing. We did two things in spring
2020:

We moved to using an intermediary temp table [1]
We stopped the tasks being parallelised [2]

However, it turned out the real time saving was from changing what
services we delete for [3]. The task was actually CPU-bound rather than
DB-bound, so that's probably why having the tasks in parallel wasn't
helping, since they were all competing for the same CPU. It's worth
trying the parallel steps again now that we're no longer CPU bound.

Note: Temporary tables are in their own postgres schema, and are only
viewable by the current session (session == connection. Each celery
worker process has its own db connection). We don't need to worry about
separate workers both trying to use the same table at once.

I've also added a "DROP ON COMMIT" directive to the table definition
just to ensure it doesn't persist past the task even if there's an
exception. (This also drops on rollback).

Cronitor looks at the three functions separately so we don't need to worry
about the main task taking milliseconds where it used to take hours as
it isn't monitored itself.

I've also removed some unnecessary redundant exception logs.

[1] https://github.com/alphagov/notifications-api/pull/2767
[2] https://github.com/alphagov/notifications-api/pull/2798
[3] https://github.com/alphagov/notifications-api/pull/3381

											
										
										
											2021-12-01 14:28:08 +00:00
+								         CREATE TEMP TABLE NOTIFICATION_ARCHIVE ON COMMIT DROP AS
-												Refactor delete_notifications_older_than_retention_by_type to use a new strategy.

The insert_notification_history_delete_notifications function uses a temp table to store the data to insert and delete. This will save extra queries while performing the insert and delete operations.
The function is written in such a way that if the task is stop while processing when it's started up again it will just pick up where it left off.

I've made a decision to delete all test data in one query, I don't anticipate a problem with that.

The performance of this might also be better than last nights test because we are inserting everything we need for the NotificationHistory insert, so we don't need the join to Notifications to perform the insert.

											
										
										
											2020-03-24 12:21:28 +00:00
+								         SELECT id, job_id, job_row_number, service_id, template_id, template_version, api_key_id,
 								             key_type, notification_type, created_at, sent_at, sent_by, updated_at, reference, billable_units,
 								             client_reference, international, phone_prefix, rate_multiplier, notification_status,
 								              created_by_id, postage, document_download_count
-												Transaction to insert history and delete notifications.

Need to test the performance of this function, then we can call it from the task.
- Create a temporary table to insert ids of the desired rows, limit by 10K (might be too low).
- Insert into NotifcationHistory select from notification where id in temp table
- Delete from Notifications where id in temp table
- drop temp table

We should be able to iterate of this. The query stats for the query to create the temp table are very good, 17ms.

											
										
										
											2020-03-23 15:53:53 +00:00
+								          FROM notifications
 								        WHERE service_id = :service_id
 								          AND notification_type = :notification_type
-												Change as per code review
 - fix test name
 - make query filter consistent with each other
 - add comment for clarity
 - add inner loop to continue to insert and delete notifications while the delete count is greater than 0

											
										
										
											2020-03-24 14:09:13 +00:00
+								          AND created_at < :timestamp_to_delete_backwards_from
-												Add team key to the notifications to delete/insert into notifications/notification_history

											
										
										
											2020-03-31 09:23:41 +01:00
+								          AND key_type in ('normal', 'team')
-												Transaction to insert history and delete notifications.

Need to test the performance of this function, then we can call it from the task.
- Create a temporary table to insert ids of the desired rows, limit by 10K (might be too low).
- Insert into NotifcationHistory select from notification where id in temp table
- Delete from Notifications where id in temp table
- drop temp table

We should be able to iterate of this. The query stats for the query to create the temp table are very good, 17ms.

											
										
										
											2020-03-23 15:53:53 +00:00
+								        limit :qry_limit
 								        """
-												Do not delete letters if not in final state

A few weeks ago, we deleted some pdf letters that had reached their
retention period. However, these letters were in the 'created' state so
it's very arguable that we should not have deleted them because we were
expecting to resend them and were unable to. Part of the reason for this
is that we marked the letters back to `created` as the status but we did
not nullify the `sent_at` timestamp, meaning the check on
https://github.com/alphagov/notifications-api/blob/ebb43082d51b9f27b17190abb0537a710a544408/app/dao/notifications_dao.py#L346
did not catch it. Regardless of that check, which controls whether the
files were removed from S3, they were also archived into the
`notification_history` table as by default.

This commit does changes our code such that letters that are not in
their final state do not go through our retention process. This could
mean they violate their retention policy but that is likely the lesser
of two evils (the other being we delete them and are unable to resend
them).

Note, `sending` letters have been included in those not to be removed
because there is a risk that we give the letter to DVLA and put it in
`sending` but then they come back to us later telling us they've had
problems and require us to resend.

											
										
										
											2020-12-16 10:50:11 +00:00
+								    select_into_temp_table_for_letters = """
-												make delete notification tasks parallel by notification type

we used to do this until apr 2020. Let's try doing it again.
Back then, we had problems with timing. We did two things in spring
2020:

We moved to using an intermediary temp table [1]
We stopped the tasks being parallelised [2]

However, it turned out the real time saving was from changing what
services we delete for [3]. The task was actually CPU-bound rather than
DB-bound, so that's probably why having the tasks in parallel wasn't
helping, since they were all competing for the same CPU. It's worth
trying the parallel steps again now that we're no longer CPU bound.

Note: Temporary tables are in their own postgres schema, and are only
viewable by the current session (session == connection. Each celery
worker process has its own db connection). We don't need to worry about
separate workers both trying to use the same table at once.

I've also added a "DROP ON COMMIT" directive to the table definition
just to ensure it doesn't persist past the task even if there's an
exception. (This also drops on rollback).

Cronitor looks at the three functions separately so we don't need to worry
about the main task taking milliseconds where it used to take hours as
it isn't monitored itself.

I've also removed some unnecessary redundant exception logs.

[1] https://github.com/alphagov/notifications-api/pull/2767
[2] https://github.com/alphagov/notifications-api/pull/2798
[3] https://github.com/alphagov/notifications-api/pull/3381

											
										
										
											2021-12-01 14:28:08 +00:00
+								         CREATE TEMP TABLE NOTIFICATION_ARCHIVE ON COMMIT DROP AS
-												Do not delete letters if not in final state

A few weeks ago, we deleted some pdf letters that had reached their
retention period. However, these letters were in the 'created' state so
it's very arguable that we should not have deleted them because we were
expecting to resend them and were unable to. Part of the reason for this
is that we marked the letters back to `created` as the status but we did
not nullify the `sent_at` timestamp, meaning the check on
https://github.com/alphagov/notifications-api/blob/ebb43082d51b9f27b17190abb0537a710a544408/app/dao/notifications_dao.py#L346
did not catch it. Regardless of that check, which controls whether the
files were removed from S3, they were also archived into the
`notification_history` table as by default.

This commit does changes our code such that letters that are not in
their final state do not go through our retention process. This could
mean they violate their retention policy but that is likely the lesser
of two evils (the other being we delete them and are unable to resend
them).

Note, `sending` letters have been included in those not to be removed
because there is a risk that we give the letter to DVLA and put it in
`sending` but then they come back to us later telling us they've had
problems and require us to resend.

											
										
										
											2020-12-16 10:50:11 +00:00
+								         SELECT id, job_id, job_row_number, service_id, template_id, template_version, api_key_id,
 								             key_type, notification_type, created_at, sent_at, sent_by, updated_at, reference, billable_units,
 								             client_reference, international, phone_prefix, rate_multiplier, notification_status,
 								              created_by_id, postage, document_download_count
 								          FROM notifications
 								        WHERE service_id = :service_id
 								          AND notification_type = :notification_type
 								          AND created_at < :timestamp_to_delete_backwards_from
 								          AND notification_status NOT IN ('pending-virus-check', 'created', 'sending')
 								          AND key_type in ('normal', 'team')
 								        limit :qry_limit
 								        """
-												Refactor delete_notifications_older_than_retention_by_type to use a new strategy.

The insert_notification_history_delete_notifications function uses a temp table to store the data to insert and delete. This will save extra queries while performing the insert and delete operations.
The function is written in such a way that if the task is stop while processing when it's started up again it will just pick up where it left off.

I've made a decision to delete all test data in one query, I don't anticipate a problem with that.

The performance of this might also be better than last nights test because we are inserting everything we need for the NotificationHistory insert, so we don't need the join to Notifications to perform the insert.

											
										
										
											2020-03-24 12:21:28 +00:00
+								    # Insert into NotificationHistory if the row already exists do nothing.
-												Transaction to insert history and delete notifications.

Need to test the performance of this function, then we can call it from the task.
- Create a temporary table to insert ids of the desired rows, limit by 10K (might be too low).
- Insert into NotifcationHistory select from notification where id in temp table
- Delete from Notifications where id in temp table
- drop temp table

We should be able to iterate of this. The query stats for the query to create the temp table are very good, 17ms.

											
										
										
											2020-03-23 15:53:53 +00:00
+								    insert_query = """
 								        insert into notification_history
-												Refactor delete_notifications_older_than_retention_by_type to use a new strategy.

The insert_notification_history_delete_notifications function uses a temp table to store the data to insert and delete. This will save extra queries while performing the insert and delete operations.
The function is written in such a way that if the task is stop while processing when it's started up again it will just pick up where it left off.

I've made a decision to delete all test data in one query, I don't anticipate a problem with that.

The performance of this might also be better than last nights test because we are inserting everything we need for the NotificationHistory insert, so we don't need the join to Notifications to perform the insert.

											
										
										
											2020-03-24 12:21:28 +00:00
+								         SELECT * from NOTIFICATION_ARCHIVE
 								          ON CONFLICT ON CONSTRAINT notification_history_pkey
 								          DO NOTHING
-												Transaction to insert history and delete notifications.

Need to test the performance of this function, then we can call it from the task.
- Create a temporary table to insert ids of the desired rows, limit by 10K (might be too low).
- Insert into NotifcationHistory select from notification where id in temp table
- Delete from Notifications where id in temp table
- drop temp table

We should be able to iterate of this. The query stats for the query to create the temp table are very good, 17ms.

											
										
										
											2020-03-23 15:53:53 +00:00
+								    """
 								    delete_query = """
 								        DELETE FROM notifications
 								        where id in (select id from NOTIFICATION_ARCHIVE)
 								    """
 								    input_params = {
 								        "service_id": service_id,
 								        "notification_type": notification_type,
-												Refactor delete_notifications_older_than_retention_by_type to use a new strategy.

The insert_notification_history_delete_notifications function uses a temp table to store the data to insert and delete. This will save extra queries while performing the insert and delete operations.
The function is written in such a way that if the task is stop while processing when it's started up again it will just pick up where it left off.

I've made a decision to delete all test data in one query, I don't anticipate a problem with that.

The performance of this might also be better than last nights test because we are inserting everything we need for the NotificationHistory insert, so we don't need the join to Notifications to perform the insert.

											
										
										
											2020-03-24 12:21:28 +00:00
+								        "timestamp_to_delete_backwards_from": timestamp_to_delete_backwards_from,
-												Transaction to insert history and delete notifications.

Need to test the performance of this function, then we can call it from the task.
- Create a temporary table to insert ids of the desired rows, limit by 10K (might be too low).
- Insert into NotifcationHistory select from notification where id in temp table
- Delete from Notifications where id in temp table
- drop temp table

We should be able to iterate of this. The query stats for the query to create the temp table are very good, 17ms.

											
										
										
											2020-03-23 15:53:53 +00:00
+								        "qry_limit": qry_limit
 								    }
-												Reduce log messages.

											
										
										
											2020-03-25 08:08:33 +00:00
-												Do not delete letters if not in final state

A few weeks ago, we deleted some pdf letters that had reached their
retention period. However, these letters were in the 'created' state so
it's very arguable that we should not have deleted them because we were
expecting to resend them and were unable to. Part of the reason for this
is that we marked the letters back to `created` as the status but we did
not nullify the `sent_at` timestamp, meaning the check on
https://github.com/alphagov/notifications-api/blob/ebb43082d51b9f27b17190abb0537a710a544408/app/dao/notifications_dao.py#L346
did not catch it. Regardless of that check, which controls whether the
files were removed from S3, they were also archived into the
`notification_history` table as by default.

This commit does changes our code such that letters that are not in
their final state do not go through our retention process. This could
mean they violate their retention policy but that is likely the lesser
of two evils (the other being we delete them and are unable to resend
them).

Note, `sending` letters have been included in those not to be removed
because there is a risk that we give the letter to DVLA and put it in
`sending` but then they come back to us later telling us they've had
problems and require us to resend.

											
										
										
											2020-12-16 10:50:11 +00:00
+								    select_to_use = select_into_temp_table_for_letters if notification_type == 'letter' else select_into_temp_table
 								    db.session.execute(select_to_use, input_params)
-												Transaction to insert history and delete notifications.

Need to test the performance of this function, then we can call it from the task.
- Create a temporary table to insert ids of the desired rows, limit by 10K (might be too low).
- Insert into NotifcationHistory select from notification where id in temp table
- Delete from Notifications where id in temp table
- drop temp table

We should be able to iterate of this. The query stats for the query to create the temp table are very good, 17ms.

											
										
										
											2020-03-23 15:53:53 +00:00
-												Update the query to only return the count from the table since that is
all we care about.

https://www.pivotaltracker.com/story/show/180262357

											
										
										
											2021-11-17 14:46:52 +00:00
+								    result = db.session.execute("select count(*) from NOTIFICATION_ARCHIVE").fetchone()[0]
-												Transaction to insert history and delete notifications.

Need to test the performance of this function, then we can call it from the task.
- Create a temporary table to insert ids of the desired rows, limit by 10K (might be too low).
- Insert into NotifcationHistory select from notification where id in temp table
- Delete from Notifications where id in temp table
- drop temp table

We should be able to iterate of this. The query stats for the query to create the temp table are very good, 17ms.

											
										
										
											2020-03-23 15:53:53 +00:00
 								    db.session.execute(insert_query)
 								    db.session.execute(delete_query)
-												Update the query to only return the count from the table since that is
all we care about.

https://www.pivotaltracker.com/story/show/180262357

											
										
										
											2021-11-17 14:46:52 +00:00
+								    return result
-												Transaction to insert history and delete notifications.

Need to test the performance of this function, then we can call it from the task.
- Create a temporary table to insert ids of the desired rows, limit by 10K (might be too low).
- Insert into NotifcationHistory select from notification where id in temp table
- Delete from Notifications where id in temp table
- drop temp table

We should be able to iterate of this. The query stats for the query to create the temp table are very good, 17ms.

											
										
										
											2020-03-23 15:53:53 +00:00
-												split delete task up into per service

we really don't gain anything by running each service delete in sequence
- we get the services, and then just loop through them deleting per
service. By deleting per service in separate tasks, we can take
advantage of parallelism. the only thing we lose is some log lines but I
don't think we're that interested in them.

only set query limit at the move_notifications dao function - the task
doesn't really care about the technical implementation of how it deletes
the notifications

											
										
										
											2021-12-06 09:30:48 +00:00
+								def move_notifications_to_notification_history(
 								    notification_type,
 								    service_id,
 								    timestamp_to_delete_backwards_from,
 								    qry_limit=50000
 								):
-												Change sql to chunk by hour to remove old notifications

insert/update, and then delete notifications in hourly batches. This
means that if the task gets interrupted part-way through, we'll have at
least something to show for it. Previously we would insert and update
into the history table but might not delete from the notification table
properly.

Keeping the offsets and limits for confidence around reliability and
queries timing out.

Keeping the join to notification_history to ensure we don't delete
anything prematurely while our DB is in a bit of a weird state with lots
of these tasks failing over the last week.

											
										
										
											2020-03-20 19:07:08 +00:00
+								    deleted = 0
 								    if notification_type == LETTER_TYPE:
 								        _delete_letters_from_s3(
-												split delete task up into per service

we really don't gain anything by running each service delete in sequence
- we get the services, and then just loop through them deleting per
service. By deleting per service in separate tasks, we can take
advantage of parallelism. the only thing we lose is some log lines but I
don't think we're that interested in them.

only set query limit at the move_notifications dao function - the task
doesn't really care about the technical implementation of how it deletes
the notifications

											
										
										
											2021-12-06 09:30:48 +00:00
+								            notification_type, service_id, timestamp_to_delete_backwards_from, qry_limit
-												Change sql to chunk by hour to remove old notifications

insert/update, and then delete notifications in hourly batches. This
means that if the task gets interrupted part-way through, we'll have at
least something to show for it. Previously we would insert and update
into the history table but might not delete from the notification table
properly.

Keeping the offsets and limits for confidence around reliability and
queries timing out.

Keeping the join to notification_history to ensure we don't delete
anything prematurely while our DB is in a bit of a weird state with lots
of these tasks failing over the last week.

											
										
										
											2020-03-20 19:07:08 +00:00
+								        )
-												Remove the hourly loop and replace it with a while loop, which check if we still have things to delete for the day.

We are already limiting the query by a query limit.

											
										
										
											2020-03-24 14:44:42 +00:00
+								    delete_count_per_call = 1
 								    while delete_count_per_call > 0:
 								        delete_count_per_call = insert_notification_history_delete_notifications(
-												Refactor delete_notifications_older_than_retention_by_type to use a new strategy.

The insert_notification_history_delete_notifications function uses a temp table to store the data to insert and delete. This will save extra queries while performing the insert and delete operations.
The function is written in such a way that if the task is stop while processing when it's started up again it will just pick up where it left off.

I've made a decision to delete all test data in one query, I don't anticipate a problem with that.

The performance of this might also be better than last nights test because we are inserting everything we need for the NotificationHistory insert, so we don't need the join to Notifications to perform the insert.

											
										
										
											2020-03-24 12:21:28 +00:00
+								            notification_type=notification_type,
 								            service_id=service_id,
-												split delete task up into per service

we really don't gain anything by running each service delete in sequence
- we get the services, and then just loop through them deleting per
service. By deleting per service in separate tasks, we can take
advantage of parallelism. the only thing we lose is some log lines but I
don't think we're that interested in them.

only set query limit at the move_notifications dao function - the task
doesn't really care about the technical implementation of how it deletes
the notifications

											
										
										
											2021-12-06 09:30:48 +00:00
+								            timestamp_to_delete_backwards_from=timestamp_to_delete_backwards_from,
-												Refactor delete_notifications_older_than_retention_by_type to use a new strategy.

The insert_notification_history_delete_notifications function uses a temp table to store the data to insert and delete. This will save extra queries while performing the insert and delete operations.
The function is written in such a way that if the task is stop while processing when it's started up again it will just pick up where it left off.

I've made a decision to delete all test data in one query, I don't anticipate a problem with that.

The performance of this might also be better than last nights test because we are inserting everything we need for the NotificationHistory insert, so we don't need the join to Notifications to perform the insert.

											
										
										
											2020-03-24 12:21:28 +00:00
+								            qry_limit=qry_limit
-												Change sql to chunk by hour to remove old notifications

insert/update, and then delete notifications in hourly batches. This
means that if the task gets interrupted part-way through, we'll have at
least something to show for it. Previously we would insert and update
into the history table but might not delete from the notification table
properly.

Keeping the offsets and limits for confidence around reliability and
queries timing out.

Keeping the join to notification_history to ensure we don't delete
anything prematurely while our DB is in a bit of a weird state with lots
of these tasks failing over the last week.

											
										
										
											2020-03-20 19:07:08 +00:00
+								        )
-												Remove the hourly loop and replace it with a while loop, which check if we still have things to delete for the day.

We are already limiting the query by a query limit.

											
										
										
											2020-03-24 14:44:42 +00:00
+								        deleted += delete_count_per_call
-												Change sql to chunk by hour to remove old notifications

insert/update, and then delete notifications in hourly batches. This
means that if the task gets interrupted part-way through, we'll have at
least something to show for it. Previously we would insert and update
into the history table but might not delete from the notification table
properly.

Keeping the offsets and limits for confidence around reliability and
queries timing out.

Keeping the join to notification_history to ensure we don't delete
anything prematurely while our DB is in a bit of a weird state with lots
of these tasks failing over the last week.

											
										
										
											2020-03-20 19:07:08 +00:00
-												Change as per code review
 - fix test name
 - make query filter consistent with each other
 - add comment for clarity
 - add inner loop to continue to insert and delete notifications while the delete count is greater than 0

											
										
										
											2020-03-24 14:09:13 +00:00
+								    # Deleting test Notifications, test notifications are not persisted to NotificationHistory
-												Refactor delete_notifications_older_than_retention_by_type to use a new strategy.

The insert_notification_history_delete_notifications function uses a temp table to store the data to insert and delete. This will save extra queries while performing the insert and delete operations.
The function is written in such a way that if the task is stop while processing when it's started up again it will just pick up where it left off.

I've made a decision to delete all test data in one query, I don't anticipate a problem with that.

The performance of this might also be better than last nights test because we are inserting everything we need for the NotificationHistory insert, so we don't need the join to Notifications to perform the insert.

											
										
										
											2020-03-24 12:21:28 +00:00
+								    Notification.query.filter(
-												Added the limit to the query for the services with data retention.
Also did a bit of refactoring.

											
										
										
											2018-12-27 14:00:53 +00:00
+								        Notification.notification_type == notification_type,
 								        Notification.service_id == service_id,
-												split delete task up into per service

we really don't gain anything by running each service delete in sequence
- we get the services, and then just loop through them deleting per
service. By deleting per service in separate tasks, we can take
advantage of parallelism. the only thing we lose is some log lines but I
don't think we're that interested in them.

only set query limit at the move_notifications dao function - the task
doesn't really care about the technical implementation of how it deletes
the notifications

											
										
										
											2021-12-06 09:30:48 +00:00
+								        Notification.created_at < timestamp_to_delete_backwards_from,
-												Update subquery to be more efficient.
Update subquery to run again but for test keys. Test data is never inserted in Notifications so they need to be deleted separately now given the join to NotificationHistory.

											
										
										
											2019-06-03 15:16:46 +01:00
+								        Notification.key_type == KEY_TYPE_TEST
-												Refactor delete_notifications_older_than_retention_by_type to use a new strategy.

The insert_notification_history_delete_notifications function uses a temp table to store the data to insert and delete. This will save extra queries while performing the insert and delete operations.
The function is written in such a way that if the task is stop while processing when it's started up again it will just pick up where it left off.

I've made a decision to delete all test data in one query, I don't anticipate a problem with that.

The performance of this might also be better than last nights test because we are inserting everything we need for the NotificationHistory insert, so we don't need the join to Notifications to perform the insert.

											
										
										
											2020-03-24 12:21:28 +00:00
+								    ).delete(synchronize_session=False)
-												Commit the deletes every 10,000 rows.

											
										
										
											2018-12-21 13:57:35 +00:00
+								    db.session.commit()
-												Update insert_update_notification_history to take a query limit

The nightly job to delete email notifications was failing because it was
timing out (`psycopg2.errors.QueryCanceled: canceling statement due to statement timeout`).

This adds a query limit to the query which inserts or updates
notification history so that it only updates a maximum of 10000 rows at
a time.

											
										
										
											2019-10-14 16:43:37 +01:00
-												Refactor delete_notifications_older_than_retention_by_type to use a new strategy.

The insert_notification_history_delete_notifications function uses a temp table to store the data to insert and delete. This will save extra queries while performing the insert and delete operations.
The function is written in such a way that if the task is stop while processing when it's started up again it will just pick up where it left off.

I've made a decision to delete all test data in one query, I don't anticipate a problem with that.

The performance of this might also be better than last nights test because we are inserting everything we need for the NotificationHistory insert, so we don't need the join to Notifications to perform the insert.

											
										
										
											2020-03-24 12:21:28 +00:00
+								    return deleted
-												We want to staop inserting and updating NotificationHistory each time we insert/update Notification.
This PR adds a function to upsert (insert or update if exists) NotificationHistory all the rows from Notification that we are about to delete in the nightly task. This will happen just before the delete function. Since it is a upsert query the function can be called more than once.
This should allow us remove all the insert/updates to NotificationHistory.

However, there is a consern that this will double the length of time the tasks take. So do we do these upserts in a separate task or in the same one?

											
										
										
											2019-04-29 15:44:42 +01:00
-												Added the limit to the query for the services with data retention.
Also did a bit of refactoring.

											
										
										
											2018-12-27 14:00:53 +00:00
+								def _delete_letters_from_s3(
 								        notification_type, service_id, date_to_delete_from, query_limit
 								):
 								    letters_to_delete_from_s3 = db.session.query(
 								        Notification
 								    ).filter(
 								        Notification.notification_type == notification_type,
 								        Notification.created_at < date_to_delete_from,
-												Do not delete letters if not in final state

A few weeks ago, we deleted some pdf letters that had reached their
retention period. However, these letters were in the 'created' state so
it's very arguable that we should not have deleted them because we were
expecting to resend them and were unable to. Part of the reason for this
is that we marked the letters back to `created` as the status but we did
not nullify the `sent_at` timestamp, meaning the check on
https://github.com/alphagov/notifications-api/blob/ebb43082d51b9f27b17190abb0537a710a544408/app/dao/notifications_dao.py#L346
did not catch it. Regardless of that check, which controls whether the
files were removed from S3, they were also archived into the
`notification_history` table as by default.

This commit does changes our code such that letters that are not in
their final state do not go through our retention process. This could
mean they violate their retention policy but that is likely the lesser
of two evils (the other being we delete them and are unable to resend
them).

Note, `sending` letters have been included in those not to be removed
because there is a risk that we give the letter to DVLA and put it in
`sending` but then they come back to us later telling us they've had
problems and require us to resend.

											
										
										
											2020-12-16 10:50:11 +00:00
+								        Notification.service_id == service_id,
 								        # although letters in non completed statuses do have PDFs in s3, they do not exist in the
-												Document behaviour of s3 letter deleting

The behaviour was a bit of opaque so I have added tests around it so
it's clear what it is doing and why. No functionality has changed

											
										
										
											2020-12-16 10:39:31 +00:00
+								        # production-letters-pdf bucket as they never made it that far so we do not try and delete
 								        # them from it
-												Do not delete letters if not in final state

A few weeks ago, we deleted some pdf letters that had reached their
retention period. However, these letters were in the 'created' state so
it's very arguable that we should not have deleted them because we were
expecting to resend them and were unable to. Part of the reason for this
is that we marked the letters back to `created` as the status but we did
not nullify the `sent_at` timestamp, meaning the check on
https://github.com/alphagov/notifications-api/blob/ebb43082d51b9f27b17190abb0537a710a544408/app/dao/notifications_dao.py#L346
did not catch it. Regardless of that check, which controls whether the
files were removed from S3, they were also archived into the
`notification_history` table as by default.

This commit does changes our code such that letters that are not in
their final state do not go through our retention process. This could
mean they violate their retention policy but that is likely the lesser
of two evils (the other being we delete them and are unable to resend
them).

Note, `sending` letters have been included in those not to be removed
because there is a risk that we give the letter to DVLA and put it in
`sending` but then they come back to us later telling us they've had
problems and require us to resend.

											
										
										
											2020-12-16 10:50:11 +00:00
+								        Notification.status.in_(NOTIFICATION_STATUS_TYPES_COMPLETED)
 								    ).limit(query_limit).all()
 								    for letter in letters_to_delete_from_s3:
-												Raise Exception if letter PDF not in S3

Previously, the function would just return a presumed filename. Now that
it actually checks s3, if the file doesn't exist it'll raise an
exception. By default that's a StopIteration at the end of the bucket
iterator, which isn't ideal as this will get supressed if the function
is called within a generator loop further up or anything.

There are a couple of places where we expect the file may not exist, so
we define a custom exception to rescue specifically here. I did consider
subclassing boto's ClientError, but this wasn't straightforward as the
constructor expects to know the operation that failed, which for me is a
signal that it's not an appropriate (re-)use of the class.

											
										
										
											2021-03-08 18:09:16 +00:00
+								        try:
-												Reduce extra S3 ops when working with letter PDFs

Previously we did some unnecessary work:

- Collate task. This had one S3 request to get a summary of the object,
which was then used in another request to get the full object. We only
need the size of the object, which is included in the summary [1].

- Archive task. This had one S3 request to get a summary of the object,
which was then used to make another request to delete it. We still need
both requests, but we can remove the S3.Object in the middle.

[1]: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#objectsummary

											
										
										
											2021-03-16 11:57:33 +00:00
+								            letter_pdf = find_letter_pdf_in_s3(letter)
 								            letter_pdf.delete()
-												Downgrade log for letter deletion exceptions

If the S3 object is missing [1], then that's what we want, so we
don't need such a severe log for it, but we still want to know as
it's not expected. This is separate to more general "ClientError"
exceptions, which could mean anything.

There weren't any tests to cover missing S3 objects, so I've added
one. I don't think we need a test for ClientErrors:

- If there was no handler, the task would fail and we'd learn about
it that way.

- The scope of the calling task is now much smaller, so it matters
less than it used to [2].

[1]: https://github.com/alphagov/notifications-api/blob/81a79e56ced891ad58901ea61ce707e907de3363/app/letters/utils.py#L52
[2]: https://github.com/alphagov/notifications-api/commit/f965322f25e37f8125463223eb5b2473e246e8d2

											
										
										
											2021-12-20 12:05:53 +00:00
+								        except ClientError:
-												Raise Exception if letter PDF not in S3

Previously, the function would just return a presumed filename. Now that
it actually checks s3, if the file doesn't exist it'll raise an
exception. By default that's a StopIteration at the end of the bucket
iterator, which isn't ideal as this will get supressed if the function
is called within a generator loop further up or anything.

There are a couple of places where we expect the file may not exist, so
we define a custom exception to rescue specifically here. I did consider
subclassing boto's ClientError, but this wasn't straightforward as the
constructor expects to know the operation that failed, which for me is a
signal that it's not an appropriate (re-)use of the class.

											
										
										
											2021-03-08 18:09:16 +00:00
+								            current_app.logger.exception(
-												Downgrade log for letter deletion exceptions

If the S3 object is missing [1], then that's what we want, so we
don't need such a severe log for it, but we still want to know as
it's not expected. This is separate to more general "ClientError"
exceptions, which could mean anything.

There weren't any tests to cover missing S3 objects, so I've added
one. I don't think we need a test for ClientErrors:

- If there was no handler, the task would fail and we'd learn about
it that way.

- The scope of the calling task is now much smaller, so it matters
less than it used to [2].

[1]: https://github.com/alphagov/notifications-api/blob/81a79e56ced891ad58901ea61ce707e907de3363/app/letters/utils.py#L52
[2]: https://github.com/alphagov/notifications-api/commit/f965322f25e37f8125463223eb5b2473e246e8d2

											
										
										
											2021-12-20 12:05:53 +00:00
+								                "Error deleting S3 object for letter: {}".format(letter.id))
 								        except LetterPDFNotFound:
 								            current_app.logger.warning(
 								                "No S3 object to delete for letter: {}".format(letter.id))
-												If the notifications that are being deleted are letters then we need to delete the letter from s3 as well.

											
										
										
											2018-08-08 16:20:25 +01:00
-												Rename database management functions.

Rename @transactional to @autocommit.
Rename nested_transaction to tranaction.

											
										
										
											2021-04-14 07:11:01 +01:00
+								@autocommit
-												Remove insert to NotificationHistory
Fix all test failures

											
										
										
											2019-05-30 10:37:57 +01:00
+								def dao_delete_notifications_by_id(notification_id):
-												Implemented deleted notification if SQS write fails

											
										
										
											2016-09-08 16:00:18 +01:00
+								    db.session.query(Notification).filter(
 								        Notification.id == notification_id
 								    ).delete(synchronize_session='fetch')
-												Update the timeout_notifications scheduled tasks.
We found that if the notifications were in created or pending they are not purged from notifications.
- New bulk update method to set all notificaitons with:
  -  a status = created|sending|pending to temporary-failure
  - and is older then today minus SENDING_NOTIFICATIONS_TIMEOUT_PERIOD (in seconds)
- the scheduled task to timeout notifications use the new bulk update query.
- the task will be more efficient

											
										
										
											2016-09-13 16:42:53 +01:00
-												Make limit of DAO timeout function more obvious

We're going to iterate how we use the function with a limit, so we
shouldn't say it's "temporary" anymore. We don't need to change the
default, but having it in the function parameters makes it easier to
see the funtion doesn't time out all notifications, just some.

											
										
										
											2021-12-13 16:59:25 +00:00
+								def dao_timeout_notifications(cutoff_time, limit=100000):
-												don't include letters when timing out/deleting old notifications

also refactored those functions, since they were pretty gnarly and
repeated themselves a lot

											
										
										
											2017-04-19 11:34:00 +01:00
+								    """
-												Rewrite DAO timeout method to take cutoff_time

Previously we specified the period and calculated the cutoff time
in the function. Passing it in means we can run the method multiple
times and avoid getting "new" notifications to time out in the time
it takes to process each batch.

											
										
										
											2021-12-13 16:56:21 +00:00
+								    Set email and SMS notifications (only) to "temporary-failure" status
 								    if they're still sending from before the specified cutoff_time.
-												don't include letters when timing out/deleting old notifications

also refactored those functions, since they were pretty gnarly and
repeated themselves a lot

											
										
										
											2017-04-19 11:34:00 +01:00
+								    """
 								    updated_at = datetime.utcnow()
-												Remove unnecessary _timeout partial function

It's no longer necessary to have a separate function that's now
only called once. While sometimes the separation can bring clarity,
here I think it's clearer to have all the code in one place, and
avoid the functools complexity we had before.

											
										
										
											2021-11-25 17:52:16 +00:00
+								    current_statuses = [NOTIFICATION_SENDING, NOTIFICATION_PENDING]
 								    new_status = NOTIFICATION_TEMPORARY_FAILURE
-												Don't update older status in timeout notifications task

											
										
										
											2017-07-06 11:55:56 +01:00
-												modified dao_timeout_notifications
added send delivery status in scheduled timeout sending email/sms tasks

											
										
										
											2017-12-07 16:37:36 +00:00
+								    notifications = Notification.query.filter(
-												Rewrite DAO timeout method to take cutoff_time

Previously we specified the period and calculated the cutoff time
in the function. Passing it in means we can run the method multiple
times and avoid getting "new" notifications to time out in the time
it takes to process each batch.

											
										
										
											2021-12-13 16:56:21 +00:00
+								        Notification.created_at < cutoff_time,
-												modified dao_timeout_notifications
added send delivery status in scheduled timeout sending email/sms tasks

											
										
										
											2017-12-07 16:37:36 +00:00
+								        Notification.status.in_(current_statuses),
-												Optimise query to get notifications to "time out"

From experimenting in production we found a "!=" caused the engine
to use a sequential scan, whereas explicitly listing all the types
ensured an index scan was used.

We also found that querying for many (over 100K) items leads to
the task stalling - no logs, but no evidence of it running either -
so we also add a limit to the query.

Since the query now only returns a subset of notifications, we need
to ensure the subsequent "update" query operates on the same batch.
Also, as a temporary measure, we have a loop in the task code to
ensure it operates on the total set of notifications to "time out",
which we assume is less than 500K for the time being.

											
										
										
											2021-11-08 14:18:21 +00:00
+								        Notification.notification_type.in_([SMS_TYPE, EMAIL_TYPE])
-												Make limit of DAO timeout function more obvious

We're going to iterate how we use the function with a limit, so we
shouldn't say it's "temporary" anymore. We don't need to change the
default, but having it in the function parameters makes it easier to
see the funtion doesn't time out all notifications, just some.

											
										
										
											2021-12-13 16:59:25 +00:00
+								    ).limit(limit).all()
-												Optimise query to get notifications to "time out"

From experimenting in production we found a "!=" caused the engine
to use a sequential scan, whereas explicitly listing all the types
ensured an index scan was used.

We also found that querying for many (over 100K) items leads to
the task stalling - no logs, but no evidence of it running either -
so we also add a limit to the query.

Since the query now only returns a subset of notifications, we need
to ensure the subsequent "update" query operates on the same batch.
Also, as a temporary measure, we have a loop in the task code to
ensure it operates on the total set of notifications to "time out",
which we assume is less than 500K for the time being.

											
										
										
											2021-11-08 14:18:21 +00:00
-												Remove insert to NotificationHistory
Fix all test failures

											
										
										
											2019-05-30 10:37:57 +01:00
+								    Notification.query.filter(
-												Optimise query to get notifications to "time out"

From experimenting in production we found a "!=" caused the engine
to use a sequential scan, whereas explicitly listing all the types
ensured an index scan was used.

We also found that querying for many (over 100K) items leads to
the task stalling - no logs, but no evidence of it running either -
so we also add a limit to the query.

Since the query now only returns a subset of notifications, we need
to ensure the subsequent "update" query operates on the same batch.
Also, as a temporary measure, we have a loop in the task code to
ensure it operates on the total set of notifications to "time out",
which we assume is less than 500K for the time being.

											
										
										
											2021-11-08 14:18:21 +00:00
+								        Notification.id.in_([n.id for n in notifications]),
-												Remove insert to NotificationHistory
Fix all test failures

											
										
										
											2019-05-30 10:37:57 +01:00
+								    ).update(
 								        {'status': new_status, 'updated_at': updated_at},
 								        synchronize_session=False
 								    )
-												don't include letters when timing out/deleting old notifications

also refactored those functions, since they were pretty gnarly and
repeated themselves a lot

											
										
										
											2017-04-19 11:34:00 +01:00
-												Update the timeout_notifications scheduled tasks.
We found that if the notifications were in created or pending they are not purged from notifications.
- New bulk update method to set all notificaitons with:
  -  a status = created|sending|pending to temporary-failure
  - and is older then today minus SENDING_NOTIFICATIONS_TIMEOUT_PERIOD (in seconds)
- the scheduled task to timeout notifications use the new bulk update query.
- the task will be more efficient

											
										
										
											2016-09-13 16:42:53 +01:00
+								    db.session.commit()
-												Remove unnecessary _timeout partial function

It's no longer necessary to have a separate function that's now
only called once. While sometimes the separation can bring clarity,
here I think it's clearer to have all the code in one place, and
avoid the functools complexity we had before.

											
										
										
											2021-11-25 17:52:16 +00:00
+								    return notifications
-												Add DAO for getting billable units/financial year

In order to invoice people we need to know how many text message
fragments they’ve sent per month.

This should be per (government) financial year, ie April 1st to April
1st because we’ll only ever show a page for one year (because the
250,000 allowance is topped up at the start of every financial year).

This commit only does the DAO bit, not the REST bit.

											
										
										
											2016-09-30 17:17:28 +01:00
-												randomly choose from providers based on priority

todo: make sure if they don't add up to 100 we do something sensible,
especially if they're both 0.

											
										
										
											2019-10-14 15:01:08 +01:00
+								def is_delivery_slow_for_providers(
-												New query for finding if provider is slow

The delivery for provider is slow if more than threshold (currently
we pass in threshold 10%) either took x (for now 4) minutes to deliver,
or are still sending after that time. We look at all notifications
for current provider which are delivered or sending, and are not under
test key, for the last 10 minutes.

We are using created_at to establish if notifications are from last
10 minutes because we have an index on it, so the query is faster.

Also write tests for new is_delivery_slow_for_provider query

											
										
										
											2018-12-04 17:39:43 +00:00
+								        created_at,
-												If the notifications that are being deleted are letters then we need to delete the letter from s3 as well.

											
										
										
											2018-08-08 16:20:25 +01:00
+								        threshold,
 								        delivery_time,
-												Add dao method to get count of slow delivery notifications by provider

											
										
										
											2017-02-13 14:27:32 +00:00
+								):
-												randomly choose from providers based on priority

todo: make sure if they don't add up to 100 we do something sensible,
especially if they're both 0.

											
										
										
											2019-10-14 15:01:08 +01:00
+								    """
 								    Returns a dict of providers and whether they are currently slow or not. eg:
 								    {
 								        'mmg': True,
 								        'firetext': False
 								    }
 								    """
 								    slow_notification_counts = db.session.query(
 								        ProviderDetails.identifier,
-												New query for finding if provider is slow

The delivery for provider is slow if more than threshold (currently
we pass in threshold 10%) either took x (for now 4) minutes to deliver,
or are still sending after that time. We look at all notifications
for current provider which are delivered or sending, and are not under
test key, for the last 10 minutes.

We are using created_at to establish if notifications are from last
10 minutes because we have an index on it, so the query is faster.

Also write tests for new is_delivery_slow_for_provider query

											
										
										
											2018-12-04 17:39:43 +00:00
+								        case(
 								            [(
 								                Notification.status == NOTIFICATION_DELIVERED,
 								                (Notification.updated_at - Notification.sent_at) >= delivery_time
 								            )],
 								            else_=(datetime.utcnow() - Notification.sent_at) >= delivery_time
-												randomly choose from providers based on priority

todo: make sure if they don't add up to 100 we do something sensible,
especially if they're both 0.

											
										
										
											2019-10-14 15:01:08 +01:00
+								        ).label("slow"),
 								        func.count().label('count')
 								    ).select_from(
 								        ProviderDetails
 								    ).outerjoin(
 								        Notification, and_(
-												Improve query performance

Adding a filter to `app.dao.notifications_dao.is_delivery_slow_for_providers` query to improve the performance. By added Notifications.notification_type = 'sms' to the query it will improve the performance some analyse shows 500ms improvement, which is a good thing especially when the query is run once a minute.

											
										
										
											2021-11-30 16:42:32 +00:00
+								            Notification.notification_type == SMS_TYPE,
-												randomly choose from providers based on priority

todo: make sure if they don't add up to 100 we do something sensible,
especially if they're both 0.

											
										
										
											2019-10-14 15:01:08 +01:00
+								            Notification.sent_by == ProviderDetails.identifier,
 								            Notification.created_at >= created_at,
 								            Notification.sent_at.isnot(None),
 								            Notification.status.in_([NOTIFICATION_DELIVERED, NOTIFICATION_PENDING, NOTIFICATION_SENDING]),
 								            Notification.key_type != KEY_TYPE_TEST
 								        )
-												New query for finding if provider is slow

The delivery for provider is slow if more than threshold (currently
we pass in threshold 10%) either took x (for now 4) minutes to deliver,
or are still sending after that time. We look at all notifications
for current provider which are delivered or sending, and are not under
test key, for the last 10 minutes.

We are using created_at to establish if notifications are from last
10 minutes because we have an index on it, so the query is faster.

Also write tests for new is_delivery_slow_for_provider query

											
										
										
											2018-12-04 17:39:43 +00:00
+								    ).filter(
-												randomly choose from providers based on priority

todo: make sure if they don't add up to 100 we do something sensible,
especially if they're both 0.

											
										
										
											2019-10-14 15:01:08 +01:00
+								        ProviderDetails.notification_type == 'sms',
 								        ProviderDetails.active
 								    ).order_by(
 								        ProviderDetails.identifier
 								    ).group_by(
 								        ProviderDetails.identifier,
 								        "slow"
 								    )
-												New query for finding if provider is slow

The delivery for provider is slow if more than threshold (currently
we pass in threshold 10%) either took x (for now 4) minutes to deliver,
or are still sending after that time. We look at all notifications
for current provider which are delivered or sending, and are not under
test key, for the last 10 minutes.

We are using created_at to establish if notifications are from last
10 minutes because we have an index on it, so the query is faster.

Also write tests for new is_delivery_slow_for_provider query

											
										
										
											2018-12-04 17:39:43 +00:00
-												randomly choose from providers based on priority

todo: make sure if they don't add up to 100 we do something sensible,
especially if they're both 0.

											
										
										
											2019-10-14 15:01:08 +01:00
+								    slow_providers = {}
 								    for provider, rows in groupby(slow_notification_counts, key=attrgetter('identifier')):
 								        rows = list(rows)
 								        total_notifications = sum(row.count for row in rows)
 								        slow_notifications = sum(row.count for row in rows if row.slow)
 								        slow_providers[provider] = (slow_notifications / total_notifications >= threshold)
-												Replace log with StatsD gauge for slow delivery

A gauge is more useful as we can visualise it and combine it with
other stats - we already have other stats for the total number of
notifications sent by provider, and we can extrapolate the number
of slow notifications using this, if needed.

We also still have logs to say the task is running, as well as a
log in the calling code when we actually make a switch [1], so
we're not losing anything by removing the log here.

[1]: https://github.com/alphagov/notifications-api/blob/a9306c45573402495d68f6948a9eb3d6ffbac28c/app/celery/scheduled_tasks.py#L117

											
										
										
											2021-12-14 12:26:00 +00:00
+								        statsd_client.gauge(f'slow-delivery.{provider}.ratio', slow_notifications / total_notifications)
-												randomly choose from providers based on priority

todo: make sure if they don't add up to 100 we do something sensible,
especially if they're both 0.

											
										
										
											2019-10-14 15:01:08 +01:00
 								    return slow_providers
-												Refactor update to notifcations to be a bulk update. This is much better for performance.

											
										
										
											2017-04-07 10:59:12 +01:00
-												Rename database management functions.

Rename @transactional to @autocommit.
Rename nested_transaction to tranaction.

											
										
										
											2021-04-14 07:11:01 +01:00
+								@autocommit
-												remove jobs from letter api and make success/error ftp callback tasks

1. No longer create jobs when creating letters from api :tada:
2. Bulk update notifications based on the notification references after
   we send them to DVLA - either as success or as error

											
										
										
											2017-09-20 11:12:37 +01:00
+								def dao_update_notifications_by_reference(references, update_dict):
-												remove jobs from letter api calls

we now no longer create a job. At the end of the post there is no
action, as we don't have any tasks to queue immediately - if it's a
real notification it'll get picked up in the evening scheduled task.

If it's a test notification, we create it with an initial status of
sending so that we can be sure it'll never get picked up - and then we
trigger the update-letter-notifications-to-sent-to-dvla task to sent
the sent-at/by.

											
										
										
											2017-09-26 09:56:09 +01:00
+								    updated_count = Notification.query.filter(
-												remove jobs from letter api and make success/error ftp callback tasks

1. No longer create jobs when creating letters from api :tada:
2. Bulk update notifications based on the notification references after
   we send them to DVLA - either as success or as error

											
										
										
											2017-09-20 11:12:37 +01:00
+								        Notification.reference.in_(references)
 								    ).update(
-												remove jobs from letter api calls

we now no longer create a job. At the end of the post there is no
action, as we don't have any tasks to queue immediately - if it's a
real notification it'll get picked up in the evening scheduled task.

If it's a test notification, we create it with an initial status of
sending so that we can be sure it'll never get picked up - and then we
trigger the update-letter-notifications-to-sent-to-dvla task to sent
the sent-at/by.

											
										
										
											2017-09-26 09:56:09 +01:00
+								        update_dict,
 								        synchronize_session=False
-												remove jobs from letter api and make success/error ftp callback tasks

1. No longer create jobs when creating letters from api :tada:
2. Bulk update notifications based on the notification references after
   we send them to DVLA - either as success or as error

											
										
										
											2017-09-20 11:12:37 +01:00
+								    )
-												Remove test filter

											
										
										
											2017-04-07 11:22:03 +01:00
-												Stop updating NotificationHistory

Doing my bit to remove imports of fixtures.

											
										
										
											2019-05-08 17:31:27 +01:00
+								    updated_history_count = 0
-												Update Notification history if there is a mismatch in the number of notifications to be updated and the number actually updated.

											
										
										
											2019-05-15 15:30:15 +01:00
+								    if updated_count != len(references):
-												Stop updating NotificationHistory

Doing my bit to remove imports of fixtures.

											
										
										
											2019-05-08 17:31:27 +01:00
+								        updated_history_count = NotificationHistory.query.filter(
 								            NotificationHistory.reference.in_(references)
 								        ).update(
 								            update_dict,
 								            synchronize_session=False
 								        )
-												Refactor update to notifcations to be a bulk update. This is much better for performance.

											
										
										
											2017-04-07 10:59:12 +01:00
-												Add updated history count to dao_update_notifications_by_reference

For returned letter updates most notifications won't exist in the
notifications table, so in order to find out whether the reference
matches any known letters we need to check the count of updated
history records.

											
										
										
											2018-08-30 14:27:57 +01:00
+								    return updated_count, updated_history_count
-												New endpoint to search by "to" field of the notification.
The query ignores case and spaces.

											
										
										
											2017-05-05 14:12:50 +01:00
-												Paginate search results for notifications

The standard way that we indicate that there are more results than can
be returned is by paginating. So even though we don’t intend to paginate
the search results in the admin app, it can still use the presence or
absence of a ‘next’ link to determine whether or not to show a message
about only showing the first 50 results.

											
										
										
											2020-05-01 11:18:33 +01:00
+								def dao_get_notifications_by_recipient_or_reference(
 								    service_id,
 								    search_term,
 								    notification_type=None,
 								    statuses=None,
 								    page=1,
 								    page_size=None,
-												Optimise queries run for creating pagination links

We have been running in to the problem in
pallets/flask-sqlalchemy#518 where
our page loads very slow when viewing a single page of notifications
for a service in the admin app. Tracing this back and using SQL
explain analyze I can see that getting the notifications takes about
a second but the second query to count how many notifications there
are (to work out if there is a next page of pagination) can take up
to 100 seconds.

As suggested in that issue, we do the pagination ourselves.
Our pagination doesn't need us to know exactly how many notifications
there are, just whether there are any on the next page and that can
be done without running the slow query to count how many
notifications in total by using `count_pages=False`.

This commit is analagous to
https://github.com/alphagov/notifications-api/pull/3391/commits/c68d1a2f2391bf712cfe630f9e753dc51048d395

The only difference is that in that case, the pagination links are
used to show prev and/or next links in the admin app. In this case,
the pagination links are only used to see if there is a page 2, and
if there is, say that we are only showing the first 50 results.

											
										
										
											2021-12-10 12:06:55 +00:00
+								    error_out=True,
-												Paginate search results for notifications

The standard way that we indicate that there are more results than can
be returned is by paginating. So even though we don’t intend to paginate
the search results in the admin app, it can still use the presence or
absence of a ‘next’ link to determine whether or not to show a message
about only showing the first 50 results.

											
										
										
											2020-05-01 11:18:33 +01:00
+								):
-												Notification_type is a required parameter, admin app always passes it in.
Normalise for notificaiton type.
Throw InvalidRequest exception is the notification type is invalid.

											
										
										
											2018-03-07 18:13:40 +00:00
 								    if notification_type == SMS_TYPE:
 								        normalised = try_validate_and_format_phone_number(search_term)
 								        for character in {'(', ')', ' ', '-'}:
 								            normalised = normalised.replace(character, '')
 								        normalised = normalised.lstrip('+0')
 								    elif notification_type == EMAIL_TYPE:
-												Don’t 500 when searching with bad email address

In the future we might want to validate email addresses before
attempting to search by them. But for a first pass we can just return
no results when a user types in something that isn’t an email address
or phone number.

It definitely better than returning a 500.

											
										
										
											2017-05-30 14:40:27 +01:00
+								        try:
 								            normalised = validate_and_format_email_address(search_term)
 								        except InvalidEmailError:
-												Notification_type is a required parameter, admin app always passes it in.
Normalise for notificaiton type.
Throw InvalidRequest exception is the notification type is invalid.

											
										
										
											2018-03-07 18:13:40 +00:00
+								            normalised = search_term.lower()
-												Search for emails or texts, depending on term

The caseworking view is going to have a page which displays emails and
text messages combined together.

In order for the search to work on this page the user needs to be able
to search for an email or a text message. This commit makes it guess
what to search for when the `notification_type` isn’t known (basically
by saying ‘if the search term is only digits they’re probably looking
searching by phone number’).

											
										
										
											2018-06-13 16:04:49 +01:00
-												Make allowed notification types explicit in search

By not having a catch-all else, it makes it clearer what we’re
expecting. And then we think it’s worth adding a comment explaining why
we normalise as we do for letters and the `None` case.

											
										
										
											2020-04-23 16:06:34 +01:00
+								    elif notification_type in {LETTER_TYPE, None}:
 								        # For letters, we store the address without spaces, so we need
 								        # to removes spaces from the search term to match. We also do
 								        # this when a notification type isn’t provided (this will
 								        # happen if a user doesn’t have permission to see the dashboard)
 								        # because email addresses and phone numbers will never be stored
 								        # with spaces either.
-												Let users search for letters

Like we have search by email address or phone number, finding an
individual letter is a common task. At the moment users are having to
click through pages and pages of letters to find the one they’re looking
for.

We have to search in the `to` and `normalised_to` fields for now because
we’re not populating the `normalised_to` column for letters at the
moment.

											
										
										
											2020-04-21 14:19:41 +01:00
+								        normalised = ''.join(search_term.split()).lower()
-												Do more normalisation for better partial matching

Phone numbers sometimes contain stuff we normalise out. This matches
perfectly if we have a full phone number, because we can normalise the
thing we’re searching for in the same way as the search term.

With partial search terms we can’t do this completely, because we can’t
work out if ‘123’ is part of a UK number, an international number, the
start of the phone number, the last 3 digits, etc.

What we can do is remove some stuff that we can know will cause partial
search terms to not match:
- leading pluses
- leading `0`s
- any brackets
- any spaces

											
										
										
											2018-03-06 12:39:58 +00:00
-												Stop guessing notification type

Before the search term was either:
- an email address (or partial email address)
- a phone number (or partial phone number)

Now it can also be:
- a reference (or partial reference)

We can take a pretty good guess, by looking at the search term, whether
the thing the user is searching by email address or phone number. This
helps us:
- only show relevant notifications
- normalise the search term to give the best chance of matching what we
  store in the `normalised_to` field

However we can’t look at a search term and guess whether it’s a
reference, because a reference could take any format. Therefore if the
user hasn’t told us what kind of thing their search term is, we should
stop trying to guess.

											
										
										
											2019-12-16 10:27:55 +00:00
+								    else:
-												Make allowed notification types explicit in search

By not having a catch-all else, it makes it clearer what we’re
expecting. And then we think it’s worth adding a comment explaining why
we normalise as we do for letters and the `None` case.

											
										
										
											2020-04-23 16:06:34 +01:00
+								        raise TypeError(
 								            f'Notification type must be {EMAIL_TYPE}, {SMS_TYPE}, {LETTER_TYPE} or None'
 								        )
-												Stop guessing notification type

Before the search term was either:
- an email address (or partial email address)
- a phone number (or partial phone number)

Now it can also be:
- a reference (or partial reference)

We can take a pretty good guess, by looking at the search term, whether
the thing the user is searching by email address or phone number. This
helps us:
- only show relevant notifications
- normalise the search term to give the best chance of matching what we
  store in the `normalised_to` field

However we can’t look at a search term and guess whether it’s a
reference, because a reference could take any format. Therefore if the
user hasn’t told us what kind of thing their search term is, we should
stop trying to guess.

											
										
										
											2019-12-16 10:27:55 +00:00
-												Move code that escapes special chars to helper function and use it

in query get_users_by_partial_email

											
										
										
											2018-07-13 15:26:42 +01:00
+								    normalised = escape_special_characters(normalised)
-												Allow searching by reference as well as recipient

We have a team who want to find emails that might have been sent to an
incorrect address. Therefore they can’t search by the correct address,
because it won’t match.

What they do have is the reference number of the user’s application,
which is also stored in the `client_reference` field on the
notification.

So when a user is searching we should also look at the client reference,
as well as the recipient, allowing the user to enter either in the
search box.

											
										
										
											2019-12-12 16:01:22 +00:00
+								    search_term = escape_special_characters(search_term)
-												Escape special characters in search by recipient

SQLAlchemy handles escaping anything that could allow a SQL injection
attack. But it doesn’t escape the characters used for wildcard
searching. This is the reason we’re able to do `.like('%example%')`
at all.

But we shouldn’t be letting our users search with wildcard characters,
so we need to escape them. Which is what this commit does.

											
										
										
											2018-03-14 10:34:45 +00:00
-												Search notification against normalised recipient with filter for status

											
										
										
											2017-05-24 14:24:57 +01:00
+								    filters = [
-												New endpoint to search by "to" field of the notification.
The query ignores case and spaces.

											
										
										
											2017-05-05 14:12:50 +01:00
+								        Notification.service_id == service_id,
-												Allow searching by reference as well as recipient

We have a team who want to find emails that might have been sent to an
incorrect address. Therefore they can’t search by the correct address,
because it won’t match.

What they do have is the reference number of the user’s application,
which is also stored in the `client_reference` field on the
notification.

So when a user is searching we should also look at the client reference,
as well as the recipient, allowing the user to enter either in the
search box.

											
										
										
											2019-12-12 16:01:22 +00:00
+								        or_(
 								            Notification.normalised_to.like("%{}%".format(normalised)),
 								            Notification.client_reference.ilike("%{}%".format(search_term)),
 								        ),
-												Exclude test keys when searching by recipient

The activity page doesn’t show notifications sent with a test key.

However it _does_ when you search by recipient. This is confusing
and inconsistent.

											
										
										
											2017-08-21 15:35:55 +01:00
+								        Notification.key_type != KEY_TYPE_TEST,
-												Search notification against normalised recipient with filter for status

											
										
										
											2017-05-24 14:24:57 +01:00
+								    ]
 								    if statuses:
 								        filters.append(Notification.status.in_(statuses))
-												The template type should be passed in when doing a search by recipent.
It is possible to search for a phone number when from the email notification page and get a SMS message in return.
This also helps to optimise the query.

											
										
										
											2018-03-07 17:11:29 +00:00
+								    if notification_type:
 								        filters.append(Notification.notification_type == notification_type)
-												Search notification against normalised recipient with filter for status

											
										
										
											2017-05-24 14:24:57 +01:00
-												Limit search by recipient to 50 results

Things could get ugly if you use a short search string on a service with
lots of notifications…

											
										
										
											2020-04-28 11:26:06 +01:00
+								    results = db.session.query(Notification)\
 								        .filter(*filters)\
 								        .order_by(desc(Notification.created_at))\
-												Optimise queries run for creating pagination links

We have been running in to the problem in
pallets/flask-sqlalchemy#518 where
our page loads very slow when viewing a single page of notifications
for a service in the admin app. Tracing this back and using SQL
explain analyze I can see that getting the notifications takes about
a second but the second query to count how many notifications there
are (to work out if there is a next page of pagination) can take up
to 100 seconds.

As suggested in that issue, we do the pagination ourselves.
Our pagination doesn't need us to know exactly how many notifications
there are, just whether there are any on the next page and that can
be done without running the slow query to count how many
notifications in total by using `count_pages=False`.

This commit is analagous to
https://github.com/alphagov/notifications-api/pull/3391/commits/c68d1a2f2391bf712cfe630f9e753dc51048d395

The only difference is that in that case, the pagination links are
used to show prev and/or next links in the admin app. In this case,
the pagination links are only used to see if there is a page 2, and
if there is, say that we are only showing the first 50 results.

											
										
										
											2021-12-10 12:06:55 +00:00
+								        .paginate(page=page, per_page=page_size, count=False, error_out=error_out)
-												Search notification against normalised recipient with filter for status

											
										
										
											2017-05-24 14:24:57 +01:00
+								    return results
-												Add scheduled_for in the post notification request form.
Return scheduled for in get_notification requests.

											
										
										
											2017-05-15 17:27:38 +01:00
-												Compare letter page count with billable units in DVLA response file

We compare the page_count in the response file we receive from the DVLA
with the billable_units of the letter. If these don't match, we log an
error.

											
										
										
											2018-01-17 09:52:13 +00:00
+								def dao_get_notification_by_reference(reference):
 								    return Notification.query.filter(
 								        Notification.reference == reference
 								    ).one()
-												Rename function for clarity

											
										
										
											2020-03-27 15:48:54 +00:00
+								def dao_get_notification_or_history_by_reference(reference):
-												Prepare to stop inserting NotificationHistory at the time of inserting a notificaiton.
Need to remove foreign key to complaints.
Make sure if getting Notification.id we look to both tables.

											
										
										
											2019-05-21 16:08:18 +01:00
+								    try:
 								        # This try except is necessary because in test keys and research mode does not create notification history.
 								        # Otherwise we could just search for the NotificationHistory object
 								        return Notification.query.filter(
 								            Notification.reference == reference
 								        ).one()
 								    except NoResultFound:
 								        return NotificationHistory.query.filter(
 								            NotificationHistory.reference == reference
 								        ).one()
-												Process responses for letters even after the notification has been deleted.

This will continue to update the notification history for letter notifications.
We currently have an issue where the responses to letters from the provider is taking a long time.
This is due to the manual nature of their process.
Updating the status of the letter will still work if the notification has been purged.

Also turned back on the purge letter notification scheduled task.

											
										
										
											2018-03-01 15:39:51 +00:00
-												Remove everything for the performance platform

We no longer will send them any stats so therefore don't need the code
- the code to work out the nightly stats
- the performance platform client
- any configuration for the client
- any nightly tasks that kick off the sending off the stats

We will require a change in cronitor as we no longer will have this task
run meaning we need to delete the cronitor check.

											
										
										
											2021-03-11 18:53:43 +00:00
+								def dao_get_notifications_processing_time_stats(start_date, end_date):
-												Add query to get processing time stats for performance platform

We are only interested in API notifications, not including test
messages. Letters are not included.

											
										
										
											2017-08-29 16:35:30 +01:00
+								    """
-												Remove everything for the performance platform

We no longer will send them any stats so therefore don't need the code
- the code to work out the nightly stats
- the performance platform client
- any configuration for the client
- any nightly tasks that kick off the sending off the stats

We will require a change in cronitor as we no longer will have this task
run meaning we need to delete the cronitor check.

											
										
										
											2021-03-11 18:53:43 +00:00
+								    For a given time range, returns the number of notifications sent and the number of
 								    those notifications that we processed within 10 seconds
-												Add query to get processing time stats for performance platform

We are only interested in API notifications, not including test
messages. Letters are not included.

											
										
										
											2017-08-29 16:35:30 +01:00
+								    SELECT
-												Persist the processing time statistics to the database.

The performance platform is going away soon. The only stat that we do not have in our database is the processing time. Let me clarify the only statistic we don't have in our database that we can query efficiently is the processing time. Any queries on notification_history are too inefficient to use on a web page.
Processing time = the total number of normal/team emails and text messages plus the number of messages that have gone from created to sending within 10 seconds per whole day. We can then easily calculate the percentage of messages that were marked as sending under 10 seconds.

											
										
										
											2021-02-22 15:42:29 +00:00
+								    count(notifications),
-												Send '0', not 'null', to perf platform if no notifications are sent

											
										
										
											2017-08-30 16:02:30 +01:00
+								    coalesce(sum(CASE WHEN sent_at - created_at <= interval '10 seconds' THEN 1 ELSE 0 END), 0)
-												Persist the processing time statistics to the database.

The performance platform is going away soon. The only stat that we do not have in our database is the processing time. Let me clarify the only statistic we don't have in our database that we can query efficiently is the processing time. Any queries on notification_history are too inefficient to use on a web page.
Processing time = the total number of normal/team emails and text messages plus the number of messages that have gone from created to sending within 10 seconds per whole day. We can then easily calculate the percentage of messages that were marked as sending under 10 seconds.

											
										
										
											2021-02-22 15:42:29 +00:00
+								    FROM notifications
-												Add query to get processing time stats for performance platform

We are only interested in API notifications, not including test
messages. Letters are not included.

											
										
										
											2017-08-29 16:35:30 +01:00
+								    WHERE
 								    created_at > 'START DATE' AND
 								    created_at < 'END DATE' AND
 								    api_key_id IS NOT NULL AND
 								    key_type != 'test' AND
 								    notification_type != 'letter';
 								    """
-												Update the query that gets the number of notifications that have been sent under 10 seconds to use Notifications rather than NotificationHistory.

Also removed a test that is not useful

											
										
										
											2019-04-10 10:06:27 +01:00
+								    under_10_secs = Notification.sent_at - Notification.created_at <= timedelta(seconds=10)
-												Send '0', not 'null', to perf platform if no notifications are sent

											
										
										
											2017-08-30 16:02:30 +01:00
+								    sum_column = functions.coalesce(functions.sum(
-												Add query to get processing time stats for performance platform

We are only interested in API notifications, not including test
messages. Letters are not included.

											
										
										
											2017-08-29 16:35:30 +01:00
+								        case(
 								            [
 								                (under_10_secs, 1)
 								            ],
 								            else_=0
 								        )
-												Send '0', not 'null', to perf platform if no notifications are sent

											
										
										
											2017-08-30 16:02:30 +01:00
+								    ), 0)
-												Make perf platform processing stats query the NotificationHistory table

											
										
										
											2017-08-31 12:44:06 +01:00
-												Add query to get processing time stats for performance platform

We are only interested in API notifications, not including test
messages. Letters are not included.

											
										
										
											2017-08-29 16:35:30 +01:00
+								    return db.session.query(
-												Update the query that gets the number of notifications that have been sent under 10 seconds to use Notifications rather than NotificationHistory.

Also removed a test that is not useful

											
										
										
											2019-04-10 10:06:27 +01:00
+								        func.count(Notification.id).label('messages_total'),
-												Add query to get processing time stats for performance platform

We are only interested in API notifications, not including test
messages. Letters are not included.

											
										
										
											2017-08-29 16:35:30 +01:00
+								        sum_column.label('messages_within_10_secs')
 								    ).filter(
-												Update the query that gets the number of notifications that have been sent under 10 seconds to use Notifications rather than NotificationHistory.

Also removed a test that is not useful

											
										
										
											2019-04-10 10:06:27 +01:00
+								        Notification.created_at >= start_date,
 								        Notification.created_at < end_date,
 								        Notification.api_key_id.isnot(None),
 								        Notification.key_type != KEY_TYPE_TEST,
 								        Notification.notification_type != LETTER_TYPE
-												Add query to get processing time stats for performance platform

We are only interested in API notifications, not including test
messages. Letters are not included.

											
										
										
											2017-08-29 16:35:30 +01:00
+								    ).one()
-												set letter notifications to pending while notify-ftp does its stuff

this means that if the task is accidentally ran twice (eg we autoscale
notify-celery-worker-beat to 2), it won't send letters twice.

Additionally, update some function names and config variables to make
it clear that they are referring to letter jobs, rather than all letter
content

											
										
										
											2017-09-15 17:46:08 +01:00
-												Process Incomplete Jobs Updates

Comments are PR review. Updated code style in a few places to make it
more consistent with other code, added tests for letters and emails
so they are testedt, refactored some database queries to dao file

- Fixed code style
- Refactored database queries to dao code
- Added tests for emails and sms.

											
										
										
											2017-10-17 11:07:36 +01:00
+								def dao_get_last_notification_added_for_job_id(job_id):
 								    last_notification_added = Notification.query.filter(
 								        Notification.job_id == job_id
 								    ).order_by(
 								        Notification.job_row_number.desc()
 								    ).first()
 								    return last_notification_added
-												Add get_count_of_letters_to_process to notifications_dao

- will get the letter notifications from day before >= letter processing deadline (17:30)
- letters_as_pdf permission is required in the service

											
										
										
											2017-12-18 16:12:17 +00:00
-												If a sms or email has not been sent after 4 hours and 15 minutes then put it on the delivery queue.

											
										
										
											2018-03-23 15:38:35 +00:00
+								def notifications_not_yet_sent(should_be_sending_after_seconds, notification_type):
 								    older_than_date = datetime.utcnow() - timedelta(seconds=should_be_sending_after_seconds)
 								    notifications = Notification.query.filter(
 								        Notification.created_at <= older_than_date,
 								        Notification.notification_type == notification_type,
 								        Notification.status == NOTIFICATION_CREATED
 								    ).all()
 								    return notifications
-												Search for emails or texts, depending on term

The caseworking view is going to have a page which displays emails and
text messages combined together.

In order for the search to work on this page the user needs to be able
to search for an email or a text message. This commit makes it guess
what to search for when the `notification_type` isn’t known (basically
by saying ‘if the search term is only digits they’re probably looking
searching by phone number’).

											
										
										
											2018-06-13 16:04:49 +01:00
-												use yield_per instead of limit

limit means we only return 50k letters, if there are more than that for
a service we'll skip them and they won't be picked up until the next
day.

If you remove the limit, sqlalchemy prefetches query results so it can
build up ORM results, for example collapsing joined rows into single
objects with chidren. SQLAlchemy streams the data into a buffer, and
normally will still prefetch the entire resultset so it can ensure
integrity of the session, (so that if you modify one result that is
duplicated further down in the results, both rows are updated in the
session for example). However, we don't care about that, but we do care
about preventing the result set taking up too much memory. We can use
`yield_per` to yield from sqlalchemy to the iterator (in this case the
`for letter in letters_awaiting_sending` loop in letters_pdf_tasks.py) -
this means every time we hit 10000 rows, we go back to the database to
get the next 10k. This way, we only ever need 10k rows in memory at a
time.

This has some caveats, mostly around how we handle the data the query
returns. They're a bit hard to parse but I'm pretty sure the notable
limitations are:

* It's dangerous to modify ORM objects returned by yield_per queries
* It's dangerous to join in a yield_per query if you think there will be
  more than one row per item (for example, if you join from notification
  to service, there'll be multiple result rows containing the same
  service, and if these are split over different yield chunks, then we
  may experience undefined behaviour.

These two limitations are focused around there being no guarantee of
having one unique row per item.

For more reading:
https://docs.sqlalchemy.org/en/13/orm/query.html?highlight=yield_per#sqlalchemy.orm.query.Query.yield_per
https://www.mail-archive.com/sqlalchemy@googlegroups.com/msg12443.html

											
										
										
											2020-10-23 20:06:24 +01:00
+								def dao_get_letters_to_be_printed(print_run_deadline, postage, query_limit=10000):
-												Change which letters we want to be sent to look at all days

Previously, when running the `collate_letter_pdfs_for_day` task, we
would only send letters that were created between 5:30pm yesterday and
5:30 today.

Now we send letters that were created before 5:30pm today and that are
still waiting to be sent. This will help us automatically attempt to
send letters that may have fallen through the gaps and not been sent the
previous day when they should have been.

Previously we solved the problem of letters that had fallen the gap by
having to run the task with a date parameter for example
`collate_letter_pdfs_for_day('2020-02-18'). We no longer need this date
parameter as we will always look back across previous days too for
letters that still need sending.

Note, we have to change from using the pagination `list_objects_v2` to
instead getting each individual notification from s3. We reduce load by
using `HEAD` rather than `GET` but this will still greatly increase the
number of API calls. We acknowledge there will be a small cost to this,
say 50p for 5000 letters and think this is tolerable. Boto3 also handles
retries itself so if when making one of the many HEAD requests, there is
a networking blip then it should be retried automatically for us.

											
										
										
											2020-02-17 15:59:53 +00:00
+								    """
-												use yield_per instead of limit

limit means we only return 50k letters, if there are more than that for
a service we'll skip them and they won't be picked up until the next
day.

If you remove the limit, sqlalchemy prefetches query results so it can
build up ORM results, for example collapsing joined rows into single
objects with chidren. SQLAlchemy streams the data into a buffer, and
normally will still prefetch the entire resultset so it can ensure
integrity of the session, (so that if you modify one result that is
duplicated further down in the results, both rows are updated in the
session for example). However, we don't care about that, but we do care
about preventing the result set taking up too much memory. We can use
`yield_per` to yield from sqlalchemy to the iterator (in this case the
`for letter in letters_awaiting_sending` loop in letters_pdf_tasks.py) -
this means every time we hit 10000 rows, we go back to the database to
get the next 10k. This way, we only ever need 10k rows in memory at a
time.

This has some caveats, mostly around how we handle the data the query
returns. They're a bit hard to parse but I'm pretty sure the notable
limitations are:

* It's dangerous to modify ORM objects returned by yield_per queries
* It's dangerous to join in a yield_per query if you think there will be
  more than one row per item (for example, if you join from notification
  to service, there'll be multiple result rows containing the same
  service, and if these are split over different yield chunks, then we
  may experience undefined behaviour.

These two limitations are focused around there being no guarantee of
having one unique row per item.

For more reading:
https://docs.sqlalchemy.org/en/13/orm/query.html?highlight=yield_per#sqlalchemy.orm.query.Query.yield_per
https://www.mail-archive.com/sqlalchemy@googlegroups.com/msg12443.html

											
										
										
											2020-10-23 20:06:24 +01:00
+								    Return all letters created before the print run deadline that have not yet been sent. This yields in batches of 10k
 								    to prevent the query taking too long and eating up too much memory. As each 10k batch is yielded, the
 								    get_key_and_size_of_letters_to_be_sent_to_print function will go and fetch the s3 data, andhese  start sending off
 								    tasks to the notify-ftp app to send them.
 								    CAUTION! Modify this query with caution. Modifying filters etc is fine, but if we join onto another table, then
 								    there may be undefined behaviour. Essentially we need each ORM object returned for each row to be unique,
 								    and we should avoid modifying state of returned objects.
 								    For more reading:
 								    https://docs.sqlalchemy.org/en/13/orm/query.html?highlight=yield_per#sqlalchemy.orm.query.Query.yield_per
 								    https://www.mail-archive.com/sqlalchemy@googlegroups.com/msg12443.html
-												Change which letters we want to be sent to look at all days

Previously, when running the `collate_letter_pdfs_for_day` task, we
would only send letters that were created between 5:30pm yesterday and
5:30 today.

Now we send letters that were created before 5:30pm today and that are
still waiting to be sent. This will help us automatically attempt to
send letters that may have fallen through the gaps and not been sent the
previous day when they should have been.

Previously we solved the problem of letters that had fallen the gap by
having to run the task with a date parameter for example
`collate_letter_pdfs_for_day('2020-02-18'). We no longer need this date
parameter as we will always look back across previous days too for
letters that still need sending.

Note, we have to change from using the pagination `list_objects_v2` to
instead getting each individual notification from s3. We reduce load by
using `HEAD` rather than `GET` but this will still greatly increase the
number of API calls. We acknowledge there will be a small cost to this,
say 50p for 5000 letters and think this is tolerable. Boto3 also handles
retries itself so if when making one of the many HEAD requests, there is
a networking blip then it should be retried automatically for us.

											
										
										
											2020-02-17 15:59:53 +00:00
+								    """
-												Remove redundant join

This was left over from when we needed to tell if a notification was
sent by a crown or non-crown service.

											
										
										
											2021-05-06 09:34:46 +01:00
+								    notifications = Notification.query.filter(
-												Refactor dates being passed around

I believe this way is nicer to read, we don't have to change between
datetimes and strings and back.

											
										
										
											2020-02-19 14:23:33 +00:00
+								        Notification.created_at < convert_bst_to_utc(print_run_deadline),
-												Change which letters we want to be sent to look at all days

Previously, when running the `collate_letter_pdfs_for_day` task, we
would only send letters that were created between 5:30pm yesterday and
5:30 today.

Now we send letters that were created before 5:30pm today and that are
still waiting to be sent. This will help us automatically attempt to
send letters that may have fallen through the gaps and not been sent the
previous day when they should have been.

Previously we solved the problem of letters that had fallen the gap by
having to run the task with a date parameter for example
`collate_letter_pdfs_for_day('2020-02-18'). We no longer need this date
parameter as we will always look back across previous days too for
letters that still need sending.

Note, we have to change from using the pagination `list_objects_v2` to
instead getting each individual notification from s3. We reduce load by
using `HEAD` rather than `GET` but this will still greatly increase the
number of API calls. We acknowledge there will be a small cost to this,
say 50p for 5000 letters and think this is tolerable. Boto3 also handles
retries itself so if when making one of the many HEAD requests, there is
a networking blip then it should be retried automatically for us.

											
										
										
											2020-02-17 15:59:53 +00:00
+								        Notification.notification_type == LETTER_TYPE,
-												Don't send test letters to dvla to print

											
										
										
											2020-02-19 13:36:05 +00:00
+								        Notification.status == NOTIFICATION_CREATED,
-												Simplify putting letters in right postage folders

											
										
										
											2020-06-30 17:54:47 +01:00
+								        Notification.key_type == KEY_TYPE_NORMAL,
-												If there is an invalid letter that has not been updated to `validation-failed` because the `update-validation-failed-for-templated-letter` has not been picked up off the letter-tasks queue and the `collate-letter-pdfs-to-be-sent` has started.
1. The number of letters that we send to DVLA will be not be correct (see https://github.com/alphagov/notifications-api/blob/20ead82463a9df1ce3a6325d103a00306609f5fb/app/celery/letters_pdf_tasks.py#L136)
This may raise an alert with DVLA when they find we have sent them fewer letter than we have reported.
2. When we get the PDF from S3 we will get a file not found https://github.com/alphagov/notifications-api/blob/20ead82463a9df1ce3a6325d103a00306609f5fb/app/celery/letters_pdf_tasks.py#L244
The error will not prevent the collate task from completing but we will see an alert email for the exception and raise questions.

Although this situation is very unlikely because we have a 15 minute window between the last letter deadline date and the time we kick off the collate task we should still mitigate these issues. I updated the queries to only return letters with billable_units > 0, all valid letters should have at least 1 billable unit.

											
										
										
											2022-01-07 09:15:21 +00:00
+								        Notification.postage == postage,
 								        Notification.billable_units > 0
-												Change which letters we want to be sent to look at all days

Previously, when running the `collate_letter_pdfs_for_day` task, we
would only send letters that were created between 5:30pm yesterday and
5:30 today.

Now we send letters that were created before 5:30pm today and that are
still waiting to be sent. This will help us automatically attempt to
send letters that may have fallen through the gaps and not been sent the
previous day when they should have been.

Previously we solved the problem of letters that had fallen the gap by
having to run the task with a date parameter for example
`collate_letter_pdfs_for_day('2020-02-18'). We no longer need this date
parameter as we will always look back across previous days too for
letters that still need sending.

Note, we have to change from using the pagination `list_objects_v2` to
instead getting each individual notification from s3. We reduce load by
using `HEAD` rather than `GET` but this will still greatly increase the
number of API calls. We acknowledge there will be a small cost to this,
say 50p for 5000 letters and think this is tolerable. Boto3 also handles
retries itself so if when making one of the many HEAD requests, there is
a networking blip then it should be retried automatically for us.

											
										
										
											2020-02-17 15:59:53 +00:00
+								    ).order_by(
-												Adding service_id to the sort order for the letters being sent to print.

We have had a few instances where letters have caused problems. Particularly for precompiled letters, often the issue comes from the same service.
The hope is that by adding a sort order this will help the print provider narrow down the problem.

There is a small degradation of the performance of the query, but it's not enough to concern me.

											
										
										
											2020-10-15 09:39:07 +01:00
+								        Notification.service_id,
-												Change which letters we want to be sent to look at all days

Previously, when running the `collate_letter_pdfs_for_day` task, we
would only send letters that were created between 5:30pm yesterday and
5:30 today.

Now we send letters that were created before 5:30pm today and that are
still waiting to be sent. This will help us automatically attempt to
send letters that may have fallen through the gaps and not been sent the
previous day when they should have been.

Previously we solved the problem of letters that had fallen the gap by
having to run the task with a date parameter for example
`collate_letter_pdfs_for_day('2020-02-18'). We no longer need this date
parameter as we will always look back across previous days too for
letters that still need sending.

Note, we have to change from using the pagination `list_objects_v2` to
instead getting each individual notification from s3. We reduce load by
using `HEAD` rather than `GET` but this will still greatly increase the
number of API calls. We acknowledge there will be a small cost to this,
say 50p for 5000 letters and think this is tolerable. Boto3 also handles
retries itself so if when making one of the many HEAD requests, there is
a networking blip then it should be retried automatically for us.

											
										
										
											2020-02-17 15:59:53 +00:00
+								        Notification.created_at
-												use yield_per instead of limit

limit means we only return 50k letters, if there are more than that for
a service we'll skip them and they won't be picked up until the next
day.

If you remove the limit, sqlalchemy prefetches query results so it can
build up ORM results, for example collapsing joined rows into single
objects with chidren. SQLAlchemy streams the data into a buffer, and
normally will still prefetch the entire resultset so it can ensure
integrity of the session, (so that if you modify one result that is
duplicated further down in the results, both rows are updated in the
session for example). However, we don't care about that, but we do care
about preventing the result set taking up too much memory. We can use
`yield_per` to yield from sqlalchemy to the iterator (in this case the
`for letter in letters_awaiting_sending` loop in letters_pdf_tasks.py) -
this means every time we hit 10000 rows, we go back to the database to
get the next 10k. This way, we only ever need 10k rows in memory at a
time.

This has some caveats, mostly around how we handle the data the query
returns. They're a bit hard to parse but I'm pretty sure the notable
limitations are:

* It's dangerous to modify ORM objects returned by yield_per queries
* It's dangerous to join in a yield_per query if you think there will be
  more than one row per item (for example, if you join from notification
  to service, there'll be multiple result rows containing the same
  service, and if these are split over different yield chunks, then we
  may experience undefined behaviour.

These two limitations are focused around there being no guarantee of
having one unique row per item.

For more reading:
https://docs.sqlalchemy.org/en/13/orm/query.html?highlight=yield_per#sqlalchemy.orm.query.Query.yield_per
https://www.mail-archive.com/sqlalchemy@googlegroups.com/msg12443.html

											
										
										
											2020-10-23 20:06:24 +01:00
+								    ).yield_per(query_limit)
-												Change which letters we want to be sent to look at all days

Previously, when running the `collate_letter_pdfs_for_day` task, we
would only send letters that were created between 5:30pm yesterday and
5:30 today.

Now we send letters that were created before 5:30pm today and that are
still waiting to be sent. This will help us automatically attempt to
send letters that may have fallen through the gaps and not been sent the
previous day when they should have been.

Previously we solved the problem of letters that had fallen the gap by
having to run the task with a date parameter for example
`collate_letter_pdfs_for_day('2020-02-18'). We no longer need this date
parameter as we will always look back across previous days too for
letters that still need sending.

Note, we have to change from using the pagination `list_objects_v2` to
instead getting each individual notification from s3. We reduce load by
using `HEAD` rather than `GET` but this will still greatly increase the
number of API calls. We acknowledge there will be a small cost to this,
say 50p for 5000 letters and think this is tolerable. Boto3 also handles
retries itself so if when making one of the many HEAD requests, there is
a networking blip then it should be retried automatically for us.

											
										
										
											2020-02-17 15:59:53 +00:00
+								    return notifications
-												Query to get letter and sheet volumes

So we can send daily email with these volumes to DVLA.

											
										
										
											2021-02-22 17:23:49 +00:00
+								def dao_get_letters_and_sheets_volume_by_postage(print_run_deadline):
 								    notifications = db.session.query(
 								        func.count(Notification.id).label('letters_count'),
 								        func.sum(Notification.billable_units).label('sheets_count'),
 								        Notification.postage
 								    ).filter(
 								        Notification.created_at < convert_bst_to_utc(print_run_deadline),
 								        Notification.notification_type == LETTER_TYPE,
 								        Notification.status == NOTIFICATION_CREATED,
 								        Notification.key_type == KEY_TYPE_NORMAL,
-												If there is an invalid letter that has not been updated to `validation-failed` because the `update-validation-failed-for-templated-letter` has not been picked up off the letter-tasks queue and the `collate-letter-pdfs-to-be-sent` has started.
1. The number of letters that we send to DVLA will be not be correct (see https://github.com/alphagov/notifications-api/blob/20ead82463a9df1ce3a6325d103a00306609f5fb/app/celery/letters_pdf_tasks.py#L136)
This may raise an alert with DVLA when they find we have sent them fewer letter than we have reported.
2. When we get the PDF from S3 we will get a file not found https://github.com/alphagov/notifications-api/blob/20ead82463a9df1ce3a6325d103a00306609f5fb/app/celery/letters_pdf_tasks.py#L244
The error will not prevent the collate task from completing but we will see an alert email for the exception and raise questions.

Although this situation is very unlikely because we have a 15 minute window between the last letter deadline date and the time we kick off the collate task we should still mitigate these issues. I updated the queries to only return letters with billable_units > 0, all valid letters should have at least 1 billable unit.

											
										
										
											2022-01-07 09:15:21 +00:00
+								        Notification.billable_units > 0
-												Query to get letter and sheet volumes

So we can send daily email with these volumes to DVLA.

											
										
										
											2021-02-22 17:23:49 +00:00
+								    ).group_by(
 								        Notification.postage
-												Send daily email with letter and sheet  volumes to DVLA

											
										
										
											2021-02-17 17:47:00 +00:00
+								    ).order_by(
 								        Notification.postage
-												Query to get letter and sheet volumes

So we can send daily email with these volumes to DVLA.

											
										
										
											2021-02-22 17:23:49 +00:00
+								    ).all()
 								    return notifications
-												Add scheduled task to find old letters which still have 'created' status

Added a scheduled task to run once a day and check if there were any
letters from before 17.30 that still have a status of 'created'. This
logs an exception instead of trying to fix the error because the fix
will be different depending on which bucket the letter is in.

											
										
										
											2019-06-11 15:13:06 +01:00
+								def dao_old_letters_with_created_status():
 								    yesterday_bst = convert_utc_to_bst(datetime.utcnow()) - timedelta(days=1)
 								    last_processing_deadline = yesterday_bst.replace(hour=17, minute=30, second=0, microsecond=0)
 								    notifications = Notification.query.filter(
-												Add an alert when a letter is created but doesn't have a file in S3 for sending. We can tell this is the case because there is no updated_at and billable units are still 0.

At this point we are just creating a zendesk ticket - perhaps we can just call the create_letter_pdf task.

											
										
										
											2019-11-13 16:39:59 +00:00
+								        Notification.created_at < convert_bst_to_utc(last_processing_deadline),
-												Add scheduled task to find old letters which still have 'created' status

Added a scheduled task to run once a day and check if there were any
letters from before 17.30 that still have a status of 'created'. This
logs an exception instead of trying to fix the error because the fix
will be different depending on which bucket the letter is in.

											
										
										
											2019-06-11 15:13:06 +01:00
+								        Notification.notification_type == LETTER_TYPE,
 								        Notification.status == NOTIFICATION_CREATED
 								    ).order_by(
-												Add an alert when a letter is created but doesn't have a file in S3 for sending. We can tell this is the case because there is no updated_at and billable units are still 0.

At this point we are just creating a zendesk ticket - perhaps we can just call the create_letter_pdf task.

											
										
										
											2019-11-13 16:39:59 +00:00
+								        Notification.created_at
-												Add scheduled task to find old letters which still have 'created' status

Added a scheduled task to run once a day and check if there were any
letters from before 17.30 that still have a status of 'created'. This
logs an exception instead of trying to fix the error because the fix
will be different depending on which bucket the letter is in.

											
										
										
											2019-06-11 15:13:06 +01:00
+								    ).all()
 								    return notifications
-												Add an alert when a letter is created but doesn't have a file in S3 for sending. We can tell this is the case because there is no updated_at and billable units are still 0.

At this point we are just creating a zendesk ticket - perhaps we can just call the create_letter_pdf task.

											
										
										
											2019-11-13 16:39:59 +00:00
+								def letters_missing_from_sending_bucket(seconds_to_subtract):
 								    older_than_date = datetime.utcnow() - timedelta(seconds=seconds_to_subtract)
-												After a comment from @idavidmcdonald, I asked myself why are not creating the task to upload the pdf and update the notification.
The assumption was that S3 would throw an exception if the object was uploaded twice. That's not the case the default behaviour is that if a file already exists it will be overwritten. So it is completely safe to run the task from the alert.

It can also mean that we don't need to wait 4hours 15 minutes. Shall I decease the amount of time before restarting the task?

											
										
										
											2019-11-19 16:04:21 +00:00
+								    # We expect letters to have a `created` status, updated_at timestamp and billable units greater than zero.
-												Add an alert when a letter is created but doesn't have a file in S3 for sending. We can tell this is the case because there is no updated_at and billable units are still 0.

At this point we are just creating a zendesk ticket - perhaps we can just call the create_letter_pdf task.

											
										
										
											2019-11-13 16:39:59 +00:00
+								    notifications = Notification.query.filter(
-												After a comment from @idavidmcdonald, I asked myself why are not creating the task to upload the pdf and update the notification.
The assumption was that S3 would throw an exception if the object was uploaded twice. That's not the case the default behaviour is that if a file already exists it will be overwritten. So it is completely safe to run the task from the alert.

It can also mean that we don't need to wait 4hours 15 minutes. Shall I decease the amount of time before restarting the task?

											
										
										
											2019-11-19 16:04:21 +00:00
+								        Notification.billable_units == 0,
-												Add an alert when a letter is created but doesn't have a file in S3 for sending. We can tell this is the case because there is no updated_at and billable units are still 0.

At this point we are just creating a zendesk ticket - perhaps we can just call the create_letter_pdf task.

											
										
										
											2019-11-13 16:39:59 +00:00
+								        Notification.updated_at == None,  # noqa
 								        Notification.status == NOTIFICATION_CREATED,
 								        Notification.created_at <= older_than_date,
 								        Notification.notification_type == LETTER_TYPE,
 								        Notification.key_type == KEY_TYPE_NORMAL
 								    ).order_by(
 								        Notification.created_at
 								    ).all()
 								    return notifications
-												Add scheduled task to find precompiled letters in wrong state

Added a task which runs twice a day on weekdays and checks for letters that have
been in the state of `pending-virus-check` for over 90 minutes. This is
just logging an exception for now, not trying to fix things, since we
will need to manually check where the issue was.

											
										
										
											2019-06-11 13:16:34 +01:00
+								def dao_precompiled_letters_still_pending_virus_check():
 								    ninety_minutes_ago = datetime.utcnow() - timedelta(seconds=5400)
 								    notifications = Notification.query.filter(
 								        Notification.created_at < ninety_minutes_ago,
 								        Notification.status == NOTIFICATION_PENDING_VIRUS_CHECK
 								    ).order_by(
 								        Notification.created_at
 								    ).all()
 								    return notifications
-												log more info when we receive multiple delivery callbacks for one notification

Previously, we logged a warning containing the notification reference
and new status. However it wasn't a great message - this new one
includes the notification id, the old status, the time difference and
more.

This separates out logs for callbacks for notifications we don't know
(error level) and duplicates (info level).

											
										
										
											2018-12-20 16:01:39 +00:00
+								def _duplicate_update_warning(notification, status):
 								    current_app.logger.info(
 								        (
-												Log Service ID when we get a duplicate receipt

This will make it easier to group these logs if a service complains
about the issue.

											
										
										
											2022-03-21 15:41:59 +00:00
+								            'Duplicate callback received for service {service_id}. '
 								            'Notification ID {id} with type {type} sent by {sent_by}. '
 								            'New status was {new_status}, current status is {old_status}. '
 								            'This happened {time_diff} after being first set.'
-												log more info when we receive multiple delivery callbacks for one notification

Previously, we logged a warning containing the notification reference
and new status. However it wasn't a great message - this new one
includes the notification id, the old status, the time difference and
more.

This separates out logs for callbacks for notifications we don't know
(error level) and duplicates (info level).

											
										
										
											2018-12-20 16:01:39 +00:00
+								        ).format(
 								            id=notification.id,
 								            old_status=notification.status,
 								            new_status=status,
-												make sure log line works if notification still in created

											
										
										
											2018-12-28 14:29:59 +00:00
+								            time_diff=datetime.utcnow() - (notification.updated_at or notification.created_at),
-												log more info when we receive multiple delivery callbacks for one notification

Previously, we logged a warning containing the notification reference
and new status. However it wasn't a great message - this new one
includes the notification id, the old status, the time difference and
more.

This separates out logs for callbacks for notifications we don't know
(error level) and duplicates (info level).

											
										
										
											2018-12-20 16:01:39 +00:00
+								            type=notification.notification_type,
-												Log Service ID when we get a duplicate receipt

This will make it easier to group these logs if a service complains
about the issue.

											
										
										
											2022-03-21 15:41:59 +00:00
+								            sent_by=notification.sent_by,
 								            service_id=notification.service_id
-												log more info when we receive multiple delivery callbacks for one notification

Previously, we logged a warning containing the notification reference
and new status. However it wasn't a great message - this new one
includes the notification id, the old status, the time difference and
more.

This separates out logs for callbacks for notifications we don't know
(error level) and duplicates (info level).

											
										
										
											2018-12-20 16:01:39 +00:00
+								        )
 								    )
-												split delete task up into per service

we really don't gain anything by running each service delete in sequence
- we get the services, and then just loop through them deleting per
service. By deleting per service in separate tasks, we can take
advantage of parallelism. the only thing we lose is some log lines but I
don't think we're that interested in them.

only set query limit at the move_notifications dao function - the task
doesn't really care about the technical implementation of how it deletes
the notifications

											
										
										
											2021-12-06 09:30:48 +00:00
-												Rename function to make it consistent

This is consistent with the new "on_date" function. It was going
off the edge of my screen before in some parts of the code.

											
										
										
											2022-01-24 15:54:37 +00:00
+								def get_service_ids_with_notifications_before(notification_type, timestamp):
-												split delete task up into per service

we really don't gain anything by running each service delete in sequence
- we get the services, and then just loop through them deleting per
service. By deleting per service in separate tasks, we can take
advantage of parallelism. the only thing we lose is some log lines but I
don't think we're that interested in them.

only set query limit at the move_notifications dao function - the task
doesn't really care about the technical implementation of how it deletes
the notifications

											
										
										
											2021-12-06 09:30:48 +00:00
+								    return {
 								        row.service_id
 								        for row in db.session.query(
 								            Notification.service_id
 								        ).filter(
 								            Notification.notification_type == notification_type,
 								            Notification.created_at < timestamp
 								        ).distinct()
 								    }
-												Only aggregate status when necessary for a service

This takes a similar approach to the nightly deletion task so that
we only create sub-tasks when there are actually notifications to
aggregate for a given type and day [1].

We're making this change to stop the duplication errors we're getting
at the moment and ensure the task can scale to more messages and more
services. There are two parts to this:

- Each subtask should now run within the 5 minute visibility timeout.
However, they may still be duplicated if the parent task overruns [2].

- The parent task creates a mininal number of subtasks, and the query
to determine this is very fast for a normal process day (milliseconds).

Since all tasks will run quickly, there should be no more duplication.

In order to test this more nuanced task, I rewrote the tests:

- One test checks the subtask is called correctly.
- One test checks we create all the right subtasks.

[1]: https://github.com/alphagov/notifications-api/pull/3381
[2]: https://docs.google.com/document/d/1MaP6Nyy3nJKkuh_4lP1wuDm19X8LZITOLRd9n3Ax-xg/edit#heading=h.q3intzwqhfzl

											
										
										
											2022-01-25 11:29:57 +00:00
 								def get_service_ids_with_notifications_on_date(notification_type, date):
-												Fix getting service IDs for status aggregation

Addresses [1].

Previously the query would always use UTC midnight, even after we
had switched to BST (+1h). We store timestamps as naive UTC in our
DB - without a timezone - but we want the query to work in terms
of GMT / BST so we adjust for that - BST midnight is 11PM in UTC.

[1]: https://github.com/alphagov/notifications-api/pull/3437#discussion_r791998690

											
										
										
											2022-02-10 10:37:32 +00:00
+								    start_date = get_london_midnight_in_utc(date)
 								    end_date = get_london_midnight_in_utc(date + timedelta(days=1))
-												Fix out-of-date rows in ft_notification_status

This can happen in the following scenario (primarily for letters):

1. A service has a mixture of "delivered" and "sending" letters,
which the status task aggregates into two rows:

  sending | 123
  delivered | 456

2. After the 7 day retention has passed, only the "delivered" letters
will be archived [^1].

3. The status task now looks at the history table [^2], which means
it only sees the "delivered" letters.

4. The "sending" letters are eventually "delivered" and archived (before
the 10 day aggregation cutoff).

5. But the status aggregation task doesn't run.

This commit fixes (5).

[^1]: https://github.com/alphagov/notifications-api/pull/3063
[^2]: https://github.com/alphagov/notifications-api/blob/f87ebb094d9c2037ce6a4c4a38bab87daa868be3/app/dao/fact_notification_status_dao.py#L51

											
										
										
											2022-05-10 11:14:59 +01:00
+								    notification_table_query = db.session.query(
 								        Notification.service_id.label('service_id')
 								    ).filter(
 								        Notification.notification_type == notification_type,
 								        # using >= + < is much more efficient than date(created_at)
 								        Notification.created_at >= start_date,
 								        Notification.created_at < end_date,
 								    )
 								    # Looking at this table is more efficient for historical notifications,
 								    # provided the task to populate it has run before they were archived.
 								    ft_status_table_query = db.session.query(
 								        FactNotificationStatus.service_id.label('service_id')
 								    ).filter(
 								        FactNotificationStatus.notification_type == notification_type,
 								        FactNotificationStatus.bst_date == date,
 								    )
-												Only aggregate status when necessary for a service

This takes a similar approach to the nightly deletion task so that
we only create sub-tasks when there are actually notifications to
aggregate for a given type and day [1].

We're making this change to stop the duplication errors we're getting
at the moment and ensure the task can scale to more messages and more
services. There are two parts to this:

- Each subtask should now run within the 5 minute visibility timeout.
However, they may still be duplicated if the parent task overruns [2].

- The parent task creates a mininal number of subtasks, and the query
to determine this is very fast for a normal process day (milliseconds).

Since all tasks will run quickly, there should be no more duplication.

In order to test this more nuanced task, I rewrote the tests:

- One test checks the subtask is called correctly.
- One test checks we create all the right subtasks.

[1]: https://github.com/alphagov/notifications-api/pull/3381
[2]: https://docs.google.com/document/d/1MaP6Nyy3nJKkuh_4lP1wuDm19X8LZITOLRd9n3Ax-xg/edit#heading=h.q3intzwqhfzl

											
										
										
											2022-01-25 11:29:57 +00:00
+								    return {
-												Fix out-of-date rows in ft_notification_status

This can happen in the following scenario (primarily for letters):

1. A service has a mixture of "delivered" and "sending" letters,
which the status task aggregates into two rows:

  sending | 123
  delivered | 456

2. After the 7 day retention has passed, only the "delivered" letters
will be archived [^1].

3. The status task now looks at the history table [^2], which means
it only sees the "delivered" letters.

4. The "sending" letters are eventually "delivered" and archived (before
the 10 day aggregation cutoff).

5. But the status aggregation task doesn't run.

This commit fixes (5).

[^1]: https://github.com/alphagov/notifications-api/pull/3063
[^2]: https://github.com/alphagov/notifications-api/blob/f87ebb094d9c2037ce6a4c4a38bab87daa868be3/app/dao/fact_notification_status_dao.py#L51

											
										
										
											2022-05-10 11:14:59 +01:00
+								        row.service_id for row in db.session.query(union(
 								            notification_table_query, ft_status_table_query
 								        ).subquery()).distinct()
-												Only aggregate status when necessary for a service

This takes a similar approach to the nightly deletion task so that
we only create sub-tasks when there are actually notifications to
aggregate for a given type and day [1].

We're making this change to stop the duplication errors we're getting
at the moment and ensure the task can scale to more messages and more
services. There are two parts to this:

- Each subtask should now run within the 5 minute visibility timeout.
However, they may still be duplicated if the parent task overruns [2].

- The parent task creates a mininal number of subtasks, and the query
to determine this is very fast for a normal process day (milliseconds).

Since all tasks will run quickly, there should be no more duplication.

In order to test this more nuanced task, I rewrote the tests:

- One test checks the subtask is called correctly.
- One test checks we create all the right subtasks.

[1]: https://github.com/alphagov/notifications-api/pull/3381
[2]: https://docs.google.com/document/d/1MaP6Nyy3nJKkuh_4lP1wuDm19X8LZITOLRd9n3Ax-xg/edit#heading=h.q3intzwqhfzl

											
										
										
											2022-01-25 11:29:57 +00:00
+								    }