Use meta tag to tell search engines not to index

Google’s documentation says:

> robots.txt is not a mechanism for keeping a web page out of Google. To
> keep a web page out of Google, you should use noindex directives

A noindex directive means adding the following meta tag to pages that
shouldn’t be indexed:
```html
<meta name="robots" content="noindex" />
```

It’s also possible to set the directive as an HTTP header, but this seems
trickier to achieve on a per-view basis in Flask.

I’ve implemented this as a decorator so it can quickly be added to any
other pages that we decide shouldn’t appear in search results.
This commit is contained in:
Chris Hill-Scott
2020-05-26 17:39:25 +01:00
parent b98f4561fa
commit 92ffe3a78c
7 changed files with 45 additions and 2 deletions

View File

@@ -19,11 +19,13 @@ from app.models.feedback import (
PROBLEM_TICKET_TYPE,
QUESTION_TICKET_TYPE,
)
from app.utils import hide_from_search_engines
bank_holidays = BankHolidays(use_cached_holidays=True)
@main.route('/support', methods=['GET', 'POST'])
@hide_from_search_engines
def support():
if current_user.is_authenticated:
@@ -50,12 +52,14 @@ def support():
@main.route('/support/public')
@hide_from_search_engines
def support_public():
    """Render the support page served at ``/support/public``."""
    template_name = 'views/support/public.html'
    return render_template(template_name)
@main.route('/support/triage', methods=['GET', 'POST'])
@main.route('/support/triage/<ticket_type:ticket_type>', methods=['GET', 'POST'])
@hide_from_search_engines
def triage(ticket_type=PROBLEM_TICKET_TYPE):
form = Triage()
if form.validate_on_submit():
@@ -75,6 +79,7 @@ def triage(ticket_type=PROBLEM_TICKET_TYPE):
@main.route('/support/<ticket_type:ticket_type>', methods=['GET', 'POST'])
@hide_from_search_engines
def feedback(ticket_type):
form = FeedbackOrProblem()
@@ -153,6 +158,7 @@ def feedback(ticket_type):
@main.route('/support/escalate', methods=['GET', 'POST'])
@hide_from_search_engines
def bat_phone():
if current_user.is_authenticated:
@@ -162,6 +168,7 @@ def bat_phone():
@main.route('/support/thanks', methods=['GET', 'POST'])
@hide_from_search_engines
def thanks():
return render_template(
'views/support/thanks.html',

View File

@@ -11,9 +11,11 @@ from app.main.forms import (
)
from app.main.views.verify import activate_user
from app.models.user import InvitedOrgUser, InvitedUser, User
from app.utils import hide_from_search_engines
@main.route('/register', methods=['GET', 'POST'])
@hide_from_search_engines
def register():
if current_user and current_user.is_authenticated:
return redirect(url_for('main.show_accounts_or_dashboard'))

View File

@@ -14,9 +14,11 @@ from app import login_manager
from app.main import main
from app.main.forms import LoginForm
from app.models.user import InvitedUser, User
from app.utils import hide_from_search_engines
@main.route('/sign-in', methods=(['GET', 'POST']))
@hide_from_search_engines
def sign_in():
if current_user and current_user.is_authenticated:
return redirect(url_for('main.show_accounts_or_dashboard'))

View File

@@ -17,6 +17,9 @@
<style>
.govuk-header__container { border-color: {{header_colour}} }
</style>
{% if g.hide_from_search_engines %}
<meta name="robots" content="noindex" />
{% endif %}
<meta name="google-site-verification" content="niWnSqImOWz6mVQTYqNb5tFK8HaKSB4b3ED4Z9gtUQ0" />
{% block meta_format_detection %}
<meta name="format-detection" content="telephone=no">

View File

@@ -3,7 +3,7 @@
{% from "components/page-header.html" import page_header %}
{% block per_page_title %}
The GOV.UK Notify team cannot give advice to members of the public
The GOV.UK Notify service is for people who work in the government
{% endblock %}
{% block maincolumn_content %}

View File

@@ -16,7 +16,7 @@ import pyexcel
import pyexcel_xlsx
import pytz
from dateutil import parser
from flask import abort, current_app, redirect, request, session, url_for
from flask import abort, current_app, g, redirect, request, session, url_for
from flask_login import current_user, login_required
from notifications_utils.field import Field
from notifications_utils.formatters import (
@@ -767,3 +767,11 @@ def is_less_than_90_days_ago(date_from_db):
return (datetime.utcnow() - datetime.strptime(
date_from_db, "%Y-%m-%dT%H:%M:%S.%fZ"
)).days < 90
def hide_from_search_engines(f):
    """Decorate a view so its page is flagged as hidden from search engines.

    Before the wrapped view runs, ``g.hide_from_search_engines`` is set to
    ``True``. The view's positional and keyword arguments, and its return
    value, pass through unchanged.
    """
    @wraps(f)
    def flag_then_call_view(*view_args, **view_kwargs):
        # Set the flag before the view executes so it is already in place
        # while the view produces its response.
        g.hide_from_search_engines = True
        response = f(*view_args, **view_kwargs)
        return response

    return flag_then_call_view

View File

@@ -3,6 +3,7 @@ from functools import partial
import pytest
from bs4 import BeautifulSoup
from flask import url_for
from freezegun import freeze_time
from app.main.forms import FieldWithNoneOption
from tests.conftest import SERVICE_ONE_ID, normalize_spaces, sample_uuid
@@ -76,6 +77,26 @@ def test_robots(client):
)
@pytest.mark.parametrize('endpoint, kwargs', (
    ('sign_in', {}),
    ('support', {}),
    ('support_public', {}),
    ('triage', {}),
    ('feedback', {'ticket_type': 'ask-question-give-feedback'}),
    ('feedback', {'ticket_type': 'general'}),
    ('feedback', {'ticket_type': 'report-problem'}),
    ('bat_phone', {}),
    ('thanks', {}),
    ('register', {}),
    pytest.param('index', {}, marks=pytest.mark.xfail(raises=AssertionError)),
))
@freeze_time('2012-12-12 12:12')  # So we don’t go out of business hours
def test_hiding_pages_from_search_engines(client_request, endpoint, kwargs):
    """Check that each listed page carries a robots ``noindex`` meta tag.

    Every endpoint is requested while logged out. The ``index`` case is
    marked as an expected failure of the assertion.
    """
    client_request.logout()
    page = client_request.get(f'main.{endpoint}', **kwargs)
    robots_meta = page.select_one('meta[name=robots]')
    # select_one returns None when nothing matches. Assert explicitly so a
    # missing tag is reported as an AssertionError (which the xfail case
    # declares) rather than a TypeError from subscripting None.
    assert robots_meta is not None
    assert robots_meta['content'] == 'noindex'
@pytest.mark.parametrize('view', [
'cookies', 'privacy', 'pricing', 'terms', 'roadmap',
'features', 'documentation', 'security',