From ec951631750a78016c5f729b5cc9ffa8de78a127 Mon Sep 17 00:00:00 2001
From: Katie Smith <katie.smith@digital.cabinet-office.gov.uk>
Date: Thu, 14 Apr 2022 16:39:22 +0100
Subject: [PATCH] Update beautifulsoup4 to 4.11.1
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`charset-normalizer` is now used by default if installed instead of
`chardet` (https://pyup.io/changelogs/beautifulsoup4/#4.11.0). We do
have `charset-normalizer` installed because it's a subdependency of the
requests library, so it is being used.

This caused the `test_content_too_long_returns_400` test to fail, since
the encoding of `ŵ` is now detected as `{'encoding': 'Big5',
'language': 'Chinese', 'confidence': 1.0}`.

There are two options for fixing this:
- change the test content so that it doesn't just contain a single
  letter - the docs state that you shouldn't run character detection on
  very tiny content
- add `chardet` as a requirement, so that the code functions exactly the
  same as before

I've chosen the first option, since this avoids adding a dependency and
we should never have messages consisting of a single character.
---
 requirements.in                                    | 2 +-
 requirements.txt                                   | 2 +-
 tests/app/v2/broadcast/sample_cap_xml_documents.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/requirements.in b/requirements.in
index 9d4ede6d9..acbe205e9 100644
--- a/requirements.in
+++ b/requirements.in
@@ -22,7 +22,7 @@ SQLAlchemy==1.4.35
 strict-rfc3339==0.7
 rfc3987==1.3.8
 cachetools==5.0.0
-beautifulsoup4==4.10.0
+beautifulsoup4==4.11.1
 lxml==4.8.0
 Werkzeug==2.0.3
 
diff --git a/requirements.txt b/requirements.txt
index c3c3dd044..e04f7a494 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -16,7 +16,7 @@ awscli-cwlogs==1.4.6
     # via -r requirements.in
 bcrypt==3.2.0
     # via flask-bcrypt
-beautifulsoup4==4.10.0
+beautifulsoup4==4.11.1
     # via -r requirements.in
 billiard==3.6.4.0
     # via celery
diff --git a/tests/app/v2/broadcast/sample_cap_xml_documents.py b/tests/app/v2/broadcast/sample_cap_xml_documents.py
index 7420cda0e..37acf006f 100644
--- a/tests/app/v2/broadcast/sample_cap_xml_documents.py
+++ b/tests/app/v2/broadcast/sample_cap_xml_documents.py
@@ -248,5 +248,5 @@ WINDEMERE = """
 """
 
 LONG_GSM7 = WITH_PLACEHOLDER_FOR_CONTENT.format('a' * 1396)
-LONG_UCS2 = WITH_PLACEHOLDER_FOR_CONTENT.format('ŵ' * 616)
+LONG_UCS2 = WITH_PLACEHOLDER_FOR_CONTENT.format('ŵyl' * 205 + 'a')
 MISSING_AREA_NAMES = re.sub("<areaDesc>.*</areaDesc>", "<areaDesc> </areaDesc>", WAINFLEET)