| From 2b578479b96aa3deeeb8bac313a02b5cf3cb1aff Mon Sep 17 00:00:00 2001 |
| From: Victor Stinner <vstinner@redhat.com> |
| Date: Tue, 11 Jun 2019 12:45:35 +0200 |
| Subject: [PATCH] [2.7] bpo-36742: Fix urlparse.urlsplit() error message for |
| Unicode URL (GH-13937) |
| |
| If urlparse.urlsplit() detects an invalid netloc according to NFKC |
| normalization, the error message type is now str rather than unicode, |
| and repr() is used to format the URL, to prevent <exception str() failed> |
| when displaying the error message. |
| |
| Signed-off-by: Peter Korsgaard <peter@korsgaard.com> |
| --- |
| Lib/test/test_urlparse.py | 9 +++++++++ |
| Lib/urlparse.py | 5 +++-- |
| .../NEWS.d/next/Library/2019-06-10-12-02-45.bpo-36742.UEdHXJ.rst | 3 +++ |
| 3 files changed, 15 insertions(+), 2 deletions(-) |
| create mode 100644 Misc/NEWS.d/next/Library/2019-06-10-12-02-45.bpo-36742.UEdHXJ.rst |
| |
| diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py |
| index 857ed96d92..86c4a0595c 100644 |
| --- a/Lib/test/test_urlparse.py |
| +++ b/Lib/test/test_urlparse.py |
| @@ -656,6 +656,15 @@ class UrlParseTestCase(unittest.TestCase): |
| with self.assertRaises(ValueError): |
| urlparse.urlsplit(url) |
| |
| + # check error message: invalid netloc must be formatted with repr() |
| + # to get an ASCII error message |
| + with self.assertRaises(ValueError) as cm: |
| + urlparse.urlsplit(u'http://example.com\uFF03@bing.com') |
| + self.assertEqual(str(cm.exception), |
| + "netloc u'example.com\\uff03@bing.com' contains invalid characters " |
| + "under NFKC normalization") |
| + self.assertIsInstance(cm.exception.args[0], str) |
| + |
| def test_main(): |
| test_support.run_unittest(UrlParseTestCase) |
| |
| diff --git a/Lib/urlparse.py b/Lib/urlparse.py |
| index 6834f3c179..798b467b60 100644 |
| --- a/Lib/urlparse.py |
| +++ b/Lib/urlparse.py |
| @@ -180,8 +180,9 @@ def _checknetloc(netloc): |
| return |
| for c in '/?#@:': |
| if c in netloc2: |
| - raise ValueError(u"netloc '" + netloc + u"' contains invalid " + |
| - u"characters under NFKC normalization") |
| + raise ValueError("netloc %r contains invalid characters " |
| + "under NFKC normalization" |
| + % netloc) |
| |
| def urlsplit(url, scheme='', allow_fragments=True): |
| """Parse a URL into 5 components: |
| diff --git a/Misc/NEWS.d/next/Library/2019-06-10-12-02-45.bpo-36742.UEdHXJ.rst b/Misc/NEWS.d/next/Library/2019-06-10-12-02-45.bpo-36742.UEdHXJ.rst |
| new file mode 100644 |
| index 0000000000..3ba774056f |
| --- /dev/null |
| +++ b/Misc/NEWS.d/next/Library/2019-06-10-12-02-45.bpo-36742.UEdHXJ.rst |
| @@ -0,0 +1,3 @@ |
| +:func:`urlparse.urlsplit` error message for invalid ``netloc`` according to |
| +NFKC normalization is now a :class:`str` string, rather than a |
| +:class:`unicode` string, to prevent an error when displaying the message. |
| -- |
| 2.11.0 |
| |