1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
|
$NetBSD: patch-Lib_urlparse.py,v 1.1.2.2 2021/10/13 21:04:01 tm Exp $
Fix CVE-2021-23336: add a `separator` argument to parse_qs and parse_qsl; warn when the default separator is used on a query containing ';'.
Via Fedora:
https://src.fedoraproject.org/rpms/python2.7/blob/rawhide/f/00359-CVE-2021-23336.patch
--- Lib/urlparse.py.orig 2020-04-19 21:13:39.000000000 +0000
+++ Lib/urlparse.py
@@ -29,6 +29,7 @@ test_urlparse.py provides a good indicat
"""
import re
+import os
__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
"urlsplit", "urlunsplit", "parse_qs", "parse_qsl"]
@@ -382,7 +383,8 @@ def unquote(s):
append(item)
return ''.join(res)
-def parse_qs(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None):
+def parse_qs(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None,
+ separator=None):
"""Parse a query given as a string argument.
Arguments:
@@ -405,14 +407,23 @@ def parse_qs(qs, keep_blank_values=0, st
"""
dict = {}
for name, value in parse_qsl(qs, keep_blank_values, strict_parsing,
- max_num_fields):
+ max_num_fields, separator):
if name in dict:
dict[name].append(value)
else:
dict[name] = [value]
return dict
-def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None):
+class _QueryStringSeparatorWarning(RuntimeWarning):
+ """Warning for using default `separator` in parse_qs or parse_qsl"""
+
+# The default "separator" for parse_qsl can be specified in a config file.
+# It's cached after first read. parse_qsl opens this path, so keep it defined.
+_QS_SEPARATOR_CONFIG_FILENAME = '/etc/python/urllib.cfg'
+_default_qs_separator = None
+
+def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None,
+ separator=None):
"""Parse a query given as a string argument.
Arguments:
@@ -434,15 +445,72 @@ def parse_qsl(qs, keep_blank_values=0, s
Returns a list, as G-d intended.
"""
+
+ if (not separator or (not isinstance(separator, (str, bytes)))) and separator is not None:
+ raise ValueError("Separator must be of type string or bytes.")
+
+ # Used when both "&" and ";" act as separators. (Need a non-string value.)
+ _legacy = object()
+
+ if separator is None:
+ global _default_qs_separator
+ separator = _default_qs_separator
+ envvar_name = 'PYTHON_URLLIB_QS_SEPARATOR'
+ if separator is None:
+ # Set default separator from environment variable
+ separator = os.environ.get(envvar_name)
+ config_source = 'environment variable'
+ if separator is None:
+ # Set default separator from the configuration file
+ try:
+ file = open(_QS_SEPARATOR_CONFIG_FILENAME)
+ except EnvironmentError:
+ pass
+ else:
+ with file:
+ import ConfigParser
+ config = ConfigParser.ConfigParser()
+ config.readfp(file)
+ separator = config.get('parse_qs', envvar_name)
+ _default_qs_separator = separator
+ config_source = _QS_SEPARATOR_CONFIG_FILENAME
+ if separator is None:
+ # The default is '&', but warn if not specified explicitly
+ if ';' in qs:
+ from warnings import warn
+ warn("The default separator of urlparse.parse_qsl and "
+ + "parse_qs was changed to '&' to avoid a web cache "
+ + "poisoning issue (CVE-2021-23336). "
+ + "By default, semicolons no longer act as query field "
+ + "separators. "
+ + "See https://access.redhat.com/articles/5860431 for "
+ + "more details.",
+ _QueryStringSeparatorWarning, stacklevel=2)
+ separator = '&'
+ elif separator == 'legacy':
+ separator = _legacy
+ elif len(separator) != 1:
+ raise ValueError(
+ '{} (from {}) must contain '.format(envvar_name, config_source)
+ + '1 character, or "legacy". See '
+ + 'https://access.redhat.com/articles/5860431 for more details.'
+ )
+
# If max_num_fields is defined then check that the number of fields
# is less than max_num_fields. This prevents a memory exhaustion DOS
# attack via post bodies with many fields.
if max_num_fields is not None:
- num_fields = 1 + qs.count('&') + qs.count(';')
+ if separator is _legacy:
+ num_fields = 1 + qs.count('&') + qs.count(';')
+ else:
+ num_fields = 1 + qs.count(separator)
if max_num_fields < num_fields:
raise ValueError('Max number of fields exceeded')
- pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
+ if separator is _legacy:
+ pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
+ else:
+ pairs = [s1 for s1 in qs.split(separator)]
r = []
for name_value in pairs:
if not name_value and not strict_parsing:
|