aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com> 2017-05-10 06:05:20 +0300
committer: GitHub <noreply@github.com> 2017-05-10 06:05:20 +0300
commit: 305ccbe27ea5ba82fd2d8c32ec739f980e524330 (patch)
tree: b2a5769d41b5de66dff278d57b2caa57b18d4f63
parent: 211a392cc15f9a7b1b8ce65d8f6c9f8237d1b77f (diff)
download: cpython-305ccbe27ea5ba82fd2d8c32ec739f980e524330.tar.gz
cpython-305ccbe27ea5ba82fd2d8c32ec739f980e524330.zip
bpo-30298: Weaken the condition of deprecation warnings for inline modifiers. (#1490)
Several consecutive inline modifiers are now allowed at the start of the pattern (e.g. '(?i)(?s)...'). In verbose mode, whitespace and comments are now allowed before and between inline modifiers (e.g. '(?x) (?i) (?s)...').
-rw-r--r-- Lib/sre_parse.py 12
-rw-r--r-- Lib/test/test_re.py 59
-rw-r--r-- Misc/NEWS 6
3 files changed, 56 insertions, 21 deletions
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index ab37fd3fe2f..d8d1bd552fb 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -412,7 +412,7 @@ def _parse_sub(source, state, verbose, nested=True):
sourcematch = source.match
start = source.tell()
while True:
- itemsappend(_parse(source, state, verbose))
+ itemsappend(_parse(source, state, verbose, not nested and not items))
if not sourcematch("|"):
break
@@ -466,7 +466,7 @@ def _parse_sub_cond(source, state, condgroup, verbose):
subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
return subpattern
-def _parse(source, state, verbose):
+def _parse(source, state, verbose, first=False):
# parse a simple pattern
subpattern = SubPattern(state)
@@ -730,10 +730,9 @@ def _parse(source, state, verbose):
state.checklookbehindgroup(condgroup, source)
elif char in FLAGS or char == "-":
# flags
- pos = source.pos
flags = _parse_flags(source, state, char)
if flags is None: # global flags
- if pos != 3: # "(?x"
+ if not first or subpattern:
import warnings
warnings.warn(
'Flags not at the start of the expression %s%s' % (
@@ -742,6 +741,8 @@ def _parse(source, state, verbose):
),
DeprecationWarning, stacklevel=7
)
+ if (state.flags & SRE_FLAG_VERBOSE) and not verbose:
+ raise Verbose
continue
add_flags, del_flags = flags
group = None
@@ -795,9 +796,6 @@ def _parse_flags(source, state, char):
msg = "unknown flag" if char.isalpha() else "missing -, : or )"
raise source.error(msg, len(char))
if char == ")":
- if ((add_flags & SRE_FLAG_VERBOSE) and
- not (state.flags & SRE_FLAG_VERBOSE)):
- raise Verbose
state.flags |= add_flags
return None
if add_flags & GLOBAL_FLAGS:
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 3129f7e9888..4d71eea517e 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1325,32 +1325,43 @@ class ReTests(unittest.TestCase):
upper_char = '\u1ea0' # Latin Capital Letter A with Dot Below
lower_char = '\u1ea1' # Latin Small Letter A with Dot Below
- p = re.compile(upper_char, re.I | re.U)
- q = p.match(lower_char)
+ p = re.compile('.' + upper_char, re.I | re.S)
+ q = p.match('\n' + lower_char)
self.assertTrue(q)
- p = re.compile(lower_char, re.I | re.U)
- q = p.match(upper_char)
+ p = re.compile('.' + lower_char, re.I | re.S)
+ q = p.match('\n' + upper_char)
self.assertTrue(q)
- p = re.compile('(?i)' + upper_char, re.U)
- q = p.match(lower_char)
+ p = re.compile('(?i).' + upper_char, re.S)
+ q = p.match('\n' + lower_char)
self.assertTrue(q)
- p = re.compile('(?i)' + lower_char, re.U)
- q = p.match(upper_char)
+ p = re.compile('(?i).' + lower_char, re.S)
+ q = p.match('\n' + upper_char)
self.assertTrue(q)
- p = re.compile('(?iu)' + upper_char)
- q = p.match(lower_char)
+ p = re.compile('(?is).' + upper_char)
+ q = p.match('\n' + lower_char)
self.assertTrue(q)
- p = re.compile('(?iu)' + lower_char)
- q = p.match(upper_char)
+ p = re.compile('(?is).' + lower_char)
+ q = p.match('\n' + upper_char)
self.assertTrue(q)
- self.assertTrue(re.match('(?ixu) ' + upper_char, lower_char))
- self.assertTrue(re.match('(?ixu) ' + lower_char, upper_char))
+ p = re.compile('(?s)(?i).' + upper_char)
+ q = p.match('\n' + lower_char)
+ self.assertTrue(q)
+
+ p = re.compile('(?s)(?i).' + lower_char)
+ q = p.match('\n' + upper_char)
+ self.assertTrue(q)
+
+ self.assertTrue(re.match('(?ix) ' + upper_char, lower_char))
+ self.assertTrue(re.match('(?ix) ' + lower_char, upper_char))
+ self.assertTrue(re.match(' (?i) ' + upper_char, lower_char, re.X))
+ self.assertTrue(re.match('(?x) (?i) ' + upper_char, lower_char))
+ self.assertTrue(re.match(' (?x) (?i) ' + upper_char, lower_char, re.X))
p = upper_char + '(?i)'
with self.assertWarns(DeprecationWarning) as warns:
@@ -1368,6 +1379,26 @@ class ReTests(unittest.TestCase):
'Flags not at the start of the expression %s (truncated)' % p[:20]
)
+ with self.assertWarns(DeprecationWarning):
+ self.assertTrue(re.match('(?s).(?i)' + upper_char, '\n' + lower_char))
+ with self.assertWarns(DeprecationWarning):
+ self.assertTrue(re.match('(?i) ' + upper_char + ' (?x)', lower_char))
+ with self.assertWarns(DeprecationWarning):
+ self.assertTrue(re.match(' (?x) (?i) ' + upper_char, lower_char))
+ with self.assertWarns(DeprecationWarning):
+ self.assertTrue(re.match('^(?i)' + upper_char, lower_char))
+ with self.assertWarns(DeprecationWarning):
+ self.assertTrue(re.match('$|(?i)' + upper_char, lower_char))
+ with self.assertWarns(DeprecationWarning):
+ self.assertTrue(re.match('(?:(?i)' + upper_char + ')', lower_char))
+ with self.assertWarns(DeprecationWarning):
+ self.assertTrue(re.fullmatch('(^)?(?(1)(?i)' + upper_char + ')',
+ lower_char))
+ with self.assertWarns(DeprecationWarning):
+ self.assertTrue(re.fullmatch('($)?(?(1)|(?i)' + upper_char + ')',
+ lower_char))
+
+
def test_dollar_matches_twice(self):
"$ matches the end of string, and just before the terminating \n"
pattern = re.compile('$')
diff --git a/Misc/NEWS b/Misc/NEWS
index 7a79521efd7..997a03411de 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -320,6 +320,12 @@ Extension Modules
Library
-------
+- bpo-30298: Weaken the condition of deprecation warnings for inline modifiers.
+ Several consecutive inline modifiers are now allowed at the start of the
+ pattern (e.g. ``'(?i)(?s)...'``). In verbose mode, whitespace and comments
+ are now allowed before and between inline modifiers (e.g.
+ ``'(?x) (?i) (?s)...'``).
+
- bpo-30285: Optimized case-insensitive matching and searching of regular
expressions.