diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2023-10-14 09:13:02 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-10-14 09:13:02 +0300 |
commit | e2b3d831fd2824d8a5713e3ed2a64aad0fb6b62d (patch) | |
tree | 3ab33a7a92325e48c5297ba9c3f4d8d9c38d3d00 /Lib/re/_parser.py | |
parent | ca0f3d858d069231ce7c5b382790a774f385b467 (diff) | |
download | cpython-e2b3d831fd2824d8a5713e3ed2a64aad0fb6b62d.tar.gz cpython-e2b3d831fd2824d8a5713e3ed2a64aad0fb6b62d.zip |
gh-109747: Improve errors for unsupported look-behind patterns (GH-109859)
Now re.error is raised instead of OverflowError or RuntimeError when the
width of a look-behind pattern is too large.
The limit is increased to 2**32-1 (was 2**31-1).
Diffstat (limited to 'Lib/re/_parser.py')
-rw-r--r-- | Lib/re/_parser.py | 13 |
1 file changed, 10 insertions, 3 deletions
diff --git a/Lib/re/_parser.py b/Lib/re/_parser.py
index d00b7e67d55..f3c779340fe 100644
--- a/Lib/re/_parser.py
+++ b/Lib/re/_parser.py
@@ -67,6 +67,10 @@ FLAGS = {
 TYPE_FLAGS = SRE_FLAG_ASCII | SRE_FLAG_LOCALE | SRE_FLAG_UNICODE
 GLOBAL_FLAGS = SRE_FLAG_DEBUG
 
+# Maximal value returned by SubPattern.getwidth().
+# Must be larger than MAXREPEAT, MAXCODE and sys.maxsize.
+MAXWIDTH = 1 << 64
+
 class State:
     # keeps track of state for parsing
     def __init__(self):
@@ -177,7 +181,7 @@ class SubPattern:
         lo = hi = 0
         for op, av in self.data:
             if op is BRANCH:
-                i = MAXREPEAT - 1
+                i = MAXWIDTH
                 j = 0
                 for av in av[1]:
                     l, h = av.getwidth()
@@ -196,7 +200,10 @@ class SubPattern:
             elif op in _REPEATCODES:
                 i, j = av[2].getwidth()
                 lo = lo + i * av[0]
-                hi = hi + j * av[1]
+                if av[1] == MAXREPEAT and j:
+                    hi = MAXWIDTH
+                else:
+                    hi = hi + j * av[1]
             elif op in _UNITCODES:
                 lo = lo + 1
                 hi = hi + 1
@@ -216,7 +223,7 @@ class SubPattern:
                 hi = hi + j
             elif op is SUCCESS:
                 break
-        self.width = min(lo, MAXREPEAT - 1), min(hi, MAXREPEAT)
+        self.width = min(lo, MAXWIDTH), min(hi, MAXWIDTH)
         return self.width
 
 class Tokenizer: