Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
193 changes: 124 additions & 69 deletions Doc/library/re.rst

Large diffs are not rendered by default.

15 changes: 14 additions & 1 deletion Doc/whatsnew/3.15.rst
Original file line number Diff line number Diff line change
Expand Up @@ -719,6 +719,19 @@ pickle
(Contributed by Zackery Spytz and Serhiy Storchaka in :gh:`77188`.)


re
--

* :func:`re.prefixmatch` and a corresponding :meth:`~re.Pattern.prefixmatch`
have been added as alternate, more explicit names for the existing
:func:`re.match` and :meth:`~re.Pattern.match` APIs. These are intended
to be used to alleviate confusion around what *match* means by following the
Zen of Python's *"Explicit is better than implicit"* mantra. Most other
languages' regular expression libraries use an API named *match* to mean what
Python has always called *search*.
(Contributed by Gregory P. Smith in :gh:`86519`.)


resource
--------

Expand Down Expand Up @@ -1169,7 +1182,7 @@ Diego Russo in :gh:`140683` and :gh:`142305`.)


Removed
=======
========

ctypes
------
Expand Down
38 changes: 21 additions & 17 deletions Lib/re/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,17 +85,18 @@
\\ Matches a literal backslash.

This module exports the following functions:
match Match a regular expression pattern to the beginning of a string.
fullmatch Match a regular expression pattern to all of a string.
search Search a string for the presence of a pattern.
sub Substitute occurrences of a pattern found in a string.
subn Same as sub, but also return the number of substitutions made.
split Split a string by the occurrences of a pattern.
findall Find all occurrences of a pattern in a string.
finditer Return an iterator yielding a Match object for each match.
compile Compile a pattern into a Pattern object.
purge Clear the regular expression cache.
escape Backslash all non-alphanumerics in a string.
prefixmatch Match a regular expression pattern to the beginning of a str.
match The original name of prefixmatch prior to 3.15.
fullmatch Match a regular expression pattern to all of a string.
search Search a string for the presence of a pattern.
sub Substitute occurrences of a pattern found in a string.
subn Same as sub, but also return the number of substitutions made.
split Split a string by the occurrences of a pattern.
findall Find all occurrences of a pattern in a string.
finditer Return an iterator yielding a Match object for each match.
compile Compile a pattern into a Pattern object.
purge Clear the regular expression cache.
escape Backslash all non-alphanumerics in a string.

Each function other than purge and escape can take an optional 'flags' argument
consisting of one or more of the following module constants, joined by "|".
Expand Down Expand Up @@ -130,7 +131,7 @@

# public symbols
__all__ = [
"match", "fullmatch", "search", "sub", "subn", "split",
"prefixmatch", "match", "fullmatch", "search", "sub", "subn", "split",
"findall", "finditer", "compile", "purge", "escape",
"error", "Pattern", "Match", "A", "I", "L", "M", "S", "X", "U",
"ASCII", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE",
Expand Down Expand Up @@ -159,10 +160,13 @@ class RegexFlag:
# --------------------------------------------------------------------
# public interface

def match(pattern, string, flags=0):
def prefixmatch(pattern, string, flags=0):
"""Try to apply the pattern at the start of the string, returning
a Match object, or None if no match was found."""
return _compile(pattern, flags).match(string)
return _compile(pattern, flags).prefixmatch(string)

# The original, less explicit name for prefixmatch; kept as an alias.
match = prefixmatch

def fullmatch(pattern, string, flags=0):
"""Try to apply the pattern to all of the string, returning
Expand Down Expand Up @@ -311,7 +315,7 @@ def escape(pattern):
return pattern.translate(_special_chars_map).encode('latin1')

Pattern = type(_compiler.compile('', 0))
Match = type(_compiler.compile('', 0).match(''))
Match = type(_compiler.compile('', 0).prefixmatch(''))

# --------------------------------------------------------------------
# internals
Expand Down Expand Up @@ -410,10 +414,10 @@ def __init__(self, lexicon, flags=0):
def scan(self, string):
result = []
append = result.append
match = self.scanner.scanner(string).match
_match = self.scanner.scanner(string).prefixmatch
i = 0
while True:
m = match()
m = _match()
if not m:
break
j = m.end()
Expand Down
5 changes: 4 additions & 1 deletion Lib/test/test_inspect/test_inspect.py
Original file line number Diff line number Diff line change
Expand Up @@ -6277,7 +6277,10 @@ def test_pwd_module_has_signatures(self):

def test_re_module_has_signatures(self):
import re
methods_no_signature = {'Match': {'group'}}
methods_no_signature = {
'Match': {'group'},
'Pattern': {'match'}, # It is now an alias for prefixmatch
}
self._test_module_has_signatures(re,
methods_no_signature=methods_no_signature,
good_exceptions={'error', 'PatternError'})
Expand Down
22 changes: 17 additions & 5 deletions Lib/test/test_re.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,13 @@ def test_search_star_plus(self):
self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
self.assertIsNone(re.search('x', 'aaa'))
self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
self.assertEqual(re.prefixmatch('a*', 'xxx').span(0), (0, 0))
self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
self.assertEqual(re.prefixmatch('x*', 'xxxa').span(0), (0, 3))
self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
self.assertIsNone(re.match('a+', 'xxx'))
self.assertIsNone(re.prefixmatch('a+', 'xxx'))

def test_branching(self):
"""Test Branching
Expand Down Expand Up @@ -180,6 +183,7 @@ def test_bug_449000(self):
def test_bug_1661(self):
# Verify that flags do not get silently ignored with compiled patterns
pattern = re.compile('.')
self.assertRaises(ValueError, re.prefixmatch, pattern, 'A', re.I)
self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
Expand Down Expand Up @@ -517,6 +521,8 @@ def test_re_match(self):
self.assertEqual(re.match(b'(a)', string).group(0), b'a')
self.assertEqual(re.match(b'(a)', string).group(1), b'a')
self.assertEqual(re.match(b'(a)', string).group(1, 1), (b'a', b'a'))
self.assertEqual(re.prefixmatch(b'(a)', string).group(1, 1),
(b'a', b'a'))
for a in ("\xe0", "\u0430", "\U0001d49c"):
self.assertEqual(re.match(a, a).groups(), ())
self.assertEqual(re.match('(%s)' % a, a).groups(), (a,))
Expand Down Expand Up @@ -558,10 +564,8 @@ def __index__(self):
self.assertEqual(m.group(2, 1), ('b', 'a'))
self.assertEqual(m.group(Index(2), Index(1)), ('b', 'a'))

def test_match_getitem(self):
pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')

m = pat.match('a')
def do_test_match_getitem(self, match_fn):
m = match_fn('a')
self.assertEqual(m['a1'], 'a')
self.assertEqual(m['b2'], None)
self.assertEqual(m['c3'], None)
Expand All @@ -585,7 +589,7 @@ def test_match_getitem(self):
with self.assertRaisesRegex(IndexError, 'no such group'):
'a1={a2}'.format_map(m)

m = pat.match('ac')
m = match_fn('ac')
self.assertEqual(m['a1'], 'a')
self.assertEqual(m['b2'], None)
self.assertEqual(m['c3'], 'c')
Expand All @@ -602,6 +606,14 @@ def test_match_getitem(self):
# No len().
self.assertRaises(TypeError, len, m)

def test_match_getitem(self):
pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
self.do_test_match_getitem(pat.match)

def test_prefixmatch_getitem(self):
pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
self.do_test_match_getitem(pat.prefixmatch)

def test_re_fullmatch(self):
# Issue 16203: Proposal: add re.fullmatch() method.
self.assertEqual(re.fullmatch(r"a", "a").span(), (0, 1))
Expand Down
10 changes: 10 additions & 0 deletions Misc/NEWS.d/next/Library/2022-02-05-00-15-03.bpo-42353.0ebVGG.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
The :mod:`re` module gains a new :func:`re.prefixmatch` function as an
explicit spelling of what has to date always been known as :func:`re.match`.
:class:`re.Pattern` similarly gains a :meth:`re.Pattern.prefixmatch` method.

Why? Explicit is better than implicit. Other widely used languages all use
the term "match" to mean what Python uses the term "search" for. The
unadorned "match" name in Python has been a frequent cause of confusion and
coding bugs due to the inconsistency with the rest of the software industry.

We do not plan to deprecate and remove the older ``match`` name.
38 changes: 19 additions & 19 deletions Modules/_sre/clinic/sre.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading