python
diff --git a/Doc/library/re.rst b/Doc/library/re.rst
Lines changed: 46 additions & 1 deletion
diff --git a/Doc/whatsnew/3.16.rst b/Doc/whatsnew/3.16.rst
Lines changed: 7 additions & 0 deletions
diff --git a/Lib/re/_constants.py b/Lib/re/_constants.py
Lines changed: 63 additions & 1 deletion
diff --git a/Lib/re/_parser.py b/Lib/re/_parser.py
Lines changed: 29 additions & 3 deletions
@@ -613,7 +613,7 @@ character ``'$'``.
 
       Matches ``[0-9]`` if the :py:const:`~re.ASCII` flag is used.
 
-      __ https://www.unicode.org/versions/Unicode15.0.0/ch04.pdf#G134153
+      __ https://www.unicode.org/versions/Unicode17.0.0/core-spec/chapter-4/#G124142
 
    For 8-bit (bytes) patterns:
       Matches any decimal digit in the ASCII character set;
@@ -680,6 +680,51 @@ character ``'$'``.
    matches characters which are neither alphanumeric in the current locale
    nor the underscore.
 
+.. index:: single: \p; in regular expressions
+           single: \P; in regular expressions
+
+``\p{property=value}``, ``\p{value}``
+   Matches any character with the given Unicode property
+   (see `Unicode Technical Standard #18
+   <https://unicode.org/reports/tr18/>`_, requirement RL1.2 "Properties").
+   Property and value names are matched loosely:
+   case, whitespace, ``'-'`` and ``'_'`` are ignored.
+   The following properties are supported:
+
+   * The ``General_Category`` property (short name ``gc``),
+     spelled ``\p{Lu}``, ``\p{gc=Lu}`` or, for a one-letter group, ``\p{L}``.
+     The supported values are the groups ``L``, ``N``, ``Z`` and ``C`` and the
+     values ``Lu``, ``Lt``, ``Lm``, ``Nd``, ``Nl``, ``No``, ``Zs``, ``Zl``,
+     ``Zp``, ``Cc``, ``Cf``, ``Cs``, ``Co`` and ``Cn``.
+   * The binary properties ``XID_Start``, ``XID_Continue``, ``Alphabetic``,
+     ``Lowercase``, ``Uppercase``, ``Numeric``, ``Printable``, ``Cased`` and
+     ``Case_Ignorable``.  A binary property may also be spelled
+     ``\p{name=yes}`` or ``\p{name=no}``.
+   * The POSIX compatibility classes ``alpha``, ``alnum``, ``blank``,
+     ``cntrl``, ``digit``, ``graph``, ``lower``, ``print``, ``space``,
+     ``upper``, ``word`` and ``xdigit``.
+   * The properties ``ASCII``, ``Any``, ``Assigned``,
+     ``Noncharacter_Code_Point``, ``Join_Control``, ``Regional_Indicator``,
+     ``ASCII_Hex_Digit``, ``Hex_Digit``, ``Pattern_Syntax`` and
+     ``Pattern_White_Space``.
+
+   Where a supported property corresponds to a :mod:`unicodedata` accessor or
+   a :class:`str` method, it matches exactly the characters that the accessor
+   or method reports.  For consistency with these, ``space`` follows
+   :py:meth:`str.isspace` (like ``\s``) and ``xdigit`` matches only the ASCII
+   hexadecimal digits.
+
+   This is only recognized in Unicode (str) patterns.
+   In bytes patterns it is an error.
+
+   .. versionadded:: next
+
+``\P{...}``
+   Matches any character which does *not* have the given Unicode property.
+   This is the opposite of ``\p``.
+
+   .. versionadded:: next
+
 .. index:: single: \z; in regular expressions
            single: \Z; in regular expressions
 
 
@@ -192,6 +192,13 @@ re
   matches an ASCII lowercase consonant.
   (Contributed by Serhiy Storchaka in :gh:`152100`.)
 
+* Regular expressions now support Unicode property escapes ``\p{...}`` and
+  ``\P{...}``, which match a character by a Unicode property -- for example
+  ``\p{Lu}`` (an uppercase letter), ``\p{Cased}`` or ``\p{ASCII}``.  See
+  :ref:`the regular expression syntax <re-syntax>` for the supported
+  properties.
+  (Contributed by Serhiy Storchaka in :gh:`95555`.)
+
 
 shlex
 -----
 
@@ -13,7 +13,7 @@
 
 # update when constants are added or removed
 
-MAGIC = 20230612
+MAGIC = 20260622
 
 from _sre import MAXREPEAT, MAXGROUPS  # noqa: F401
 
@@ -150,6 +150,35 @@ def _makecodes(*names):
     'CATEGORY_UNI_SPACE', 'CATEGORY_UNI_NOT_SPACE',
     'CATEGORY_UNI_WORD', 'CATEGORY_UNI_NOT_WORD',
     'CATEGORY_UNI_LINEBREAK', 'CATEGORY_UNI_NOT_LINEBREAK',
+
+    # Unicode property categories.  These are not affected by the ASCII,
+    # LOCALE or UNICODE flags.
+    'CATEGORY_ALPHA', 'CATEGORY_NOT_ALPHA',
+    'CATEGORY_LOWER', 'CATEGORY_NOT_LOWER',
+    'CATEGORY_UPPER', 'CATEGORY_NOT_UPPER',
+    'CATEGORY_NUMERIC', 'CATEGORY_NOT_NUMERIC',
+    'CATEGORY_PRINTABLE', 'CATEGORY_NOT_PRINTABLE',
+    'CATEGORY_ALNUM', 'CATEGORY_NOT_ALNUM',
+    'CATEGORY_XID_START', 'CATEGORY_NOT_XID_START',
+    'CATEGORY_XID_CONTINUE', 'CATEGORY_NOT_XID_CONTINUE',
+    'CATEGORY_TITLE', 'CATEGORY_NOT_TITLE',
+    'CATEGORY_CASED', 'CATEGORY_NOT_CASED',
+    'CATEGORY_CASE_IGNORABLE', 'CATEGORY_NOT_CASE_IGNORABLE',
+    # Compound categories: Lu = uppercase letter, N = number.
+    'CATEGORY_LU', 'CATEGORY_NOT_LU',
+    'CATEGORY_N', 'CATEGORY_NOT_N',
+    'CATEGORY_LM', 'CATEGORY_NOT_LM',
+    'CATEGORY_NL', 'CATEGORY_NOT_NL',
+    'CATEGORY_NO', 'CATEGORY_NOT_NO',
+    'CATEGORY_CF', 'CATEGORY_NOT_CF',
+    'CATEGORY_Z', 'CATEGORY_NOT_Z',
+    'CATEGORY_ZS', 'CATEGORY_NOT_ZS',
+    'CATEGORY_C', 'CATEGORY_NOT_C',
+    'CATEGORY_CN', 'CATEGORY_NOT_CN',
+    'CATEGORY_ASSIGNED', 'CATEGORY_NOT_ASSIGNED',
+    'CATEGORY_BLANK', 'CATEGORY_NOT_BLANK',
+    'CATEGORY_GRAPH', 'CATEGORY_NOT_GRAPH',
+    'CATEGORY_PRINT', 'CATEGORY_NOT_PRINT',
 )
 
 
@@ -206,6 +235,39 @@ def _makecodes(*names):
     CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK
 }
 
+# The Unicode property categories are the same regardless of the flags.
+CH_PROPERTY = (
+    CATEGORY_ALPHA, CATEGORY_NOT_ALPHA,
+    CATEGORY_LOWER, CATEGORY_NOT_LOWER,
+    CATEGORY_UPPER, CATEGORY_NOT_UPPER,
+    CATEGORY_NUMERIC, CATEGORY_NOT_NUMERIC,
+    CATEGORY_PRINTABLE, CATEGORY_NOT_PRINTABLE,
+    CATEGORY_ALNUM, CATEGORY_NOT_ALNUM,
+    CATEGORY_XID_START, CATEGORY_NOT_XID_START,
+    CATEGORY_XID_CONTINUE, CATEGORY_NOT_XID_CONTINUE,
+    CATEGORY_TITLE, CATEGORY_NOT_TITLE,
+    CATEGORY_CASED, CATEGORY_NOT_CASED,
+    CATEGORY_CASE_IGNORABLE, CATEGORY_NOT_CASE_IGNORABLE,
+    CATEGORY_LU, CATEGORY_NOT_LU,
+    CATEGORY_N, CATEGORY_NOT_N,
+    CATEGORY_LM, CATEGORY_NOT_LM,
+    CATEGORY_NL, CATEGORY_NOT_NL,
+    CATEGORY_NO, CATEGORY_NOT_NO,
+    CATEGORY_CF, CATEGORY_NOT_CF,
+    CATEGORY_Z, CATEGORY_NOT_Z,
+    CATEGORY_ZS, CATEGORY_NOT_ZS,
+    CATEGORY_C, CATEGORY_NOT_C,
+    CATEGORY_CN, CATEGORY_NOT_CN,
+    CATEGORY_ASSIGNED, CATEGORY_NOT_ASSIGNED,
+    CATEGORY_BLANK, CATEGORY_NOT_BLANK,
+    CATEGORY_GRAPH, CATEGORY_NOT_GRAPH,
+    CATEGORY_PRINT, CATEGORY_NOT_PRINT,
+)
+for _cat in CH_PROPERTY:
+    CH_LOCALE[_cat] = _cat
+    CH_UNICODE[_cat] = _cat
+del _cat
+
 CH_NEGATE = dict(zip(CHCODES[::2] + CHCODES[1::2], CHCODES[1::2] + CHCODES[::2]))
 
 # flags
 
@@ -310,6 +310,22 @@ def checkgroupname(self, name, offset):
             msg = "bad character in group name %r" % name
             raise self.error(msg, len(name) + offset)
 
+def _property_escape(source, escape, in_set=False):
+    # handle \p{...} and \P{...} (UTS #18 1.2.4, "Property Syntax")
+    from . import _properties
+    if not source.match('{'):
+        raise source.error("missing {, expected property name")
+    name = source.getuntil('}', 'property name')
+    code = _properties.parse_property(name, escape[1] == 'P')
+    if code is None:
+        raise source.error("unknown property name %r" % name,
+                           len(name) + len(r'\p{}'))
+    if in_set and code[1][0] == (NEGATE, None):
+        # A negated multi-range property cannot be a member of a set.
+        raise source.error("bad escape %s in character class" % escape,
+                           len(name) + len(r'\p{}'))
+    return code
+
 def _class_escape(source, escape):
     # handle escape code inside character class
     code = ESCAPES.get(escape)
@@ -352,6 +368,8 @@ def _class_escape(source, escape):
                 raise source.error("undefined character name %r" % charname,
                                    len(charname) + len(r'\N{}')) from None
             return LITERAL, c
+        elif c in "pP" and source.istext:
+            return _property_escape(source, escape, in_set=True)
         elif c in OCTDIGITS:
             # octal escape (up to three digits)
             escape += source.getwhile(2, OCTDIGITS)
@@ -412,6 +430,8 @@ def _escape(source, escape, state):
                 raise source.error("undefined character name %r" % charname,
                                    len(charname) + len(r'\N{}')) from None
             return LITERAL, c
+        elif c in "pP" and source.istext:
+            return _property_escape(source, escape)
         elif c == "0":
             # octal escape
             escape += source.getwhile(2, OCTDIGITS)
@@ -566,6 +586,12 @@ def _parse_operand(source, state, nested, here, allow_nested):
     sourcematch = source.match
     set = []
     setappend = set.append
+    def addmember(code):
+        # Flatten a \p{...} property's IN into the member set.
+        if code[0] is IN:
+            set.extend(code[1])
+        else:
+            setappend(code)
     compound = None     # elements of a standalone nested-set operand
     if allow_nested and sourcematch("["):
         # A nested set after an operator is the whole operand, used as-is (not
@@ -608,13 +634,13 @@ def _parse_operand(source, state, nested, here, allow_nested):
                                    source.tell() - here)
             if that == "]":
                 # A trailing '-' is a literal.
-                setappend(code1)
+                addmember(code1)
                 setappend((LITERAL, _ord("-")))
                 return [_charset_node(_uniq(set))], None
             if that == "-":
                 # 'X--': difference, not a range.  '--' after a single member
                 # lands here because the range probe consumed the first '-'.
-                setappend(code1)
+                addmember(code1)
                 return [_charset_node(_uniq(set))], "--"
             if that[0] == "\\":
                 code2 = _class_escape(source, that)
@@ -630,7 +656,7 @@ def _parse_operand(source, state, nested, here, allow_nested):
                 raise source.error(msg, len(this) + 1 + len(that))
             setappend((RANGE, (lo, hi)))
         else:
-            setappend(code1)
+            addmember(code1)
 
 def _complement(elements, state):
     # The complement of `elements` (a single matcher, or a set operation as a