forked from python/cpython
PEP 822: d-string draft implementation #108

Open: methane wants to merge 6 commits into main from peps/0822-d-string

Changes from all commits (6):

- c354b9c: first implementation of d-string (methane)
- 10a5073: fix test_tokenize (methane)
- 21cc35a: use least indent instead of closing quote indent (methane)
- 8104370: fix bugs (methane)
- b40003b: fix invalid escape sequences position (methane)
- 57a3451: improve tests (methane)
New test file (51 lines added). Note: the diff rendering collapsed runs of spaces inside the test cases; the indentation below is reconstructed so that each case is consistent with the "least indent" dedent rule.

```python
import unittest


# All valid d-string prefix spellings: d alone and combined with b/f/t/r,
# in lower and upper case.
_dstring_prefixes = "d db df dt dr drb drf drt".split()
_dstring_prefixes += [p.upper() for p in _dstring_prefixes]


class DStringTestCase(unittest.TestCase):
    def assertAllRaise(self, exception_type, regex, error_strings):
        for str in error_strings:
            with self.subTest(str=str):
                with self.assertRaisesRegex(exception_type, regex) as cm:
                    eval(str)

    def test_single_quote(self):
        exprs = [
            f"{p}'hello, world'" for p in _dstring_prefixes
        ] + [
            f'{p}"hello, world"' for p in _dstring_prefixes
        ]
        self.assertAllRaise(SyntaxError, "d-string must be triple-quoted", exprs)

    def test_empty_dstring(self):
        exprs = [
            f"{p}''''''" for p in _dstring_prefixes
        ] + [
            f'{p}""""""' for p in _dstring_prefixes
        ]
        self.assertAllRaise(SyntaxError, "d-string must start with a newline", exprs)

    def test_simple_dstring(self):
        cases = [
            ('{prefix}"""\n hello world\n """', "hello world\n"),
            ('{prefix}"""\n  hello world\n """', " hello world\n"),
            ('{prefix}"""\n hello world\n"""', " hello world\n"),
            ('{prefix}"""\n  hello world\\\n """', " hello world"),
            ('{prefix}"""\n  hello world\\\\\n """', " hello world\\\n"),
        ]

        for p in _dstring_prefixes:
            bstring = 'b' in p.lower()
            rstring = 'r' in p.lower()
            for source, expected in cases:
                source = source.format(prefix=p)
                if rstring:
                    expected = expected.replace('\\', '\\\\').replace('\n', '\\n')
                if bstring:
                    expected = expected.encode()
                self.assertEqual(eval(source), expected)


if __name__ == '__main__':
    unittest.main()
```
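
A pure-Python model of the dedent rule these cases encode may help in review. This is a sketch of my own, not code from the PR: it strips the longest common leading whitespace of the body's lines, where the line holding the closing quote counts as the last line. Escape processing (the two backslash cases) is left out.

```python
import os.path

def dedent_dstring_body(body: str) -> str:
    """Hypothetical model of d-string dedenting; not the PR's code.

    `body` is the raw text between the triple quotes,
    e.g. "\\n hello world\\n " for the first test case.
    """
    lines = body.split("\n")
    assert lines[0] == "", "d-string must start with a newline"
    lines = lines[1:]
    # Model the implementation's terminator: the line holding the closing
    # quote participates in the common-indent computation.
    lines[-1] += "X"
    indents = [ln[: len(ln) - len(ln.lstrip(" \t"))]
               for ln in lines if ln.strip()]
    common = os.path.commonprefix(indents)
    out = [ln[len(common):] if ln.strip() else "" for ln in lines]
    out[-1] = out[-1][:-1]  # drop the terminator again
    return "\n".join(out)

# The three non-backslash cases from test_simple_dstring:
assert dedent_dstring_body("\n hello world\n ") == "hello world\n"
assert dedent_dstring_body("\n  hello world\n ") == " hello world\n"
assert dedent_dstring_body("\n hello world\n") == " hello world\n"
```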
The parser changes. First, new helpers for writing and dedenting d-string lines, and a new signature for `_PyPegen_decode_fstring_part`:

```diff
@@ -1292,24 +1292,146 @@ _PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args, asdl_comprehension_seq
 // Fstring stuff
 
+static int
+unicodewriter_write_line(Parser *p, PyUnicodeWriter *w, const char *line_start, const char *line_end,
+                         int is_raw, Token* token)
+{
+    if (is_raw || memchr(line_start, '\\', line_end - line_start) == NULL) {
+        return PyUnicodeWriter_WriteUTF8(w, line_start, line_end - line_start);
+    }
+    else {
+        PyObject *line = _PyPegen_decode_string(p, 1, line_start, line_end - line_start, token);
+        if (line == NULL || PyUnicodeWriter_WriteStr(w, line) < 0) {
+            Py_XDECREF(line);
+            return -1;
+        }
+        Py_DECREF(line);
+    }
+    return 0;
+}
+
+static PyObject*
+_PyPegen_dedent_string_part(
+    Parser *p, const char *s, size_t len, const char *indent, Py_ssize_t indent_len,
+    int is_first, int is_raw, expr_ty constant, Token* token)
+{
+    Py_ssize_t lineno = constant->lineno;
+    const char *line_start = s;
+    const char *s_end = s + len;
+
+    int _prev_call_invalid = p->call_invalid_rules;
+    if (!_prev_call_invalid && !is_raw) {
+        // _PyPegen_decode_string() and decode_bytes_with_escapes() may call
+        // warn_invalid_escape_sequence(), which may emit a warning or raise a
+        // SyntaxError for invalid escape sequences.
+        // We need to call it before dedenting, since the SyntaxError needs the
+        // exact lineno and col_offset of the invalid escape sequence.
+        PyObject *t = _PyPegen_decode_string(p, 0, s, len, token);
+        if (t == NULL) {
+            return NULL;
+        }
+        Py_DECREF(t);
+        p->call_invalid_rules = 1;
+    }
+
+    PyUnicodeWriter *w = PyUnicodeWriter_Create(len);
+    if (w == NULL) {
+        return NULL;
+    }
+
+    if (is_first) {
+        assert(line_start[0] == '\n');
+        line_start++;  // skip the first newline
+    }
+    else {
+        // Example: df"""
+        //     first part {param} second part
+        //     next line
+        //     """
+        // We don't need to dedent the first line of non-first parts.
+        const char *line_end = memchr(line_start, '\n', s_end - line_start);
+        if (line_end) {
+            line_end++;  // include the newline
+        }
+        else {
+            line_end = s_end;
+        }
+        if (unicodewriter_write_line(p, w, line_start, line_end, is_raw, token) < 0) {
+            goto error;
+        }
+        line_start = line_end;
+    }
+
+    while (line_start < s + len) {
+        lineno++;
+
+        Py_ssize_t i = 0;
+        while (line_start + i < s_end && i < indent_len && line_start[i] == indent[i]) {
+            i++;
+        }
+
+        if (line_start[i] == '\0') {  // found an empty line without a newline.
+            break;
+        }
+        if (line_start[i] == '\n') {  // found an empty line with a newline.
+            if (PyUnicodeWriter_WriteChar(w, '\n') < 0) {
+                goto error;
+            }
+            line_start += i + 1;
+            continue;
+        }
+        if (i < indent_len) {  // found an invalid indent.
+            assert(line_start[i] != indent[i]);
+            RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, lineno, i, lineno, i + 1,
+                                       "d-string line missing valid indentation");
+            goto error;
+        }
+
+        // found an indented line; dedent it.
+        line_start += i;
+        const char *line_end = memchr(line_start, '\n', s_end - line_start);
+        if (line_end) {
+            line_end++;  // include the newline
+        }
+        else {
+            line_end = s_end;
+        }
+        if (unicodewriter_write_line(p, w, line_start, line_end, is_raw, token) < 0) {
+            goto error;
+        }
+        line_start = line_end;
+    }
+    p->call_invalid_rules = _prev_call_invalid;
+    return PyUnicodeWriter_Finish(w);
+
+error:
+    p->call_invalid_rules = _prev_call_invalid;
+    PyUnicodeWriter_Discard(w);
+    return NULL;
+}
+
 static expr_ty
-_PyPegen_decode_fstring_part(Parser* p, int is_raw, expr_ty constant, Token* token) {
+_PyPegen_decode_fstring_part(Parser* p, int is_first, int is_raw,
+                             const char *indent, Py_ssize_t indent_len,
+                             expr_ty constant, Token* token)
+{
     assert(PyUnicode_CheckExact(constant->v.Constant.value));
 
     const char* bstr = PyUnicode_AsUTF8(constant->v.Constant.value);
     if (bstr == NULL) {
         return NULL;
     }
+    is_raw = is_raw || strchr(bstr, '\\') == NULL;
 
-    size_t len;
-    if (strcmp(bstr, "{{") == 0 || strcmp(bstr, "}}") == 0) {
-        len = 1;
-    } else {
-        len = strlen(bstr);
+    PyObject *str = NULL;
+    if (indent_len > 0) {
+        str = _PyPegen_dedent_string_part(p, bstr, strlen(bstr), indent, indent_len,
+                                          is_first, is_raw, constant, token);
+    }
+    else {
+        str = _PyPegen_decode_string(p, is_raw, bstr, strlen(bstr), token);
     }
 
-    is_raw = is_raw || strchr(bstr, '\\') == NULL;
-    PyObject *str = _PyPegen_decode_string(p, is_raw, bstr, len, token);
     if (str == NULL) {
         _Pypegen_raise_decode_error(p);
         return NULL;
```
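
The per-part loop above, restated as a hypothetical Python sketch (my names, not the PR's; escape decoding, lineno tracking, and exact error offsets are omitted). Each line after the first must either be blank or start with the common indent; matching lines are emitted with the indent removed:

```python
def dedent_part(part: str, indent: str, is_first: bool) -> str:
    """Hypothetical sketch of _PyPegen_dedent_string_part's loop."""
    lines = part.split("\n")
    out = []
    if is_first:
        assert lines[0] == ""  # a d-string body starts right after a newline
        lines = lines[1:]
    else:
        # The first line of a non-first part continues the source line that
        # held the preceding {interpolation}, so it is not dedented.
        out.append(lines.pop(0))
    for line in lines:
        # Count how far this line agrees with the common indent.
        i = 0
        while i < len(line) and i < len(indent) and line[i] == indent[i]:
            i += 1
        rest = line[i:]
        if rest == "":
            out.append("")  # blank line, or the indent before the closing quote
            continue
        if i < len(indent):
            raise SyntaxError("d-string line missing valid indentation")
        out.append(rest)
    return "\n".join(out)

print(repr(dedent_part("\n    hello\n      world\n    ", "    ", True)))
# 'hello\n  world\n'
```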
Next, a declaration for the common-whitespace search helper used below:

```diff
@@ -1323,6 +1445,14 @@ _PyPegen_decode_fstring_part(Parser* p, int is_raw, expr_ty constant, Token* tok
                            p->arena);
 }
 
+/* defined in unicodeobject.c */
+extern Py_ssize_t
+_Py_search_longest_common_leading_whitespace(
+    const char *const src,
+    const char *const end,
+    const char **output
+);
+
 static asdl_expr_seq *
 _get_resized_exprs(Parser *p, Token *a, asdl_expr_seq *raw_expressions, Token *b, enum string_kind_t string_kind)
 {
```
In `_get_resized_exprs`, the d prefix is detected and the common indent is computed over all parts before any part is decoded:

```diff
@@ -1340,12 +1470,82 @@ _get_resized_exprs(Parser *p, Token *a, asdl_expr_seq *raw_expressions, Token *b
         return NULL;
     }
     int is_raw = strpbrk(quote_str, "rR") != NULL;
+    int is_dedent = strpbrk(quote_str, "dD") != NULL;
 
     asdl_expr_seq *seq = _Py_asdl_expr_seq_new(total_items, p->arena);
     if (seq == NULL) {
         return NULL;
     }
 
+    const char *common_indent_start = NULL;
+    Py_ssize_t common_indent_len = 0;
+
+    if (is_dedent) {
+        if (total_items == 0) {
+            RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
+                a,
+                "d-string must start with a newline"
+            );
+            return NULL;
+        }
+        expr_ty first_item = asdl_seq_GET(raw_expressions, 0);
+        if (first_item->kind != Constant_kind
+            || PyUnicode_ReadChar(first_item->v.Constant.value, 0) != '\n') {
+            RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
+                first_item,
+                "d-string must start with a newline"
+            );
+            return NULL;
+        }
+
+        // Instead of calculating the common indent from all parts,
+        // build a temporary string and calculate the common indent from it.
+        PyBytesWriter *w = PyBytesWriter_Create(0);
+        if (w == NULL) {
+            return NULL;
+        }
+
+        for (Py_ssize_t i = 0; i < n_items; i++) {
+            expr_ty item = asdl_seq_GET(raw_expressions, i);
+
+            if (item->kind == JoinedStr_kind) {
+                // Write a placeholder.
+                if (PyBytesWriter_WriteBytes(w, "X", 1) < 0) {
+                    PyBytesWriter_Discard(w);
+                    return NULL;
+                }
+                continue;
+            }
+            if (item->kind == Constant_kind) {
+                Py_ssize_t blen;
+                const char *bstr = PyUnicode_AsUTF8AndSize(item->v.Constant.value, &blen);
+                if (bstr == NULL || PyBytesWriter_WriteBytes(w, bstr, blen) < 0) {
+                    PyBytesWriter_Discard(w);
+                    return NULL;
+                }
+                continue;
+            }
+        }
+        // Add a terminator to include the last line before the closing quote.
+        if (PyBytesWriter_WriteBytes(w, "X", 1) < 0) {
+            PyBytesWriter_Discard(w);
+            return NULL;
+        }
+
+        // TODO: instead of creating temp_bytes, we could search the common
+        // indent in each part directly, but that needs a reimplementation
+        // of _Py_search_longest_common_leading_whitespace.
+        PyObject *temp_bytes = PyBytesWriter_Finish(w);
+        if (temp_bytes == NULL) {
+            return NULL;
+        }
+        _PyArena_AddPyObject(p->arena, temp_bytes);
+        const char *temp_str = PyBytes_AsString(temp_bytes);
+        const char *temp_end = temp_str + PyBytes_GET_SIZE(temp_bytes);
+        common_indent_len = _Py_search_longest_common_leading_whitespace(
+            temp_str, temp_end, &common_indent_start);
+    }
+
     Py_ssize_t index = 0;
     for (Py_ssize_t i = 0; i < n_items; i++) {
         expr_ty item = asdl_seq_GET(raw_expressions, i);
```

Review comment on lines +1501 to +1543: the result of `_PyArena_AddPyObject` is ignored; it can fail and should be checked before `temp_bytes` is used.

Proposed fix:

```diff
-    _PyArena_AddPyObject(p->arena, temp_bytes);
+    if (_PyArena_AddPyObject(p->arena, temp_bytes) < 0) {
+        Py_DECREF(temp_bytes);
+        return NULL;
+    }
```
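
The placeholder trick above, sketched as hypothetical Python (my names, not the PR's): every interpolation collapses to a single non-whitespace byte "X", so an interpolated line never looks blank and its indentation still participates, and the trailing "X" makes the closing quote's indentation count as a final line:

```python
import os.path

def common_indent(parts) -> str:
    """Hypothetical sketch of the common-indent computation above.
    `parts` models the part list: str for Constant nodes, None standing in
    for JoinedStr {interpolations}."""
    temp = "".join("X" if p is None else p for p in parts)
    # Terminator: make the indent before the closing quote count as a line.
    temp += "X"
    # Longest common leading whitespace over non-blank lines, standing in
    # for _Py_search_longest_common_leading_whitespace().
    indents = [ln[: len(ln) - len(ln.lstrip(" \t"))]
               for ln in temp.split("\n") if ln.strip()]
    return os.path.commonprefix(indents) if indents else ""

# d"""                      (parts as the parser would see them)
#     a {x}
#       b
#     """
parts = ["\n    a ", None, "\n      b\n    "]
print(repr(common_indent(parts)))  # '    '
```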
Finally, each constant part is decoded with the computed indent; the first part is flagged so its leading newline is skipped:

```diff
@@ -1377,7 +1577,7 @@ _get_resized_exprs(Parser *p, Token *a, asdl_expr_seq *raw_expressions, Token *b
         }
 
         if (item->kind == Constant_kind) {
-            item = _PyPegen_decode_fstring_part(p, is_raw, item, b);
+            item = _PyPegen_decode_fstring_part(p, i == 0, is_raw, common_indent_start, common_indent_len, item, b);
             if (item == NULL) {
                 return NULL;
             }
```
Review comment:

Critical: wrong `raw` parameter passed to `_PyPegen_decode_string`.

Line 1303 passes `1` (raw=true) to `_PyPegen_decode_string`, but this is in the `else` branch, where `is_raw` is known to be false. This means escape sequences like `\n`, `\t`, etc. won't be processed for non-raw d-strings; they'll be written out as literal characters.

This is likely the root cause of the line continuation issue flagged in past reviews. When escape processing is skipped, backslash-newline continuation won't work.

Proposed fix:

```diff
 static int
 unicodewriter_write_line(Parser *p, PyUnicodeWriter *w, const char *line_start, const char *line_end,
                          int is_raw, Token* token)
 {
     if (is_raw || memchr(line_start, '\\', line_end - line_start) == NULL) {
         return PyUnicodeWriter_WriteUTF8(w, line_start, line_end - line_start);
     }
     else {
-        PyObject *line = _PyPegen_decode_string(p, 1, line_start, line_end - line_start, token);
+        PyObject *line = _PyPegen_decode_string(p, 0, line_start, line_end - line_start, token);
         if (line == NULL || PyUnicodeWriter_WriteStr(w, line) < 0) {
             Py_XDECREF(line);
             return -1;
         }
         Py_DECREF(line);
     }
     return 0;
 }
```
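
The practical difference the fix restores can be reproduced with the `unicode_escape` codec, which approximates what `_PyPegen_decode_string` does for non-raw parts (an illustration only, not the parser's actual code path):

```python
import codecs

# Source text of a non-raw d-string part, as the parser sees it:
part = "hello\\tworld\\\nnext"  # i.e.  hello\tworld\<newline>next

# What raw=0 should produce: escapes decoded, continuation consumed.
print(repr(codecs.decode(part, "unicode_escape")))  # 'hello\tworldnext'
# What the buggy raw=1 path emits: the text unchanged.
print(repr(part))  # 'hello\\tworld\\\nnext'
```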