Skip to content

Commit db3f68d

Browse files
gh-71679: Share the repr implementation between bytes and bytearray
This allows the smart quotes algorithm to be used in the bytearray's repr.
1 parent 0dbbf61 commit db3f68d

File tree

5 files changed

+30
-91
lines changed

5 files changed

+30
-91
lines changed

Include/internal/pycore_bytes_methods.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ extern PyObject *_Py_bytes_endswith(const char *str, Py_ssize_t len,
4747
/* The maketrans() static method. */
4848
extern PyObject* _Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to);
4949

50+
/* Helper for repr(). */
51+
extern PyObject *_Py_bytes_repr(const char *, Py_ssize_t, int);
52+
5053
/* Shared __doc__ strings. */
5154
extern const char _Py_isspace__doc__[];
5255
extern const char _Py_isalpha__doc__[];

Lib/test/test_bytes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1979,7 +1979,7 @@ def test_bytearray_repr(self, f=repr):
19791979
self.assertEqual(f(bytearray([7, 8, 9, 10, 11, 12, 13])),
19801980
r"bytearray(b'\x07\x08\t\n\x0b\x0c\r')")
19811981
self.assertEqual(f(bytearray(b'"')), """bytearray(b'"')""") # '"'
1982-
self.assertEqual(f(bytearray(b"'")), r'''bytearray(b"\'")''') # "\'"
1982+
self.assertEqual(f(bytearray(b"'")), '''bytearray(b"'")''') # "'"
19831983
self.assertEqual(f(bytearray(b"'\"")), r"""bytearray(b'\'"')""") # '\'"'
19841984
self.assertEqual(f(bytearray(b"\"'\"")), r"""bytearray(b'"\'"')""") # '"\'"'
19851985
self.assertEqual(f(bytearray(b'\'"\'')), r"""bytearray(b'\'"\'')""") # '\'"\''
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Use the same quoting algorithm for the repr of bytearrays as for bytes
2+
objects and strings -- use double quotes for quoting if the bytearray
3+
contains single quotes and does not contain double quotes.

Objects/bytearrayobject.c

Lines changed: 11 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -1067,95 +1067,23 @@ bytearray___init___impl(PyByteArrayObject *self, PyObject *arg,
10671067
return -1;
10681068
}
10691069

1070-
/* Mostly copied from string_repr, but without the
1071-
"smart quote" functionality. */
10721070
static PyObject *
10731071
bytearray_repr_lock_held(PyObject *op)
10741072
{
10751073
_Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op);
1076-
PyByteArrayObject *self = _PyByteArray_CAST(op);
1077-
const char *className = _PyType_Name(Py_TYPE(self));
1078-
const char *quote_prefix = "(b";
1079-
const char *quote_postfix = ")";
1080-
Py_ssize_t length = Py_SIZE(self);
1081-
/* 6 == strlen(quote_prefix) + 2 + strlen(quote_postfix) + 1 */
1082-
Py_ssize_t newsize;
1083-
PyObject *v;
1084-
Py_ssize_t i;
1085-
char *bytes;
1086-
char c;
1087-
char *p;
1088-
int quote;
1089-
char *test, *start;
1090-
char *buffer;
1091-
1092-
newsize = strlen(className);
1093-
if (length > (PY_SSIZE_T_MAX - 6 - newsize) / 4) {
1094-
PyErr_SetString(PyExc_OverflowError,
1095-
"bytearray object is too large to make repr");
1096-
return NULL;
1097-
}
1098-
1099-
newsize += 6 + length * 4;
1100-
buffer = PyMem_Malloc(newsize);
1101-
if (buffer == NULL) {
1102-
PyErr_NoMemory();
1103-
return NULL;
1104-
}
1105-
1106-
/* Figure out which quote to use; single is preferred */
1107-
quote = '\'';
1108-
start = PyByteArray_AS_STRING(self);
1109-
for (test = start; test < start+length; ++test) {
1110-
if (*test == '"') {
1111-
quote = '\''; /* back to single */
1112-
break;
1113-
}
1114-
else if (*test == '\'')
1115-
quote = '"';
1116-
}
1117-
1118-
p = buffer;
1119-
while (*className)
1120-
*p++ = *className++;
1121-
while (*quote_prefix)
1122-
*p++ = *quote_prefix++;
1123-
*p++ = quote;
1124-
1125-
bytes = PyByteArray_AS_STRING(self);
1126-
for (i = 0; i < length; i++) {
1127-
/* There's at least enough room for a hex escape
1128-
and a closing quote. */
1129-
assert(newsize - (p - buffer) >= 5);
1130-
c = bytes[i];
1131-
if (c == '\'' || c == '\\')
1132-
*p++ = '\\', *p++ = c;
1133-
else if (c == '\t')
1134-
*p++ = '\\', *p++ = 't';
1135-
else if (c == '\n')
1136-
*p++ = '\\', *p++ = 'n';
1137-
else if (c == '\r')
1138-
*p++ = '\\', *p++ = 'r';
1139-
else if (c == 0)
1140-
*p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
1141-
else if (c < ' ' || c >= 0x7f) {
1142-
*p++ = '\\';
1143-
*p++ = 'x';
1144-
*p++ = Py_hexdigits[(c & 0xf0) >> 4];
1145-
*p++ = Py_hexdigits[c & 0xf];
1074+
const char *className = _PyType_Name(Py_TYPE(op));
1075+
PyObject *bytes_repr = _Py_bytes_repr(PyByteArray_AS_STRING(op),
1076+
PyByteArray_GET_SIZE(op), 1);
1077+
if (bytes_repr == NULL) {
1078+
if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
1079+
PyErr_SetString(PyExc_OverflowError,
1080+
"bytearray object is too large to make repr");
11461081
}
1147-
else
1148-
*p++ = c;
1149-
}
1150-
assert(newsize - (p - buffer) >= 1);
1151-
*p++ = quote;
1152-
while (*quote_postfix) {
1153-
*p++ = *quote_postfix++;
1082+
return NULL;
11541083
}
1155-
1156-
v = PyUnicode_FromStringAndSize(buffer, p - buffer);
1157-
PyMem_Free(buffer);
1158-
return v;
1084+
PyObject *res = PyUnicode_FromFormat("%s(%U)", className, bytes_repr);
1085+
Py_DECREF(bytes_repr);
1086+
return res;
11591087
}
11601088

11611089
static PyObject *

Objects/bytesobject.c

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1340,27 +1340,32 @@ _PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
13401340
PyObject *
13411341
PyBytes_Repr(PyObject *obj, int smartquotes)
13421342
{
1343-
PyBytesObject* op = (PyBytesObject*) obj;
1344-
Py_ssize_t i, length = Py_SIZE(op);
1343+
return _Py_bytes_repr(PyBytes_AS_STRING(obj),
1344+
PyBytes_GET_SIZE(obj), smartquotes);
1345+
}
1346+
1347+
PyObject *
1348+
_Py_bytes_repr(const char *data, Py_ssize_t length, int smartquotes)
1349+
{
1350+
Py_ssize_t i;
13451351
Py_ssize_t newsize, squotes, dquotes;
13461352
PyObject *v;
13471353
unsigned char quote;
1348-
const unsigned char *s;
13491354
Py_UCS1 *p;
13501355

13511356
/* Compute size of output string */
13521357
squotes = dquotes = 0;
13531358
newsize = 3; /* b'' */
1354-
s = (const unsigned char*)op->ob_sval;
13551359
for (i = 0; i < length; i++) {
1360+
unsigned char c = data[i];
13561361
Py_ssize_t incr = 1;
1357-
switch(s[i]) {
1362+
switch(c) {
13581363
case '\'': squotes++; break;
13591364
case '"': dquotes++; break;
13601365
case '\\': case '\t': case '\n': case '\r':
13611366
incr = 2; break; /* \C */
13621367
default:
1363-
if (s[i] < ' ' || s[i] >= 0x7f)
1368+
if (c < ' ' || c >= 0x7f)
13641369
incr = 4; /* \xHH */
13651370
}
13661371
if (newsize > PY_SSIZE_T_MAX - incr)
@@ -1384,7 +1389,7 @@ PyBytes_Repr(PyObject *obj, int smartquotes)
13841389

13851390
*p++ = 'b', *p++ = quote;
13861391
for (i = 0; i < length; i++) {
1387-
unsigned char c = op->ob_sval[i];
1392+
unsigned char c = data[i];
13881393
if (c == quote || c == '\\')
13891394
*p++ = '\\', *p++ = c;
13901395
else if (c == '\t')

0 commit comments

Comments
 (0)