Skip to content

Commit 962fb87

Browse files
gh-145850: Change some implementation details in struct.Struct (GH-145851)
* calling it with non-ASCII string format will now raise a ValueError instead of UnicodeEncodeError * calling it with non-ASCII bytes format will now raise a ValueError instead of struct.error * getting the format attribute of uninitialized object will now raise an AttributeError instead of RuntimeError.
1 parent e1c2246 commit 962fb87

File tree

4 files changed

+48
-51
lines changed

4 files changed

+48
-51
lines changed

Lib/test/test_struct.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -605,7 +605,7 @@ def test_Struct_reinitialization(self):
605605
self.assertEqual(s.unpack(packed), (1, 2))
606606

607607
with self.assertWarnsRegex(FutureWarning, msg):
608-
with self.assertRaises(UnicodeEncodeError):
608+
with self.assertRaises(ValueError):
609609
s.__init__('\udc00')
610610
self.assertEqual(s.format, '>hh')
611611
self.assertEqual(s.pack(1, 2), packed)
@@ -872,10 +872,10 @@ def __init__(self, *args, **kwargs):
872872
with self.assertWarnsRegex(DeprecationWarning, warnmsg + 'bad char'):
873873
my_struct = MyStruct(format='$')
874874
self.assertEqual(my_struct.pack(12345), b'\x30\x39')
875-
with self.assertWarnsRegex(DeprecationWarning, warnmsg + ".*can't encode"):
875+
with self.assertWarnsRegex(DeprecationWarning, warnmsg + "non-ASCII"):
876876
my_struct = MyStruct('\udc00')
877877
self.assertEqual(my_struct.pack(12345), b'\x30\x39')
878-
with self.assertWarnsRegex(DeprecationWarning, warnmsg + ".*can't encode"):
878+
with self.assertWarnsRegex(DeprecationWarning, warnmsg + "non-ASCII"):
879879
my_struct = MyStruct(format='\udc00')
880880
self.assertEqual(my_struct.pack(12345), b'\x30\x39')
881881

@@ -928,11 +928,16 @@ def __init__(self, newargs, initargs):
928928
with self.assertWarns(FutureWarning):
929929
with self.assertRaises(struct.error):
930930
MyStruct(('>h',), ('$',))
931-
with self.assertRaises(UnicodeEncodeError):
931+
with self.assertRaises(ValueError):
932932
MyStruct(('\udc00',), ('>h',))
933+
with self.assertRaises(ValueError):
934+
MyStruct((b'\xa4',), ('>h',))
933935
with self.assertWarns(FutureWarning):
934-
with self.assertRaises(UnicodeEncodeError):
936+
with self.assertRaises(ValueError):
935937
MyStruct(('>h',), ('\udc00',))
938+
with self.assertWarns(FutureWarning):
939+
with self.assertRaises(ValueError):
940+
MyStruct(('>h',), (b'\xa4',))
936941
with self.assertWarns(FutureWarning):
937942
my_struct = MyStruct(('>h',), ('<h',))
938943
self.assertEqual(my_struct.format, '<h')
@@ -954,8 +959,10 @@ class MyStruct(struct.Struct):
954959
MyStruct(42)
955960
with self.assertRaises(struct.error):
956961
MyStruct('$')
957-
with self.assertRaises(UnicodeEncodeError):
962+
with self.assertRaises(ValueError):
958963
MyStruct('\udc00')
964+
with self.assertRaises(ValueError):
965+
MyStruct(b'\xa4')
959966
with self.assertRaises(TypeError):
960967
MyStruct('>h', 42)
961968
with self.assertRaises(TypeError):
@@ -1004,7 +1011,7 @@ def test_operations_on_half_initialized_Struct(self):
10041011
self.assertRaises(RuntimeError, S.pack_into, spam, 1)
10051012
self.assertRaises(RuntimeError, S.unpack, spam)
10061013
self.assertRaises(RuntimeError, S.unpack_from, spam)
1007-
self.assertRaises(RuntimeError, getattr, S, 'format')
1014+
self.assertRaises(AttributeError, getattr, S, 'format')
10081015
self.assertRaises(RuntimeError, S.__sizeof__)
10091016
self.assertRaises(RuntimeError, repr, S)
10101017
self.assertEqual(S.size, -1)
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
Changed some implementation details in :class:`struct.Struct`: calling it
2+
with non-ASCII string format will now raise a :exc:`ValueError` instead of
3+
:exc:`UnicodeEncodeError`, calling it with non-ASCII bytes format will now
4+
raise a :exc:`ValueError` instead of :exc:`struct.error`, getting
5+
the :attr:`!format` attribute of uninitialized object will now raise an
6+
:exc:`AttributeError` instead of :exc:`RuntimeError`.

Modules/_struct.c

Lines changed: 24 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1635,8 +1635,12 @@ prepare_s(PyStructObject *self, PyObject *format)
16351635

16361636
_structmodulestate *state = get_struct_state_structinst(self);
16371637

1638-
fmt = PyBytes_AS_STRING(format);
1639-
if (strlen(fmt) != (size_t)PyBytes_GET_SIZE(format)) {
1638+
if (!PyUnicode_IS_ASCII(format)) {
1639+
PyErr_SetString(PyExc_ValueError, "non-ASCII character in struct format");
1640+
return -1;
1641+
}
1642+
fmt = (const char *)PyUnicode_1BYTE_DATA(format);
1643+
if (strlen(fmt) != (size_t)PyUnicode_GET_LENGTH(format)) {
16401644
PyErr_SetString(state->StructError,
16411645
"embedded null character");
16421646
return -1;
@@ -1780,19 +1784,21 @@ static int
17801784
set_format(PyStructObject *self, PyObject *format)
17811785
{
17821786
if (PyUnicode_Check(format)) {
1783-
format = PyUnicode_AsASCIIString(format);
1784-
if (format == NULL)
1785-
return -1;
1787+
format = PyUnicode_FromObject(format);
17861788
}
17871789
else if (PyBytes_Check(format)) {
1788-
Py_INCREF(format);
1790+
format = PyUnicode_DecodeASCII(PyBytes_AS_STRING(format),
1791+
PyBytes_GET_SIZE(format), "surrogateescape");
17891792
}
17901793
else {
17911794
PyErr_Format(PyExc_TypeError,
17921795
"Struct() argument 1 must be a str or bytes object, "
17931796
"not %T", format);
17941797
return -1;
17951798
}
1799+
if (format == NULL) {
1800+
return -1;
1801+
}
17961802
if (prepare_s(self, format)) {
17971803
Py_DECREF(format);
17981804
return -1;
@@ -1821,7 +1827,7 @@ Struct_impl(PyTypeObject *type, PyObject *format)
18211827
if (self == NULL) {
18221828
return NULL;
18231829
}
1824-
self->s_format = Py_NewRef(Py_None);
1830+
self->s_format = NULL;
18251831
self->s_codes = NULL;
18261832
self->s_size = -1;
18271833
self->s_len = -1;
@@ -1878,7 +1884,7 @@ s_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
18781884
if (self == NULL) {
18791885
return NULL;
18801886
}
1881-
self->s_format = Py_NewRef(Py_None);
1887+
self->s_format = NULL;
18821888
self->s_codes = NULL;
18831889
self->s_size = -1;
18841890
self->s_len = -1;
@@ -1892,7 +1898,7 @@ s_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
18921898
return NULL;
18931899
}
18941900
PyObject *exc = PyErr_GetRaisedException();
1895-
Py_SETREF(self->s_format, Py_NewRef(Py_None));
1901+
Py_CLEAR(self->s_format);
18961902
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
18971903
"Invalid 'format' argument for Struct.__new__(): %S", exc))
18981904
{
@@ -1910,8 +1916,8 @@ s_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
19101916
static bool
19111917
same_format(PyStructObject *s, PyObject *format)
19121918
{
1913-
Py_ssize_t size = PyBytes_GET_SIZE(s->s_format);
1914-
const void *data = PyBytes_AS_STRING(s->s_format);
1919+
Py_ssize_t size = PyUnicode_GET_LENGTH(s->s_format);
1920+
const void *data = PyUnicode_1BYTE_DATA(s->s_format);
19151921
if (PyUnicode_Check(format) && PyUnicode_IS_ASCII(format)) {
19161922
return PyUnicode_GET_LENGTH(format) == size
19171923
&& memcmp(PyUnicode_1BYTE_DATA(format), data, size) == 0;
@@ -1938,7 +1944,7 @@ static int
19381944
Struct___init___impl(PyStructObject *self, PyObject *format)
19391945
/*[clinic end generated code: output=b8e80862444e92d0 input=1af78a5f57d82cec]*/
19401946
{
1941-
if (self->s_format == Py_None) {
1947+
if (self->s_format == NULL) {
19421948
if (set_format(self, format) < 0) {
19431949
return -1;
19441950
}
@@ -1965,7 +1971,7 @@ s_init(PyObject *self, PyObject *args, PyObject *kwargs)
19651971
{
19661972
if (!((PyStructObject *)self)->init_called
19671973
&& Py_TYPE(self)->tp_init == s_init
1968-
&& ((PyStructObject *)self)->s_format != Py_None)
1974+
&& ((PyStructObject *)self)->s_format != NULL)
19691975
{
19701976
/* Struct.__init__() was called implicitly.
19711977
* __new__() already did all the work. */
@@ -2508,22 +2514,6 @@ Struct_pack_into_impl(PyStructObject *self, Py_buffer *buffer,
25082514
Py_RETURN_NONE;
25092515
}
25102516

2511-
static PyObject *
2512-
s_get_format(PyObject *op, void *Py_UNUSED(closure))
2513-
{
2514-
PyStructObject *self = PyStructObject_CAST(op);
2515-
ENSURE_STRUCT_IS_READY(self);
2516-
return PyUnicode_FromStringAndSize(PyBytes_AS_STRING(self->s_format),
2517-
PyBytes_GET_SIZE(self->s_format));
2518-
}
2519-
2520-
static PyObject *
2521-
s_get_size(PyObject *op, void *Py_UNUSED(closure))
2522-
{
2523-
PyStructObject *self = PyStructObject_CAST(op);
2524-
return PyLong_FromSsize_t(self->s_size);
2525-
}
2526-
25272517
/*[clinic input]
25282518
Struct.__sizeof__
25292519
[clinic start generated code]*/
@@ -2545,14 +2535,7 @@ s_repr(PyObject *op)
25452535
{
25462536
PyStructObject *self = PyStructObject_CAST(op);
25472537
ENSURE_STRUCT_IS_READY(self);
2548-
PyObject* fmt = PyUnicode_FromStringAndSize(
2549-
PyBytes_AS_STRING(self->s_format), PyBytes_GET_SIZE(self->s_format));
2550-
if (fmt == NULL) {
2551-
return NULL;
2552-
}
2553-
PyObject* s = PyUnicode_FromFormat("%s(%R)", _PyType_Name(Py_TYPE(self)), fmt);
2554-
Py_DECREF(fmt);
2555-
return s;
2538+
return PyUnicode_FromFormat("%s(%R)", _PyType_Name(Py_TYPE(self)), self->s_format);
25562539
}
25572540

25582541
/* List of functions */
@@ -2569,15 +2552,13 @@ static struct PyMethodDef s_methods[] = {
25692552

25702553
static PyMemberDef s_members[] = {
25712554
{"__weaklistoffset__", Py_T_PYSSIZET, offsetof(PyStructObject, weakreflist), Py_READONLY},
2555+
{"format", Py_T_OBJECT_EX, offsetof(PyStructObject, s_format),
2556+
Py_READONLY, PyDoc_STR("struct format string")},
2557+
{"size", Py_T_PYSSIZET, offsetof(PyStructObject, s_size), Py_READONLY,
2558+
PyDoc_STR("struct size in bytes")},
25722559
{NULL} /* sentinel */
25732560
};
25742561

2575-
static PyGetSetDef s_getsetlist[] = {
2576-
{"format", s_get_format, NULL, PyDoc_STR("struct format string"), NULL},
2577-
{"size", s_get_size, NULL, PyDoc_STR("struct size in bytes"), NULL},
2578-
{NULL} /* sentinel */
2579-
};
2580-
25812562
static PyType_Slot PyStructType_slots[] = {
25822563
{Py_tp_dealloc, s_dealloc},
25832564
{Py_tp_getattro, PyObject_GenericGetAttr},
@@ -2588,7 +2569,6 @@ static PyType_Slot PyStructType_slots[] = {
25882569
{Py_tp_clear, s_clear},
25892570
{Py_tp_methods, s_methods},
25902571
{Py_tp_members, s_members},
2591-
{Py_tp_getset, s_getsetlist},
25922572
{Py_tp_init, s_init},
25932573
{Py_tp_new, s_new},
25942574
{0, 0},

Modules/_xxtestfuzz/fuzzer.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,10 @@ static int fuzz_struct_unpack(const char* data, size_t size) {
133133
if (unpacked == NULL && PyErr_ExceptionMatches(PyExc_SystemError)) {
134134
PyErr_Clear();
135135
}
136+
/* Ignore any ValueError, these are triggered by non-ASCII format. */
137+
if (unpacked == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) {
138+
PyErr_Clear();
139+
}
136140
/* Ignore any struct.error exceptions, these can be caused by invalid
137141
formats or incomplete buffers both of which are common. */
138142
if (unpacked == NULL && PyErr_ExceptionMatches(struct_error)) {

0 commit comments

Comments
 (0)