Skip to content

Commit 03182f3

Browse files
Fixed memory corruption in data frame queries (#489) and added support
for converting an OracleDataFrame object to a foreign data frame object multiple times (#470).
1 parent d210dff commit 03182f3

File tree

5 files changed

+118
-40
lines changed

5 files changed

+118
-40
lines changed

doc/src/release_notes.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,11 @@ Common Changes
3636
(`issue 493 <https://github.com/oracle/python-oracledb/issues/493>`__).
3737
#) Miscellaneous grammar and spelling fixes by John Bampton
3838
(`PR 479 <https://github.com/oracle/python-oracledb/pull/479>`__).
39+
#) Fixed memory corruption in DataFrame queries
40+
(`issue 489 <https://github.com/oracle/python-oracledb/issues/489>`__).
41+
#) Added support for converting an OracleDataFrame object to a foreign data
42+
frame object more than once
43+
(`issue 470 <https://github.com/oracle/python-oracledb/issues/470>`__).
3944

4045

4146
oracledb 3.1.0 (April 2025)

src/oracledb/interchange/nanoarrow_bridge.pxd

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ cdef extern from "nanoarrow.h":
4141
int64_t null_count
4242
int64_t offset
4343
int64_t n_buffers
44+
int64_t n_children
45+
ArrowArray** children
46+
const void** buffers
4447
void (*release)(ArrowArray*)
4548

4649
cdef struct ArrowSchema:
@@ -57,6 +60,7 @@ cdef extern from "nanoarrow.h":
5760
NANOARROW_TYPE_LARGE_STRING
5861
NANOARROW_TYPE_STRING
5962
NANOARROW_TYPE_TIMESTAMP
63+
NANOARROW_TYPE_UNINITIALIZED
6064

6165
cpdef enum ArrowTimeUnit:
6266
NANOARROW_TIME_UNIT_SECOND
@@ -87,7 +91,6 @@ cdef class OracleArrowArray:
8791
double factor
8892
ArrowArray *arrow_array
8993
ArrowSchema *arrow_schema
90-
void (*actual_array_release)(ArrowArray*) noexcept
9194

9295
cdef str _schema_to_string(self)
9396
cdef int append_bytes(self, void* ptr, int64_t num_bytes) except -1

src/oracledb/interchange/nanoarrow_bridge.pyx

Lines changed: 103 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -31,17 +31,22 @@ cimport cpython
3131

3232
from libc.stdint cimport uintptr_t
3333
from libc.string cimport memcpy, strlen, strchr
34-
from cpython.pycapsule cimport PyCapsule_New
3534

3635
from .. import errors
3736

3837
cdef extern from "nanoarrow/nanoarrow.c":
3938

4039
ctypedef int ArrowErrorCode
4140

41+
ctypedef void (*ArrowBufferDeallocatorCallback)
42+
43+
cdef struct ArrowBufferAllocator:
44+
void *private_data
45+
4246
cdef struct ArrowBuffer:
4347
uint8_t *data
4448
int64_t size_bytes
49+
ArrowBufferAllocator allocator
4550

4651
cdef union ArrowBufferViewData:
4752
const void* data
@@ -65,6 +70,8 @@ cdef extern from "nanoarrow/nanoarrow.c":
6570

6671
cdef ArrowErrorCode NANOARROW_OK
6772

73+
ArrowErrorCode ArrowArrayAllocateChildren(ArrowArray *array,
74+
int64_t n_children)
6875
ArrowErrorCode ArrowArrayAppendBytes(ArrowArray* array,
6976
ArrowBufferView value)
7077
ArrowErrorCode ArrowArrayAppendDecimal(ArrowArray* array,
@@ -88,11 +95,15 @@ cdef extern from "nanoarrow/nanoarrow.c":
8895
const ArrowArray* array,
8996
ArrowError* error)
9097
int8_t ArrowBitGet(const uint8_t* bits, int64_t i)
98+
ArrowBufferAllocator ArrowBufferDeallocator(ArrowBufferDeallocatorCallback,
99+
void *private_data)
91100
void ArrowDecimalInit(ArrowDecimal* decimal, int32_t bitwidth,
92101
int32_t precision, int32_t scale)
93102
void ArrowDecimalSetBytes(ArrowDecimal *decimal, const uint8_t* value)
94103
ArrowErrorCode ArrowDecimalSetDigits(ArrowDecimal* decimal,
95104
ArrowStringView value)
105+
ArrowErrorCode ArrowSchemaDeepCopy(const ArrowSchema *schema,
106+
ArrowSchema *schema_out)
96107
void ArrowSchemaInit(ArrowSchema* schema)
97108
ArrowErrorCode ArrowSchemaInitFromType(ArrowSchema* schema, ArrowType type)
98109
void ArrowSchemaRelease(ArrowSchema *schema)
@@ -117,22 +128,13 @@ cdef int _check_nanoarrow(int code) except -1:
117128
errors._raise_err(errors.ERR_ARROW_C_API_ERROR, code=code)
118129

119130

120-
cdef void array_deleter(ArrowArray *array) noexcept:
121-
"""
122-
Called when an external library calls the release for an Arrow array. This
123-
method simply marks the release as completed but doesn't actually do it, so
124-
that the handling of duplicate rows can still make use of the array, even
125-
if the external library no longer requires it!
126-
"""
127-
array.release = NULL
128-
129-
130131
cdef void pycapsule_array_deleter(object array_capsule) noexcept:
131132
cdef ArrowArray* array = <ArrowArray*> cpython.PyCapsule_GetPointer(
132133
array_capsule, "arrow_array"
133134
)
134135
if array.release != NULL:
135136
ArrowArrayRelease(array)
137+
cpython.PyMem_Free(array)
136138

137139

138140
cdef void pycapsule_schema_deleter(object schema_capsule) noexcept:
@@ -141,6 +143,65 @@ cdef void pycapsule_schema_deleter(object schema_capsule) noexcept:
141143
)
142144
if schema.release != NULL:
143145
ArrowSchemaRelease(schema)
146+
cpython.PyMem_Free(schema)
147+
148+
149+
cdef void arrow_buffer_dealloc_callback(ArrowBufferAllocator *allocator,
150+
uint8_t *ptr, int64_t size):
151+
"""
152+
ArrowBufferDeallocatorCallback for an ArrowBuffer borrowed from
153+
OracleArrowArray
154+
"""
155+
cpython.Py_DECREF(<OracleArrowArray> allocator.private_data)
156+
157+
158+
cdef int copy_arrow_array(OracleArrowArray oracle_arrow_array,
159+
ArrowArray *src, ArrowArray *dest) except -1:
160+
"""
161+
Shallow copy source ArrowArray to destination ArrowArray. The source
162+
ArrowArray belongs to the wrapper OracleArrowArray. The shallow copy idea
163+
is borrowed from nanoarrow:
164+
https://github.com/apache/arrow-nanoarrow/tree/main/python
165+
"""
166+
cdef:
167+
ArrowBuffer *dest_buffer
168+
ssize_t i
169+
_check_nanoarrow(
170+
ArrowArrayInitFromType(
171+
dest, NANOARROW_TYPE_UNINITIALIZED
172+
)
173+
)
174+
175+
# Copy metadata
176+
dest.length = src.length
177+
dest.offset = src.offset
178+
dest.null_count = src.null_count
179+
180+
# Borrow an ArrowBuffer belonging to OracleArrowArray. The ArrowBuffer can
181+
# belong to an immediate ArrowArray or a child (in case of nested types).
182+
# Either way, we Py_INCREF(oracle_arrow_array), so that it is not
183+
# prematurely garbage collected. The corresponding Py_DECREF happens in the
184+
# ArrowBufferDeallocator callback.
185+
for i in range(src.n_buffers):
186+
if src.buffers[i] != NULL:
187+
dest_buffer = ArrowArrayBuffer(dest, i)
188+
dest_buffer.data = <uint8_t *> src.buffers[i]
189+
dest_buffer.size_bytes = 0
190+
dest_buffer.allocator = ArrowBufferDeallocator(
191+
<ArrowBufferDeallocatorCallback> arrow_buffer_dealloc_callback,
192+
<void *> oracle_arrow_array
193+
)
194+
cpython.Py_INCREF(oracle_arrow_array)
195+
dest.buffers[i] = src.buffers[i]
196+
dest.n_buffers = src.n_buffers
197+
198+
# shallow copy of children (recursive call)
199+
if src.n_children > 0:
200+
_check_nanoarrow(ArrowArrayAllocateChildren(dest, src.n_children))
201+
for i in range(src.n_children):
202+
copy_arrow_array(
203+
oracle_arrow_array, src.children[i], dest.children[i]
204+
)
144205

145206

146207
cdef class OracleArrowArray:
@@ -187,8 +248,6 @@ cdef class OracleArrowArray:
187248

188249
def __dealloc__(self):
189250
if self.arrow_array != NULL:
190-
if self.arrow_array.release == NULL:
191-
self.arrow_array.release = self.actual_array_release
192251
if self.arrow_array.release != NULL:
193252
ArrowArrayRelease(self.arrow_array)
194253
cpython.PyMem_Free(self.arrow_array)
@@ -409,6 +468,26 @@ cdef class OracleArrowArray:
409468
def offset(self) -> int:
410469
return self.arrow_array.offset
411470

471+
def __arrow_c_schema__(self):
472+
"""
473+
Export an ArrowSchema PyCapsule
474+
"""
475+
cdef ArrowSchema *exported_schema = \
476+
<ArrowSchema*> cpython.PyMem_Malloc(sizeof(ArrowSchema))
477+
try:
478+
_check_nanoarrow(
479+
ArrowSchemaDeepCopy(
480+
self.arrow_schema,
481+
exported_schema
482+
)
483+
)
484+
except:
485+
cpython.PyMem_Free(exported_schema)
486+
raise
487+
return cpython.PyCapsule_New(
488+
exported_schema, 'arrow_schema', &pycapsule_schema_deleter
489+
)
490+
412491
def __arrow_c_array__(self, requested_schema=None):
413492
"""
414493
Returns
@@ -419,13 +498,14 @@ cdef class OracleArrowArray:
419498
"""
420499
if requested_schema is not None:
421500
raise NotImplementedError("requested_schema")
422-
423-
array_capsule = PyCapsule_New(
424-
self.arrow_array, 'arrow_array', &pycapsule_array_deleter
425-
)
426-
self.actual_array_release = self.arrow_array.release
427-
self.arrow_array.release = array_deleter
428-
schema_capsule = PyCapsule_New(
429-
self.arrow_schema, "arrow_schema", &pycapsule_schema_deleter
430-
)
431-
return schema_capsule, array_capsule
501+
cdef ArrowArray *exported_array = \
502+
<ArrowArray *> cpython.PyMem_Malloc(sizeof(ArrowArray))
503+
try:
504+
copy_arrow_array(self, self.arrow_array, exported_array)
505+
array_capsule = cpython.PyCapsule_New(
506+
exported_array, 'arrow_array', &pycapsule_array_deleter
507+
)
508+
except:
509+
cpython.PyMem_Free(exported_array)
510+
raise
511+
return self.__arrow_c_schema__(), array_capsule

tests/test_8000_dataframe.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -409,18 +409,13 @@ def test_8009(self):
409409
self.__test_df_batches_interop(DATASET_4, batch_size=5, num_batches=2)
410410

411411
def test_8010(self):
412-
"8010 - verify passing Arrow arrays twice fails"
412+
"8010 - verify passing Arrow arrays twice works"
413413
self.__check_interop()
414414
self.__populate_table(DATASET_1)
415415
statement = "select * from TestDataFrame order by Id"
416416
ora_df = self.conn.fetch_df_all(statement)
417-
pyarrow.Table.from_arrays(
418-
ora_df.column_arrays(), names=ora_df.column_names()
419-
)
420-
with self.assertRaises(pyarrow.lib.ArrowInvalid):
421-
pyarrow.Table.from_arrays(
422-
ora_df.column_arrays(), names=ora_df.column_names()
423-
)
417+
self.__validate_df(ora_df, DATASET_1)
418+
self.__validate_df(ora_df, DATASET_1)
424419

425420
def test_8011(self):
426421
"8011 - verify empty data set"

tests/test_8100_dataframe_async.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -420,18 +420,13 @@ async def test_8109(self):
420420
)
421421

422422
async def test_8110(self):
423-
"8110 - verify passing Arrow arrays twice fails"
423+
"8110 - verify passing Arrow arrays twice works"
424424
self.__check_interop()
425425
await self.__populate_table(DATASET_1)
426426
statement = "select * from TestDataFrame order by Id"
427427
ora_df = await self.conn.fetch_df_all(statement)
428-
pyarrow.Table.from_arrays(
429-
ora_df.column_arrays(), names=ora_df.column_names()
430-
)
431-
with self.assertRaises(pyarrow.lib.ArrowInvalid):
432-
pyarrow.Table.from_arrays(
433-
ora_df.column_arrays(), names=ora_df.column_names()
434-
)
428+
self.__validate_df(ora_df, DATASET_1)
429+
self.__validate_df(ora_df, DATASET_1)
435430

436431
async def test_8111(self):
437432
"8111 - verify empty data set"

0 commit comments

Comments
 (0)