@@ -31,17 +31,22 @@ cimport cpython
31
31
32
32
from libc.stdint cimport uintptr_t
33
33
from libc.string cimport memcpy, strlen, strchr
34
- from cpython.pycapsule cimport PyCapsule_New
35
34
36
35
from .. import errors
37
36
38
37
cdef extern from " nanoarrow/nanoarrow.c" :
39
38
40
39
ctypedef int ArrowErrorCode
41
40
41
+ ctypedef void (* ArrowBufferDeallocatorCallback)
42
+
43
+ cdef struct ArrowBufferAllocator:
44
+ void * private_data
45
+
42
46
cdef struct ArrowBuffer:
43
47
uint8_t * data
44
48
int64_t size_bytes
49
+ ArrowBufferAllocator allocator
45
50
46
51
cdef union ArrowBufferViewData:
47
52
const void * data
@@ -65,6 +70,8 @@ cdef extern from "nanoarrow/nanoarrow.c":
65
70
66
71
cdef ArrowErrorCode NANOARROW_OK
67
72
73
+ ArrowErrorCode ArrowArrayAllocateChildren(ArrowArray * array,
74
+ int64_t n_children)
68
75
ArrowErrorCode ArrowArrayAppendBytes(ArrowArray* array,
69
76
ArrowBufferView value)
70
77
ArrowErrorCode ArrowArrayAppendDecimal(ArrowArray* array,
@@ -88,11 +95,15 @@ cdef extern from "nanoarrow/nanoarrow.c":
88
95
const ArrowArray* array,
89
96
ArrowError* error)
90
97
int8_t ArrowBitGet(const uint8_t* bits, int64_t i)
98
+ ArrowBufferAllocator ArrowBufferDeallocator(ArrowBufferDeallocatorCallback,
99
+ void * private_data)
91
100
void ArrowDecimalInit(ArrowDecimal* decimal, int32_t bitwidth,
92
101
int32_t precision, int32_t scale)
93
102
void ArrowDecimalSetBytes(ArrowDecimal * decimal, const uint8_t* value)
94
103
ArrowErrorCode ArrowDecimalSetDigits(ArrowDecimal* decimal,
95
104
ArrowStringView value)
105
+ ArrowErrorCode ArrowSchemaDeepCopy(const ArrowSchema * schema,
106
+ ArrowSchema * schema_out)
96
107
void ArrowSchemaInit(ArrowSchema* schema)
97
108
ArrowErrorCode ArrowSchemaInitFromType(ArrowSchema* schema, ArrowType type )
98
109
void ArrowSchemaRelease(ArrowSchema * schema)
@@ -117,22 +128,13 @@ cdef int _check_nanoarrow(int code) except -1:
117
128
errors._raise_err(errors.ERR_ARROW_C_API_ERROR, code = code)
118
129
119
130
120
cdef void array_deleter(ArrowArray *array) noexcept:
    """
    Release hook used when an external library calls release on an Arrow
    array. The array is only flagged as released (its release pointer is
    cleared); nothing is actually freed here, so the array remains usable,
    for example by the handling of duplicate rows, even after the external
    library no longer needs it.
    """
    array.release = NULL
128
-
129
-
130
131
cdef void pycapsule_array_deleter(object array_capsule) noexcept:
    """
    Destructor for the "arrow_array" PyCapsule. Releases the wrapped
    ArrowArray if it has not been released already and then frees the memory
    that holds the ArrowArray structure itself.
    """
    cdef ArrowArray *exported
    exported = <ArrowArray*> cpython.PyCapsule_GetPointer(
        array_capsule, "arrow_array"
    )
    # a NULL release pointer means the array was already released
    if exported.release != NULL:
        ArrowArrayRelease(exported)
    cpython.PyMem_Free(exported)
136
138
137
139
138
140
cdef void pycapsule_schema_deleter(object schema_capsule) noexcept:
@@ -141,6 +143,65 @@ cdef void pycapsule_schema_deleter(object schema_capsule) noexcept:
141
143
)
142
144
if schema.release != NULL :
143
145
ArrowSchemaRelease(schema)
146
+ cpython.PyMem_Free(schema)
147
+
148
+
149
cdef void arrow_buffer_dealloc_callback(ArrowBufferAllocator *allocator,
                                        uint8_t *ptr,
                                        int64_t size) noexcept with gil:
    """
    ArrowBufferDeallocatorCallback for an ArrowBuffer borrowed from
    OracleArrowArray.

    The buffer memory itself (ptr/size) is not touched here; the callback
    only drops the reference on the OracleArrowArray stored in
    allocator.private_data, balancing the Py_INCREF() performed when the
    buffer was borrowed.

    The function is declared "noexcept" because it is invoked from C code
    which cannot propagate a Python exception, and "with gil" because
    Py_DECREF() must only be called while the GIL is held.
    """
    cpython.Py_DECREF(<OracleArrowArray> allocator.private_data)
156
+
157
+
158
cdef int copy_arrow_array(OracleArrowArray oracle_arrow_array,
                          ArrowArray *src, ArrowArray *dest) except -1:
    """
    Shallow copy source ArrowArray to destination ArrowArray. The source
    ArrowArray belongs to the wrapper OracleArrowArray. The shallow copy idea
    is borrowed from nanoarrow:
    https://github.com/apache/arrow-nanoarrow/blob/main/python

    Parameters:
        oracle_arrow_array: the object owning the source buffers; one
            reference to it is taken for every non-NULL buffer borrowed
        src: the array (or nested child array) to copy from
        dest: the array structure to initialize and populate

    Returns 0 on success. On failure an exception is raised and "dest" is
    released again (when it had been initialized), so that neither the
    nanoarrow private data nor the references taken on oracle_arrow_array
    are leaked. The memory of the "dest" structure itself always remains
    owned by the caller.
    """
    cdef:
        ArrowBuffer *dest_buffer
        ssize_t i
    _check_nanoarrow(
        ArrowArrayInitFromType(
            dest, NANOARROW_TYPE_UNINITIALIZED
        )
    )

    try:

        # Copy metadata
        dest.length = src.length
        dest.offset = src.offset
        dest.null_count = src.null_count

        # Borrow an ArrowBuffer belonging to OracleArrowArray. The
        # ArrowBuffer can belong to an immediate ArrowArray or a child (in
        # case of nested types). Either way, we Py_INCREF() the
        # oracle_arrow_array so that it is not prematurely garbage
        # collected. The corresponding Py_DECREF() happens in the
        # ArrowBufferDeallocator callback.
        for i in range(src.n_buffers):
            if src.buffers[i] != NULL:
                dest_buffer = ArrowArrayBuffer(dest, i)
                dest_buffer.data = <uint8_t *> src.buffers[i]
                dest_buffer.size_bytes = 0
                dest_buffer.allocator = ArrowBufferDeallocator(
                    <ArrowBufferDeallocatorCallback>
                    arrow_buffer_dealloc_callback,
                    <void *> oracle_arrow_array
                )
                cpython.Py_INCREF(oracle_arrow_array)
            dest.buffers[i] = src.buffers[i]
        dest.n_buffers = src.n_buffers

        # shallow copy of children (recursive call)
        if src.n_children > 0:
            _check_nanoarrow(
                ArrowArrayAllocateChildren(dest, src.n_children)
            )
            for i in range(src.n_children):
                copy_arrow_array(
                    oracle_arrow_array, src.children[i], dest.children[i]
                )

    except BaseException:
        # Releasing the partially populated array runs the deallocator
        # callback for every borrowed buffer (dropping the references taken
        # above) and releases any children that were already copied.
        if dest.release != NULL:
            ArrowArrayRelease(dest)
        raise

    return 0
144
205
145
206
146
207
cdef class OracleArrowArray:
@@ -187,8 +248,6 @@ cdef class OracleArrowArray:
187
248
188
249
def __dealloc__ (self ):
189
250
if self .arrow_array != NULL :
190
- if self .arrow_array.release == NULL :
191
- self .arrow_array.release = self .actual_array_release
192
251
if self .arrow_array.release != NULL :
193
252
ArrowArrayRelease(self .arrow_array)
194
253
cpython.PyMem_Free(self .arrow_array)
@@ -409,6 +468,26 @@ cdef class OracleArrowArray:
409
468
def offset(self ) -> int:
410
469
return self.arrow_array.offset
411
470
471
def __arrow_c_schema__(self):
    """
    Export an ArrowSchema PyCapsule.

    A deep copy of this array's schema is placed in newly allocated memory
    and wrapped in a capsule named "arrow_schema"; the capsule destructor
    (pycapsule_schema_deleter) takes over ownership of the copy.

    Raises MemoryError if the schema structure cannot be allocated and the
    error raised by _check_nanoarrow() if the deep copy fails. On any
    failure the partially built copy is released and its memory freed.
    """
    cdef ArrowSchema *exported_schema = \
            <ArrowSchema*> cpython.PyMem_Malloc(sizeof(ArrowSchema))
    if exported_schema == NULL:
        raise MemoryError()

    # flag the structure as unpopulated so that the cleanup code below never
    # looks at uninitialized memory
    exported_schema.release = NULL
    try:
        _check_nanoarrow(
            ArrowSchemaDeepCopy(
                self.arrow_schema,
                exported_schema
            )
        )
        return cpython.PyCapsule_New(
            exported_schema, 'arrow_schema', &pycapsule_schema_deleter
        )
    except BaseException:
        # the capsule was not created, so nothing else owns the copy yet
        if exported_schema.release != NULL:
            ArrowSchemaRelease(exported_schema)
        cpython.PyMem_Free(exported_schema)
        raise
490
+
412
491
def __arrow_c_array__ (self , requested_schema = None ):
413
492
"""
414
493
Returns
@@ -419,13 +498,14 @@ cdef class OracleArrowArray:
419
498
"""
420
499
if requested_schema is not None :
421
500
raise NotImplementedError (" requested_schema" )
422
-
423
- array_capsule = PyCapsule_New(
424
- self .arrow_array, ' arrow_array' , & pycapsule_array_deleter
425
- )
426
- self .actual_array_release = self .arrow_array.release
427
- self .arrow_array.release = array_deleter
428
- schema_capsule = PyCapsule_New(
429
- self .arrow_schema, " arrow_schema" , & pycapsule_schema_deleter
430
- )
431
- return schema_capsule, array_capsule
501
+ cdef ArrowArray * exported_array = \
502
+ < ArrowArray * > cpython.PyMem_Malloc(sizeof(ArrowArray))
503
+ try :
504
+ copy_arrow_array(self , self .arrow_array, exported_array)
505
+ array_capsule = cpython.PyCapsule_New(
506
+ exported_array, ' arrow_array' , & pycapsule_array_deleter
507
+ )
508
+ except :
509
+ cpython.PyMem_Free(exported_array)
510
+ raise
511
+ return self .__arrow_c_schema__(), array_capsule
0 commit comments