/// A parsed Arrow JSON integration-test file.
///
/// Holds the [`Schema`], the dictionary batches keyed by dictionary id, and
/// the raw JSON document; record batches are decoded from `arrow_json` on
/// demand rather than eagerly at load time (see `read_batch`/`read_batches`).
pub struct ArrowFile {
    pub schema: Schema,
    // we can evolve this into a concrete Arrow type
    // this is temporarily not being read from
    dictionaries: HashMap<i64, ArrowJsonDictionaryBatch>,
    // full JSON document; "batches" entries are deserialized lazily
    arrow_json: Value,
}
52
+
53
+ impl ArrowFile {
54
+ pub fn read_batch ( & self , batch_num : usize ) -> Result < RecordBatch > {
55
+ let b = self . arrow_json [ "batches" ] . get ( batch_num) . unwrap ( ) ;
56
+ let json_batch: ArrowJsonBatch = serde_json:: from_value ( b. clone ( ) ) . unwrap ( ) ;
57
+ record_batch_from_json ( & self . schema , json_batch, Some ( & self . dictionaries ) )
58
+ }
59
+
60
+ pub fn read_batches ( & self ) -> Result < Vec < RecordBatch > > {
61
+ let mut batches = vec ! [ ] ;
62
+ // XXX collect?
63
+ for b in self . arrow_json [ "batches" ] . as_array ( ) . unwrap ( ) {
64
+ let json_batch: ArrowJsonBatch = serde_json:: from_value ( b. clone ( ) ) . unwrap ( ) ;
65
+ let batch = record_batch_from_json ( & self . schema , json_batch, Some ( & self . dictionaries ) ) ?;
66
+ batches. push ( batch) ;
67
+ }
68
+ Ok ( batches)
69
+ }
51
70
}
52
71
53
72
// Canonicalize the names of map fields in a schema
@@ -87,13 +106,7 @@ pub fn canonicalize_schema(schema: &Schema) -> Schema {
87
106
Schema :: new ( fields) . with_metadata ( schema. metadata ( ) . clone ( ) )
88
107
}
89
108
90
- struct LazyArrowFile {
91
- schema : Schema ,
92
- dictionaries : HashMap < i64 , ArrowJsonDictionaryBatch > ,
93
- arrow_json : Value ,
94
- }
95
-
96
- fn read_json_file_metadata ( json_name : & str ) -> Result < LazyArrowFile > {
109
+ pub fn open_json_file ( json_name : & str ) -> Result < ArrowFile > {
97
110
let json_file = File :: open ( json_name) ?;
98
111
let reader = BufReader :: new ( json_file) ;
99
112
let arrow_json: Value = serde_json:: from_reader ( reader) . unwrap ( ) ;
@@ -111,37 +124,13 @@ fn read_json_file_metadata(json_name: &str) -> Result<LazyArrowFile> {
111
124
dictionaries. insert ( json_dict. id , json_dict) ;
112
125
}
113
126
}
114
- Ok ( LazyArrowFile {
127
+ Ok ( ArrowFile {
115
128
schema,
116
129
dictionaries,
117
130
arrow_json,
118
131
} )
119
132
}
120
133
121
- pub fn read_json_file ( json_name : & str ) -> Result < ArrowFile > {
122
- let f = read_json_file_metadata ( json_name) ?;
123
-
124
- let mut batches = vec ! [ ] ;
125
- for b in f. arrow_json [ "batches" ] . as_array ( ) . unwrap ( ) {
126
- let json_batch: ArrowJsonBatch = serde_json:: from_value ( b. clone ( ) ) . unwrap ( ) ;
127
- let batch = record_batch_from_json ( & f. schema , json_batch, Some ( & f. dictionaries ) ) ?;
128
- batches. push ( batch) ;
129
- }
130
- Ok ( ArrowFile {
131
- schema : f. schema ,
132
- _dictionaries : f. dictionaries ,
133
- batches,
134
- } )
135
- }
136
-
137
- pub fn read_single_batch_from_json_file ( json_name : & str , batch_num : usize ) -> Result < RecordBatch > {
138
- let f = read_json_file_metadata ( json_name) ?;
139
- let b = f. arrow_json [ "batches" ] . get ( batch_num) . unwrap ( ) ;
140
- let json_batch: ArrowJsonBatch = serde_json:: from_value ( b. clone ( ) ) . unwrap ( ) ;
141
- let batch = record_batch_from_json ( & f. schema , json_batch, Some ( & f. dictionaries ) ) ?;
142
- Ok ( batch)
143
- }
144
-
145
134
/// Read gzipped JSON test file
146
135
///
147
136
/// For example given the input:
@@ -176,7 +165,7 @@ fn cdata_integration_export_schema_from_json(
176
165
out : * mut FFI_ArrowSchema ,
177
166
) -> Result < ( ) > {
178
167
let json_name = unsafe { CStr :: from_ptr ( c_json_name) } ;
179
- let f = read_json_file_metadata ( json_name. to_str ( ) ?) ?;
168
+ let f = open_json_file ( json_name. to_str ( ) ?) ?;
180
169
let c_schema = FFI_ArrowSchema :: try_from ( & f. schema ) ?;
181
170
// Move exported schema into output struct
182
171
unsafe { ptr:: write ( out, c_schema) } ;
@@ -189,7 +178,7 @@ fn cdata_integration_export_batch_from_json(
189
178
out : * mut FFI_ArrowArray ,
190
179
) -> Result < ( ) > {
191
180
let json_name = unsafe { CStr :: from_ptr ( c_json_name) } ;
192
- let b = read_single_batch_from_json_file ( json_name. to_str ( ) ?, batch_num. try_into ( ) . unwrap ( ) ) ?;
181
+ let b = open_json_file ( json_name. to_str ( ) ?) ? . read_batch ( batch_num. try_into ( ) . unwrap ( ) ) ?;
193
182
let a = StructArray :: from ( b) . into_data ( ) ;
194
183
let c_array = FFI_ArrowArray :: new ( & a) ;
195
184
// Move exported array into output struct
@@ -202,7 +191,7 @@ fn cdata_integration_import_schema_and_compare_to_json(
202
191
c_schema : * mut FFI_ArrowSchema ,
203
192
) -> Result < ( ) > {
204
193
let json_name = unsafe { CStr :: from_ptr ( c_json_name) } ;
205
- let json_schema = read_json_file_metadata ( json_name. to_str ( ) ?) ?. schema ;
194
+ let json_schema = open_json_file ( json_name. to_str ( ) ?) ?. schema ;
206
195
207
196
// The source ArrowSchema will be released when this is dropped
208
197
let imported_schema = unsafe { FFI_ArrowSchema :: from_raw ( c_schema) } ;
@@ -241,7 +230,7 @@ fn cdata_integration_import_batch_and_compare_to_json(
241
230
) -> Result < ( ) > {
242
231
let json_name = unsafe { CStr :: from_ptr ( c_json_name) } ;
243
232
let json_batch =
244
- read_single_batch_from_json_file ( json_name. to_str ( ) ?, batch_num. try_into ( ) . unwrap ( ) ) ?;
233
+ open_json_file ( json_name. to_str ( ) ?) ? . read_batch ( batch_num. try_into ( ) . unwrap ( ) ) ?;
245
234
let schema = json_batch. schema ( ) ;
246
235
247
236
let data_type_for_import = DataType :: Struct ( schema. fields . clone ( ) ) ;
0 commit comments