Skip to content

Commit 4e1896c

Browse files
committed
Merge remote-tracking branch 'upstream/master' into split-out-arrow-schema
2 parents 02d0e38 + 5146663 commit 4e1896c

File tree

15 files changed

+1991
-1701
lines changed

15 files changed

+1991
-1701
lines changed

arrow-schema/Cargo.toml

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,15 +38,10 @@ path = "src/lib.rs"
3838
bench = false
3939

4040
[dependencies]
41-
serde = { version = "1.0", default-features = false, features = ["derive"], optional = true }
42-
serde_json = { version = "1.0", default-features = false, features = ["std"], optional = true }
43-
44-
[package.metadata.docs.rs]
45-
features = ["json"]
41+
serde = { version = "1.0", default-features = false, features = ["derive", "std"], optional = true }
4642

4743
[features]
4844
default = []
49-
json = ["serde", "serde_json"]
5045

5146
[dev-dependencies]
5247

arrow-schema/src/datatype.rs

Lines changed: 0 additions & 347 deletions
Large diffs are not rendered by default.

arrow-schema/src/field.rs

Lines changed: 0 additions & 279 deletions
Original file line numberDiff line numberDiff line change
@@ -250,285 +250,6 @@ impl Field {
250250
}
251251
}
252252

253-
/// Parse a `Field` definition from a JSON representation.
254-
#[cfg(feature = "json")]
255-
pub fn from(json: &serde_json::Value) -> Result<Self, ArrowError> {
256-
use serde_json::Value;
257-
match *json {
258-
Value::Object(ref map) => {
259-
let name = match map.get("name") {
260-
Some(&Value::String(ref name)) => name.to_string(),
261-
_ => {
262-
return Err(ArrowError::ParseError(
263-
"Field missing 'name' attribute".to_string(),
264-
));
265-
}
266-
};
267-
let nullable = match map.get("nullable") {
268-
Some(&Value::Bool(b)) => b,
269-
_ => {
270-
return Err(ArrowError::ParseError(
271-
"Field missing 'nullable' attribute".to_string(),
272-
));
273-
}
274-
};
275-
let data_type = match map.get("type") {
276-
Some(t) => DataType::from(t)?,
277-
_ => {
278-
return Err(ArrowError::ParseError(
279-
"Field missing 'type' attribute".to_string(),
280-
));
281-
}
282-
};
283-
284-
// Referenced example file: testing/data/arrow-ipc-stream/integration/1.0.0-littleendian/generated_custom_metadata.json.gz
285-
let metadata = match map.get("metadata") {
286-
Some(&Value::Array(ref values)) => {
287-
let mut res: BTreeMap<String, String> = BTreeMap::new();
288-
for value in values {
289-
match value.as_object() {
290-
Some(map) => {
291-
if map.len() != 2 {
292-
return Err(ArrowError::ParseError(
293-
"Field 'metadata' must have exact two entries for each key-value map".to_string(),
294-
));
295-
}
296-
if let (Some(k), Some(v)) =
297-
(map.get("key"), map.get("value"))
298-
{
299-
if let (Some(k_str), Some(v_str)) =
300-
(k.as_str(), v.as_str())
301-
{
302-
res.insert(
303-
k_str.to_string().clone(),
304-
v_str.to_string().clone(),
305-
);
306-
} else {
307-
return Err(ArrowError::ParseError("Field 'metadata' must have map value of string type".to_string()));
308-
}
309-
} else {
310-
return Err(ArrowError::ParseError("Field 'metadata' lacks map keys named \"key\" or \"value\"".to_string()));
311-
}
312-
}
313-
_ => {
314-
return Err(ArrowError::ParseError(
315-
"Field 'metadata' contains non-object key-value pair".to_string(),
316-
));
317-
}
318-
}
319-
}
320-
Some(res)
321-
}
322-
// We also support map format, because Schema's metadata supports this.
323-
// See https://github.com/apache/arrow/pull/5907
324-
Some(&Value::Object(ref values)) => {
325-
let mut res: BTreeMap<String, String> = BTreeMap::new();
326-
for (k, v) in values {
327-
if let Some(str_value) = v.as_str() {
328-
res.insert(k.clone(), str_value.to_string().clone());
329-
} else {
330-
return Err(ArrowError::ParseError(
331-
format!("Field 'metadata' contains non-string value for key {}", k),
332-
));
333-
}
334-
}
335-
Some(res)
336-
}
337-
Some(_) => {
338-
return Err(ArrowError::ParseError(
339-
"Field `metadata` is not json array".to_string(),
340-
));
341-
}
342-
_ => None,
343-
};
344-
345-
// if data_type is a struct or list, get its children
346-
let data_type = match data_type {
347-
DataType::List(_)
348-
| DataType::LargeList(_)
349-
| DataType::FixedSizeList(_, _) => match map.get("children") {
350-
Some(Value::Array(values)) => {
351-
if values.len() != 1 {
352-
return Err(ArrowError::ParseError(
353-
"Field 'children' must have one element for a list data type".to_string(),
354-
));
355-
}
356-
match data_type {
357-
DataType::List(_) => {
358-
DataType::List(Box::new(Self::from(&values[0])?))
359-
}
360-
DataType::LargeList(_) => {
361-
DataType::LargeList(Box::new(Self::from(&values[0])?))
362-
}
363-
DataType::FixedSizeList(_, int) => DataType::FixedSizeList(
364-
Box::new(Self::from(&values[0])?),
365-
int,
366-
),
367-
_ => unreachable!(
368-
"Data type should be a list, largelist or fixedsizelist"
369-
),
370-
}
371-
}
372-
Some(_) => {
373-
return Err(ArrowError::ParseError(
374-
"Field 'children' must be an array".to_string(),
375-
))
376-
}
377-
None => {
378-
return Err(ArrowError::ParseError(
379-
"Field missing 'children' attribute".to_string(),
380-
));
381-
}
382-
},
383-
DataType::Struct(mut fields) => match map.get("children") {
384-
Some(Value::Array(values)) => {
385-
let struct_fields: Result<Vec<Field>, _> =
386-
values.iter().map(Field::from).collect();
387-
fields.append(&mut struct_fields?);
388-
DataType::Struct(fields)
389-
}
390-
Some(_) => {
391-
return Err(ArrowError::ParseError(
392-
"Field 'children' must be an array".to_string(),
393-
))
394-
}
395-
None => {
396-
return Err(ArrowError::ParseError(
397-
"Field missing 'children' attribute".to_string(),
398-
));
399-
}
400-
},
401-
DataType::Map(_, keys_sorted) => {
402-
match map.get("children") {
403-
Some(Value::Array(values)) if values.len() == 1 => {
404-
let child = Self::from(&values[0])?;
405-
// child must be a struct
406-
match child.data_type() {
407-
DataType::Struct(map_fields) if map_fields.len() == 2 => {
408-
DataType::Map(Box::new(child), keys_sorted)
409-
}
410-
t => {
411-
return Err(ArrowError::ParseError(
412-
format!("Map children should be a struct with 2 fields, found {:?}", t)
413-
))
414-
}
415-
}
416-
}
417-
Some(_) => {
418-
return Err(ArrowError::ParseError(
419-
"Field 'children' must be an array with 1 element"
420-
.to_string(),
421-
))
422-
}
423-
None => {
424-
return Err(ArrowError::ParseError(
425-
"Field missing 'children' attribute".to_string(),
426-
));
427-
}
428-
}
429-
}
430-
DataType::Union(_, type_ids, mode) => match map.get("children") {
431-
Some(Value::Array(values)) => {
432-
let union_fields: Vec<Field> = values
433-
.iter()
434-
.map(Field::from)
435-
.collect::<Result<_, _>>()?;
436-
DataType::Union(union_fields, type_ids, mode)
437-
}
438-
Some(_) => {
439-
return Err(ArrowError::ParseError(
440-
"Field 'children' must be an array".to_string(),
441-
))
442-
}
443-
None => {
444-
return Err(ArrowError::ParseError(
445-
"Field missing 'children' attribute".to_string(),
446-
));
447-
}
448-
},
449-
_ => data_type,
450-
};
451-
452-
let mut dict_id = 0;
453-
let mut dict_is_ordered = false;
454-
455-
let data_type = match map.get("dictionary") {
456-
Some(dictionary) => {
457-
let index_type = match dictionary.get("indexType") {
458-
Some(t) => DataType::from(t)?,
459-
_ => {
460-
return Err(ArrowError::ParseError(
461-
"Field missing 'indexType' attribute".to_string(),
462-
));
463-
}
464-
};
465-
dict_id = match dictionary.get("id") {
466-
Some(Value::Number(n)) => n.as_i64().unwrap(),
467-
_ => {
468-
return Err(ArrowError::ParseError(
469-
"Field missing 'id' attribute".to_string(),
470-
));
471-
}
472-
};
473-
dict_is_ordered = match dictionary.get("isOrdered") {
474-
Some(&Value::Bool(n)) => n,
475-
_ => {
476-
return Err(ArrowError::ParseError(
477-
"Field missing 'isOrdered' attribute".to_string(),
478-
));
479-
}
480-
};
481-
DataType::Dictionary(Box::new(index_type), Box::new(data_type))
482-
}
483-
_ => data_type,
484-
};
485-
Ok(Field {
486-
name,
487-
data_type,
488-
nullable,
489-
dict_id,
490-
dict_is_ordered,
491-
metadata,
492-
})
493-
}
494-
_ => Err(ArrowError::ParseError(
495-
"Invalid json value type for field".to_string(),
496-
)),
497-
}
498-
}
499-
500-
/// Generate a JSON representation of the `Field`.
501-
#[cfg(feature = "json")]
502-
pub fn to_json(&self) -> serde_json::Value {
503-
let children: Vec<serde_json::Value> = match self.data_type() {
504-
DataType::Struct(fields) => fields.iter().map(|f| f.to_json()).collect(),
505-
DataType::List(field)
506-
| DataType::LargeList(field)
507-
| DataType::FixedSizeList(field, _)
508-
| DataType::Map(field, _) => vec![field.to_json()],
509-
_ => vec![],
510-
};
511-
match self.data_type() {
512-
DataType::Dictionary(ref index_type, ref value_type) => serde_json::json!({
513-
"name": self.name,
514-
"nullable": self.nullable,
515-
"type": value_type.to_json(),
516-
"children": children,
517-
"dictionary": {
518-
"id": self.dict_id,
519-
"indexType": index_type.to_json(),
520-
"isOrdered": self.dict_is_ordered
521-
}
522-
}),
523-
_ => serde_json::json!({
524-
"name": self.name,
525-
"nullable": self.nullable,
526-
"type": self.data_type.to_json(),
527-
"children": children
528-
}),
529-
}
530-
}
531-
532253
/// Merge this field into self if it is compatible.
533254
///
534255
/// Struct fields are merged recursively.

0 commit comments

Comments
 (0)