1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
use serde_derive::{Deserialize, Serialize};
use serde_json::Value;
use crate::error::Error;
pub mod read;
pub mod write;
/// Root object of an Arrow JSON document: a schema, a list of record batches
/// and, optionally, a list of dictionary batches.
#[derive(Debug, Deserialize, Serialize)]
pub struct ArrowJson {
    /// The `schema` section of the document.
    pub schema: ArrowJsonSchema,
    /// The `batches` section of the document, in declaration order.
    pub batches: Vec<ArrowJsonBatch>,
    /// The `dictionaries` section; the key is left out of the serialized
    /// output entirely when this is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub dictionaries: Option<Vec<ArrowJsonDictionaryBatch>>,
}
/// The `schema` section of an [`ArrowJson`] document.
#[derive(Debug, Deserialize, Serialize)]
pub struct ArrowJsonSchema {
    /// Field descriptions, in declaration order.
    pub fields: Vec<ArrowJsonField>,
    /// Free-form metadata kept as an uninterpreted JSON [`Value`]; the key is
    /// not written when this is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Value>,
}
/// A single entry of [`ArrowJsonSchema::fields`]; fields nest through
/// `children`.
#[derive(Debug, Deserialize, Serialize)]
pub struct ArrowJsonField {
    /// Name of the field.
    pub name: String,
    /// Type description, stored under the JSON key `type` and kept as an
    /// uninterpreted JSON [`Value`].
    #[serde(rename = "type")]
    pub field_type: Value,
    /// The `nullable` flag of the field.
    pub nullable: bool,
    /// Nested fields. This key carries no serde default, so it is expected to
    /// be present when deserializing.
    pub children: Vec<ArrowJsonField>,
    /// Dictionary description; the key is not written when this is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub dictionary: Option<ArrowJsonFieldDictionary>,
    /// Free-form metadata kept as an uninterpreted JSON [`Value`]; the key is
    /// not written when this is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Value>,
}
/// The `dictionary` object attached to an [`ArrowJsonField`].
///
/// Keys are camelCase on the JSON side: `id`, `indexType` and `isOrdered`.
#[derive(Debug, Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct ArrowJsonFieldDictionary {
    /// Dictionary identifier.
    pub id: i64,
    /// Integer type description, read from / written to `indexType`.
    pub index_type: IntegerType,
    /// Ordering flag, read from / written to `isOrdered`.
    pub is_ordered: bool,
}
/// JSON description of an integer type.
///
/// Keys are camelCase on the JSON side: `name`, `isSigned` and `bitWidth`.
#[derive(Debug, Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct IntegerType {
    /// The `name` key of the type object.
    pub name: String,
    /// Signedness flag, read from / written to `isSigned`.
    pub is_signed: bool,
    /// Width in bits, read from / written to `bitWidth`.
    pub bit_width: i64,
}
/// One entry of [`ArrowJson::batches`]: a `count` plus its columns.
#[derive(Debug, Deserialize, Serialize)]
pub struct ArrowJsonBatch {
    /// The `count` key of the batch. Unlike `columns` this field is private,
    /// so it is only reachable from this module and its children.
    count: usize,
    /// Column payloads, in declaration order.
    pub columns: Vec<ArrowJsonColumn>,
}
/// One entry of [`ArrowJson::dictionaries`]: a dictionary id paired with the
/// batch that holds its data.
// The former `#[allow(non_snake_case)]` was dropped: both fields are already
// snake_case, so the suppression had nothing to silence.
#[derive(Deserialize, Serialize, Debug)]
pub struct ArrowJsonDictionaryBatch {
    /// Dictionary identifier.
    pub id: i64,
    /// The batch stored under the `data` key.
    pub data: ArrowJsonBatch,
}
/// One column of an [`ArrowJsonBatch`]; columns nest through `children`.
///
/// The buffer-like members use upper-case JSON keys (`VALIDITY`, `DATA`,
/// `OFFSET`, `TYPE_ID`). None of the optional members carries a
/// `skip_serializing_if`, so a `None` is serialized as JSON `null`.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct ArrowJsonColumn {
    /// Column name. Private, unlike the remaining fields.
    name: String,
    /// The `count` key of the column.
    pub count: usize,
    /// Contents of the `VALIDITY` key, one `u8` per entry.
    #[serde(rename = "VALIDITY")]
    pub validity: Option<Vec<u8>>,
    /// Contents of the `DATA` key, kept as uninterpreted JSON values.
    #[serde(rename = "DATA")]
    pub data: Option<Vec<Value>>,
    /// Contents of the `OFFSET` key, kept as uninterpreted JSON values.
    #[serde(rename = "OFFSET")]
    pub offset: Option<Vec<Value>>,
    /// Contents of the `TYPE_ID` key, kept as uninterpreted JSON values.
    #[serde(rename = "TYPE_ID")]
    pub type_id: Option<Vec<Value>>,
    /// Nested child columns, if any.
    pub children: Option<Vec<ArrowJsonColumn>>,
}
/// Conversion from `serde_json` failures into this crate's [`Error`], which
/// also makes them usable with the `?` operator.
impl From<serde_json::Error> for Error {
    /// Renders `error` to text and wraps it in `Error::ExternalFormat`.
    fn from(error: serde_json::Error) -> Self {
        let message = error.to_string();
        Self::ExternalFormat(message)
    }
}