Extension types
This crate supports Arrow's "extension type", which lets you declare, use, and share custom logical types.
An extension type is just a `DataType` with a name and some metadata.
In particular, its physical representation is equal to its inner `DataType`, which implies
that all functionality in this crate works as if it were the inner `DataType`.
The following example shows how to declare one:
use std::io::{Cursor, Seek, Write};
use arrow2::array::*;
use arrow2::chunk::Chunk;
use arrow2::datatypes::*;
use arrow2::error::Result;
use arrow2::io::ipc::read;
use arrow2::io::ipc::write;
/// Round-trips an array carrying an extension type through the IPC file
/// format and checks that the extension data type is still attached after
/// reading it back.
fn main() -> Result<()> {
    // The extension: a logical type named "date16" whose inner type is `UInt16`
    // and which carries no metadata.
    let date16 = DataType::Extension("date16".to_string(), Box::new(DataType::UInt16), None);

    // Build a plain `UInt16` array and re-tag it with the extension type.
    let values = UInt16Array::from_slice([1, 2]).to(date16.clone());

    // From this point on the array is used like any other array:
    // write it to an in-memory IPC file...
    let sink = Cursor::new(Vec::<u8>::new());
    let written = write_ipc(sink, values)?;

    // ...and read the first chunk back from the written bytes.
    let bytes = written.into_inner();
    let chunk = read_ipc(&bytes)?;

    // The data type of the first column must still be the extension type.
    let column = &chunk.columns()[0];
    assert_eq!(column.data_type(), &date16);

    // See https://arrow.apache.org/docs/format/Columnar.html#extension-types
    // for how other consumers are expected to handle extension types.
    Ok(())
}
/// Writes `array` as the single column (named "a", non-nullable) of a
/// one-chunk IPC file into `writer`, then hands the writer back to the caller.
///
/// Any error raised while building the chunk or while starting, writing, or
/// finishing the file is propagated to the caller.
fn write_ipc<W: Write + Seek>(writer: W, array: impl Array + 'static) -> Result<W> {
    // No compression is requested for the written file.
    let options = write::WriteOptions { compression: None };

    // The schema is derived from the array's own data type.
    let fields = vec![Field::new("a", array.data_type().clone(), false)];
    let mut file_writer = write::FileWriter::new(writer, fields.into(), None, options);

    // A chunk holding just this one array.
    let columns: Vec<Box<dyn Array>> = vec![Box::new(array)];
    let chunk = Chunk::try_new(columns)?;

    file_writer.start()?;
    file_writer.write(&chunk, None)?;
    file_writer.finish()?;

    Ok(file_writer.into_inner())
}
/// Reads the first chunk of the IPC file stored in `buf`.
///
/// Errors from reading the file metadata or from decoding the chunk are
/// returned to the caller.
///
/// # Panics
///
/// Panics if the reader yields no chunk at all.
fn read_ipc(buf: &[u8]) -> Result<Chunk<Box<dyn Array>>> {
    let mut source = Cursor::new(buf);
    let file_metadata = read::read_file_metadata(&mut source)?;

    // No projection and no limit: read every column of every chunk.
    let mut chunks = read::FileReader::new(source, file_metadata, None, None);

    let first = chunks.next();
    first.unwrap()
}