Extension types

This crate supports Arrows' "extension type", to declare, use, and share custom logical types.

An extension type is just a DataType with a name and some metadata. In particular, its physical representation is equal to its inner DataType, which implies that all functionality in this crate works as if it was the inner DataType.

The following example shows how to declare one:

use std::io::{Cursor, Seek, Write};

use arrow2::array::*;
use arrow2::chunk::Chunk;
use arrow2::datatypes::*;
use arrow2::error::Result;
use arrow2::io::ipc::read;
use arrow2::io::ipc::write;

fn main() -> Result<()> {
    // declare an extension.
    let extension_type =
        DataType::Extension("date16".to_string(), Box::new(DataType::UInt16), None);

    // initialize an array with it.
    let array = UInt16Array::from_slice([1, 2]).to(extension_type.clone());

    // from here on, it works as usual
    let buffer = Cursor::new(vec![]);

    // write to IPC
    let result_buffer = write_ipc(buffer, array)?;

    // read it back
    let batch = read_ipc(&result_buffer.into_inner())?;

    // and verify that the datatype is preserved.
    let array = &batch.columns()[0];
    assert_eq!(array.data_type(), &extension_type);

    // see https://arrow.apache.org/docs/format/Columnar.html#extension-types
    // for consuming by other consumers.
    Ok(())
}

fn write_ipc<W: Write + Seek>(writer: W, array: impl Array + 'static) -> Result<W> {
    let schema = vec![Field::new("a", array.data_type().clone(), false)].into();

    let options = write::WriteOptions { compression: None };
    let mut writer = write::FileWriter::new(writer, schema, None, options);

    let batch = Chunk::try_new(vec![Box::new(array) as Box<dyn Array>])?;

    writer.start()?;
    writer.write(&batch, None)?;
    writer.finish()?;

    Ok(writer.into_inner())
}

fn read_ipc(buf: &[u8]) -> Result<Chunk<Box<dyn Array>>> {
    let mut cursor = Cursor::new(buf);
    let metadata = read::read_file_metadata(&mut cursor)?;
    let mut reader = read::FileReader::new(cursor, metadata, None, None);
    reader.next().unwrap()
}