1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
use std::hash::{Hash, Hasher};
use std::{collections::hash_map::DefaultHasher, sync::Arc};

use hash_hasher::HashedMap;

use crate::{
    array::{primitive::MutablePrimitiveArray, Array, MutableArray, TryExtend, TryPush},
    bitmap::MutableBitmap,
    datatypes::DataType,
    error::{Error, Result},
};

use super::{DictionaryArray, DictionaryKey};

/// A mutable, strong-typed version of [`DictionaryArray`].
///
/// # Example
/// Building a UTF8 dictionary with `i32` keys.
/// ```
/// # use arrow2::array::{MutableDictionaryArray, MutableUtf8Array, TryPush};
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let mut array: MutableDictionaryArray<i32, MutableUtf8Array<i32>> = MutableDictionaryArray::new();
/// array.try_push(Some("A"))?;
/// array.try_push(Some("B"))?;
/// array.push_null();
/// array.try_push(Some("C"))?;
/// # Ok(())
/// # }
/// ```
#[derive(Debug)]
pub struct MutableDictionaryArray<K: DictionaryKey, M: MutableArray> {
    data_type: DataType,
    keys: MutablePrimitiveArray<K>,
    map: HashedMap<u64, K>,
    // invariant: `keys.len() <= values.len()`
    values: M,
}

impl<K: DictionaryKey, M: MutableArray> From<MutableDictionaryArray<K, M>> for DictionaryArray<K> {
    fn from(mut other: MutableDictionaryArray<K, M>) -> Self {
        // Safety - the invariant of this struct ensures that this is up-held
        unsafe {
            DictionaryArray::<K>::try_new_unchecked(
                other.data_type,
                other.keys.into(),
                other.values.as_box(),
            )
            .unwrap()
        }
    }
}

impl<K: DictionaryKey, M: MutableArray> From<M> for MutableDictionaryArray<K, M> {
    fn from(values: M) -> Self {
        Self {
            data_type: DataType::Dictionary(
                K::KEY_TYPE,
                Box::new(values.data_type().clone()),
                false,
            ),
            keys: MutablePrimitiveArray::<K>::new(),
            map: HashedMap::default(),
            values,
        }
    }
}

impl<K: DictionaryKey, M: MutableArray + Default> MutableDictionaryArray<K, M> {
    /// Creates an empty [`MutableDictionaryArray`].
    pub fn new() -> Self {
        let values = M::default();
        Self {
            data_type: DataType::Dictionary(
                K::KEY_TYPE,
                Box::new(values.data_type().clone()),
                false,
            ),
            keys: MutablePrimitiveArray::<K>::new(),
            map: HashedMap::default(),
            values,
        }
    }
}

impl<K: DictionaryKey, M: MutableArray + Default> Default for MutableDictionaryArray<K, M> {
    fn default() -> Self {
        Self::new()
    }
}

impl<K: DictionaryKey, M: MutableArray> MutableDictionaryArray<K, M> {
    /// Returns whether the value should be pushed to the values or not
    fn try_push_valid<T: Hash>(&mut self, value: &T) -> Result<bool> {
        let mut hasher = DefaultHasher::new();
        value.hash(&mut hasher);
        let hash = hasher.finish();
        match self.map.get(&hash) {
            Some(key) => {
                self.keys.push(Some(*key));
                Ok(false)
            }
            None => {
                let key = K::try_from(self.map.len()).map_err(|_| Error::Overflow)?;
                self.map.insert(hash, key);
                self.keys.push(Some(key));
                Ok(true)
            }
        }
    }

    /// pushes a null value
    pub fn push_null(&mut self) {
        self.keys.push(None)
    }

    /// returns a mutable reference to the inner values.
    fn mut_values(&mut self) -> &mut M {
        &mut self.values
    }

    /// returns a reference to the inner values.
    pub fn values(&self) -> &M {
        &self.values
    }

    /// converts itself into [`Arc<dyn Array>`]
    pub fn into_arc(self) -> Arc<dyn Array> {
        let a: DictionaryArray<K> = self.into();
        Arc::new(a)
    }

    /// converts itself into [`Box<dyn Array>`]
    pub fn into_box(self) -> Box<dyn Array> {
        let a: DictionaryArray<K> = self.into();
        Box::new(a)
    }

    /// Reserves `additional` slots.
    pub fn reserve(&mut self, additional: usize) {
        self.keys.reserve(additional);
    }

    /// Shrinks the capacity of the [`MutableDictionaryArray`] to fit its current length.
    pub fn shrink_to_fit(&mut self) {
        self.values.shrink_to_fit();
        self.keys.shrink_to_fit();
    }

    /// Returns the dictionary map
    pub fn map(&self) -> &HashedMap<u64, K> {
        &self.map
    }

    /// Returns the dictionary keys
    pub fn keys(&self) -> &MutablePrimitiveArray<K> {
        &self.keys
    }

    fn take_into(&mut self) -> DictionaryArray<K> {
        // Safety - the invariant of this struct ensures that this is up-held
        unsafe {
            DictionaryArray::<K>::try_new(
                self.data_type.clone(),
                std::mem::take(&mut self.keys).into(),
                self.values.as_box(),
            )
            .unwrap()
        }
    }
}

impl<K: DictionaryKey, M: 'static + MutableArray> MutableArray for MutableDictionaryArray<K, M> {
    fn len(&self) -> usize {
        self.keys.len()
    }

    fn validity(&self) -> Option<&MutableBitmap> {
        self.keys.validity()
    }

    fn as_box(&mut self) -> Box<dyn Array> {
        Box::new(self.take_into())
    }

    fn as_arc(&mut self) -> Arc<dyn Array> {
        Arc::new(self.take_into())
    }

    fn data_type(&self) -> &DataType {
        &self.data_type
    }

    fn as_any(&self) -> &dyn std::any::Any {
        self
    }

    fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
        self
    }

    fn push_null(&mut self) {
        self.keys.push(None)
    }

    fn reserve(&mut self, additional: usize) {
        self.reserve(additional)
    }

    fn shrink_to_fit(&mut self) {
        self.shrink_to_fit()
    }
}

impl<K, M, T: Hash> TryExtend<Option<T>> for MutableDictionaryArray<K, M>
where
    K: DictionaryKey,
    M: MutableArray + TryExtend<Option<T>>,
{
    fn try_extend<II: IntoIterator<Item = Option<T>>>(&mut self, iter: II) -> Result<()> {
        for value in iter {
            if let Some(value) = value {
                if self.try_push_valid(&value)? {
                    self.mut_values().try_extend(std::iter::once(Some(value)))?;
                }
            } else {
                self.push_null();
            }
        }
        Ok(())
    }
}

impl<K, M, T> TryPush<Option<T>> for MutableDictionaryArray<K, M>
where
    K: DictionaryKey,
    M: MutableArray + TryPush<Option<T>>,
    T: Hash,
{
    fn try_push(&mut self, item: Option<T>) -> Result<()> {
        if let Some(value) = item {
            if self.try_push_valid(&value)? {
                self.values.try_push(Some(value))
            } else {
                Ok(())
            }
        } else {
            self.push_null();
            Ok(())
        }
    }
}