1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
use ahash::AHashMap;
use regex::Regex;
use super::utils::combine_validities;
use crate::array::{BooleanArray, Offset, Utf8Array};
use crate::bitmap::Bitmap;
use crate::datatypes::DataType;
use crate::error::{Error, Result};
pub fn regex_match<O: Offset>(values: &Utf8Array<O>, regex: &Utf8Array<O>) -> Result<BooleanArray> {
if values.len() != regex.len() {
return Err(Error::InvalidArgumentError(
"Cannot perform comparison operation on arrays of different length".to_string(),
));
}
let mut map = AHashMap::new();
let validity = combine_validities(values.validity(), regex.validity());
let iterator = values.iter().zip(regex.iter()).map(|(haystack, regex)| {
if haystack.is_none() | regex.is_none() {
return Result::Ok(false);
};
let haystack = haystack.unwrap();
let regex = regex.unwrap();
let regex = if let Some(regex) = map.get(regex) {
regex
} else {
let re = Regex::new(regex).map_err(|e| {
Error::InvalidArgumentError(format!(
"Unable to build regex from LIKE pattern: {}",
e
))
})?;
map.insert(regex, re);
map.get(regex).unwrap()
};
Ok(regex.is_match(haystack))
});
let new_values = Bitmap::try_from_trusted_len_iter(iterator)?;
Ok(BooleanArray::new(DataType::Boolean, new_values, validity))
}
pub fn regex_match_scalar<O: Offset>(values: &Utf8Array<O>, regex: &str) -> Result<BooleanArray> {
let regex = Regex::new(regex)
.map_err(|e| Error::InvalidArgumentError(format!("Unable to compile regex: {}", e)))?;
Ok(unary_utf8_boolean(values, |x| regex.is_match(x)))
}
/// Applies the predicate `op` to every non-null string of `values` and collects the
/// answers into a [`BooleanArray`].
///
/// Null slots are never passed to `op`; their bit in the values bitmap is `false`, and
/// the result reuses a clone of the input validity.
fn unary_utf8_boolean<O: Offset, F: Fn(&str) -> bool>(
    values: &Utf8Array<O>,
    op: F,
) -> BooleanArray {
    let bits = Bitmap::from_trusted_len_iter(values.iter().map(|slot| match slot {
        Some(text) => op(text),
        // Placeholder bit for a null slot.
        None => false,
    }));

    BooleanArray::new(DataType::Boolean, bits, values.validity().cloned())
}