Skip to content

Commit c0c55df

Browse files
committed
Snowflake compatibility
1 parent a994450 commit c0c55df

File tree

2 files changed

+35
-16
lines changed

2 files changed

+35
-16
lines changed

crates/embucket-functions/src/regexp/regexp_instr.rs

Lines changed: 28 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -82,11 +82,13 @@ impl RegexpInstrFunc {
8282
#[allow(
8383
clippy::cast_possible_truncation,
8484
clippy::cast_sign_loss,
85-
clippy::as_conversions
85+
clippy::as_conversions,
86+
clippy::too_many_lines,
87+
clippy::unwrap_used
8688
)]
8789
fn take_args_values(args: &[ColumnarValue]) -> DFResult<(usize, usize, usize, &str, usize)> {
8890
let position = args.get(2).map_or_else(
89-
|| Ok(0), // Default value of 0 if the index is out of bounds
91+
|| Ok(0),
9092
|value| match value {
9193
ColumnarValue::Scalar(ScalarValue::Int64(Some(value))) if 0 <= *value => {
9294
Ok(*value as usize - 1)
@@ -102,12 +104,12 @@ impl RegexpInstrFunc {
102104
data_type: other.data_type(),
103105
position: 3usize,
104106
}
105-
.fail(), // Construct the error
107+
.fail(),
106108
},
107109
)?;
108110

109111
let occurrence = args.get(3).map_or_else(
110-
|| Ok(0), // Default value of 0 if the index is out of bounds
112+
|| Ok(0),
111113
|value| match value {
112114
ColumnarValue::Scalar(ScalarValue::Int64(Some(value))) if 0 <= *value => {
113115
Ok(*value as usize - 1)
@@ -123,7 +125,7 @@ impl RegexpInstrFunc {
123125
data_type: other.data_type(),
124126
position: 4usize,
125127
}
126-
.fail(), // Construct the error
128+
.fail(),
127129
},
128130
)?;
129131

@@ -135,9 +137,7 @@ impl RegexpInstrFunc {
135137
{
136138
Ok(*value as usize)
137139
}
138-
ColumnarValue::Scalar(ScalarValue::Int64(Some(value)))
139-
if !(0..=1).contains(value) =>
140-
{
140+
ColumnarValue::Scalar(ScalarValue::Int64(Some(value))) => {
141141
regexp_errors::WrongArgValueSnafu {
142142
got: value.to_string(),
143143
reason: "Return option must be 0, 1, or NULL".to_string(),
@@ -148,23 +148,37 @@ impl RegexpInstrFunc {
148148
data_type: other.data_type(),
149149
position: 5usize,
150150
}
151-
.fail(), // Construct the error
151+
.fail(),
152152
},
153153
)?;
154154

155155
let regexp_parameters = args.get(5).map_or_else(
156-
|| Ok(""), // Default value of 0 if the index is out of bounds
156+
|| Ok("c"),
157157
|value| match value {
158158
ColumnarValue::Scalar(
159159
ScalarValue::Utf8(Some(value))
160160
| ScalarValue::Utf8View(Some(value))
161161
| ScalarValue::LargeUtf8(Some(value)),
162-
) => Ok(value),
162+
) if value.contains(['c', 'i', 'm', 'e', 's']) => Ok(value),
163+
ColumnarValue::Scalar(
164+
ScalarValue::Utf8(Some(value))
165+
| ScalarValue::Utf8View(Some(value))
166+
| ScalarValue::LargeUtf8(Some(value)),
167+
) if value.is_empty() => Ok("c"),
168+
ColumnarValue::Scalar(
169+
ScalarValue::Utf8(Some(value))
170+
| ScalarValue::Utf8View(Some(value))
171+
| ScalarValue::LargeUtf8(Some(value)),
172+
) => regexp_errors::WrongArgValueSnafu {
173+
got: value.to_string(),
174+
reason: format!("Unknown parameter: '{}'", value.get(0..1).unwrap()),
175+
}
176+
.fail(),
163177
other => regexp_errors::UnsupportedInputTypeWithPositionSnafu {
164178
data_type: other.data_type(),
165179
position: 6usize,
166180
}
167-
.fail(), // Construct the error
181+
.fail(),
168182
},
169183
)?;
170184

@@ -175,7 +189,7 @@ impl RegexpInstrFunc {
175189
} else {
176190
Ok(0)
177191
}
178-
}, // Default value of 0 if the index is out of bounds
192+
},
179193
|value| match value {
180194
ColumnarValue::Scalar(ScalarValue::Int64(Some(value))) if 0 <= *value => {
181195
Ok(*value as usize)
@@ -191,7 +205,7 @@ impl RegexpInstrFunc {
191205
data_type: other.data_type(),
192206
position: 6usize,
193207
}
194-
.fail(), // Construct the error
208+
.fail(),
195209
},
196210
)?;
197211

crates/embucket-functions/src/utils.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,13 @@ where
2727
}
2828

2929
pub fn pattern_to_regex(pattern: &str, regexp_paramaters: &str) -> Result<Regex, regex::Error> {
30+
let case_insensitive = regexp_paramaters
31+
.chars()
32+
.rev()
33+
.find(|&ch| ch == 'i' || ch == 'c')
34+
== Some('i');
3035
RegexBuilder::new(pattern)
31-
.case_insensitive(regexp_paramaters.contains('i') && !regexp_paramaters.contains('c'))
36+
.case_insensitive(case_insensitive)
3237
.multi_line(regexp_paramaters.contains('m'))
3338
.dot_matches_new_line(regexp_paramaters.contains('s'))
3439
.build()
@@ -41,5 +46,5 @@ pub fn regexp<'h, 'r: 'h>(
4146
) -> impl Iterator<Item = Option<CaptureMatches<'r, 'h>>> {
4247
array
4348
.iter()
44-
.map(move |opt| opt.map(move |s| regex.captures_iter(&s[position..])))
49+
.map(move |opt| opt.map(move |s| regex.captures_iter(&s[position.min(s.len())..])))
4550
}

0 commit comments

Comments
 (0)