Skip to content

Commit 9c3a537

Browse files
authored
Support simplifying expressions such as ~ ^(ba_r|foo)$ , where the … (#6)
1 parent a6dcd94 commit 9c3a537

File tree

2 files changed

+45
-4
lines changed

2 files changed

+45
-4
lines changed

datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs

+33
Original file line numberDiff line numberDiff line change
@@ -2499,10 +2499,43 @@ mod tests {
24992499
col("c1")
25002500
.in_list(vec![lit("foo"), lit("bar"), lit("baz"), lit("qux")], false),
25012501
);
2502+
assert_change(
2503+
regex_match(col("c1"), lit("^(fo_o)$")),
2504+
col("c1").eq(lit("fo_o")),
2505+
);
2506+
assert_change(
2507+
regex_match(col("c1"), lit("^(fo_o)$")),
2508+
col("c1").eq(lit("fo_o")),
2509+
);
2510+
assert_change(
2511+
regex_match(col("c1"), lit("^(fo_o|ba_r)$")),
2512+
col("c1").eq(lit("fo_o")).or(col("c1").eq(lit("ba_r"))),
2513+
);
2514+
assert_change(
2515+
regex_not_match(col("c1"), lit("^(fo_o|ba_r)$")),
2516+
col("c1")
2517+
.not_eq(lit("fo_o"))
2518+
.and(col("c1").not_eq(lit("ba_r"))),
2519+
);
2520+
assert_change(
2521+
regex_match(col("c1"), lit("^(fo_o|ba_r|ba_z)$")),
2522+
((col("c1").eq(lit("fo_o"))).or(col("c1").eq(lit("ba_r"))))
2523+
.or(col("c1").eq(lit("ba_z"))),
2524+
);
2525+
assert_change(
2526+
regex_match(col("c1"), lit("^(fo_o|ba_r|baz|qu_x)$")),
2527+
col("c1").in_list(
2528+
vec![lit("fo_o"), lit("ba_r"), lit("baz"), lit("qu_x")],
2529+
false,
2530+
),
2531+
);
25022532

25032533
// regular expressions that mismatch captured literals
25042534
assert_no_change(regex_match(col("c1"), lit("(foo|bar)")));
25052535
assert_no_change(regex_match(col("c1"), lit("(foo|bar)*")));
2536+
assert_no_change(regex_match(col("c1"), lit("(fo_o|b_ar)")));
2537+
assert_no_change(regex_match(col("c1"), lit("(foo|ba_r)*")));
2538+
assert_no_change(regex_match(col("c1"), lit("(fo_o|ba_r)*")));
25062539
assert_no_change(regex_match(col("c1"), lit("^(foo|bar)*")));
25072540
assert_no_change(regex_match(col("c1"), lit("^foo|bar$")));
25082541
assert_no_change(regex_match(col("c1"), lit("^(foo)(bar)$")));

datafusion/optimizer/src/simplify_expressions/regex.rs

+12-4
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ fn collect_concat_to_like_string(parts: &[Hir]) -> Option<String> {
108108

109109
for sub in parts {
110110
if let HirKind::Literal(l) = sub.kind() {
111-
s.push_str(str_from_literal(l)?);
111+
s.push_str(like_str_from_literal(l)?);
112112
} else {
113113
return None;
114114
}
@@ -120,7 +120,7 @@ fn collect_concat_to_like_string(parts: &[Hir]) -> Option<String> {
120120

121121
/// returns a str represented by `Literal` if it contains a valid utf8
122122
/// sequence and is safe for like (has no '%' and '_')
123-
fn str_from_literal(l: &Literal) -> Option<&str> {
123+
fn like_str_from_literal(l: &Literal) -> Option<&str> {
124124
// if not utf8, no good
125125
let s = std::str::from_utf8(&l.0).ok()?;
126126

@@ -131,6 +131,14 @@ fn str_from_literal(l: &Literal) -> Option<&str> {
131131
}
132132
}
133133

134+
/// returns a str represented by `Literal` if it contains a valid utf8 sequence (may contain '%' and '_')
135+
fn str_from_literal(l: &Literal) -> Option<&str> {
136+
// if not utf8, no good
137+
let s = std::str::from_utf8(&l.0).ok()?;
138+
139+
Some(s)
140+
}
141+
134142
fn is_safe_for_like(c: char) -> bool {
135143
(c != '%') && (c != '_')
136144
}
@@ -196,7 +204,7 @@ fn anchored_literal_to_expr(v: &[Hir]) -> Option<Expr> {
196204
2 => Some(lit("")),
197205
3 => {
198206
let HirKind::Literal(l) = v[1].kind() else { return None };
199-
str_from_literal(l).map(lit)
207+
like_str_from_literal(l).map(lit)
200208
}
201209
_ => None,
202210
}
@@ -242,7 +250,7 @@ fn lower_simple(mode: &OperatorMode, left: &Expr, hir: &Hir) -> Option<Expr> {
242250
return Some(mode.expr(Box::new(left.clone()), "%".to_owned()));
243251
}
244252
HirKind::Literal(l) => {
245-
let s = str_from_literal(l)?;
253+
let s = like_str_from_literal(l)?;
246254
return Some(mode.expr(Box::new(left.clone()), format!("%{s}%")));
247255
}
248256
HirKind::Concat(inner) if is_anchored_literal(inner) => {

0 commit comments

Comments
 (0)