|
4 | 4 |
|
5 | 5 | #pragma once
|
6 | 6 |
|
| 7 | +#include <gtest/gtest_prod.h> |
| 8 | +#include <re2/re2.h> |
| 9 | + |
7 | 10 | #include <string>
|
8 | 11 |
|
9 |
| -#include "engine/sparqlExpressions/LiteralExpression.h" |
10 | 12 | #include "engine/sparqlExpressions/SparqlExpression.h"
|
11 |
| -#include "re2/re2.h" |
12 | 13 |
|
13 | 14 | namespace sparqlExpression {
|
14 |
| -// Class implementing the REGEX function, which takes two mandatory arguments |
15 |
| -// (an expression and a regex) and one optional argument (a string of flags). |
16 |
| -class RegexExpression : public SparqlExpression { |
| 15 | +// Class implementing a specialization of the REGEX function. This optimization |
| 16 | +// is possible if the regex is known in advance and is a simple prefix regex. |
| 17 | +class PrefixRegexExpression : public SparqlExpression { |
17 | 18 | private:
|
18 |
| - SparqlExpression::Ptr child_; |
19 |
| - // The regular expression. It needs to be a `std::optional` because `RE2` |
20 |
| - // objects do not have a default constructor. |
21 |
| - std::optional<RE2> regex_; |
22 |
| - // If this `std::optional` holds a string, we have a simple prefix regex |
23 |
| - // (which translates to a range search) and this string holds the prefix. |
24 |
| - std::optional<std::string> prefixRegex_; |
25 |
| - // The regex as a string, used for the cache key. |
26 |
| - std::string regexAsString_; |
27 |
| - |
28 |
| - // True iff the expression is enclosed in `STR()`. |
| 19 | + Ptr child_; |
| 20 | + // This string holds the prefix of a simple prefix regex (which translates |
| 21 | + // to a range search). |
| 22 | + std::string prefixRegex_; |
| 23 | + // Holds the variable over which the regex is evaluated. |
| 24 | + Variable variable_; |
| 25 | + // If the variable is wrapped inside a `STR()` function, this is set to true. |
29 | 26 | bool childIsStrExpression_ = false;
|
30 | 27 |
|
31 |
| - public: |
32 | 28 | // The `child` must be a `VariableExpression` and `regex` must be a
|
33 | 29 | // `LiteralExpression` that stores a string, otherwise an exception will be
|
34 | 30 | // thrown.
|
35 |
| - RegexExpression(SparqlExpression::Ptr child, SparqlExpression::Ptr regex, |
36 |
| - std::optional<SparqlExpression::Ptr> optionalFlags); |
| 31 | + PrefixRegexExpression(Ptr child, std::string prefixRegex, Variable variable); |
| 32 | + |
| 33 | + public: |
| 34 | + PrefixRegexExpression(PrefixRegexExpression&&) noexcept = default; |
| 35 | + PrefixRegexExpression& operator=(PrefixRegexExpression&&) noexcept = default; |
| 36 | + PrefixRegexExpression(const PrefixRegexExpression&) noexcept = delete; |
| 37 | + PrefixRegexExpression& operator=(const PrefixRegexExpression&) noexcept = |
| 38 | + delete; |
| 39 | + |
| 40 | + // Check if the children of this expression allow for the prefix regex |
| 41 | + // optimization. If this is the case, a `PrefixRegexExpression` is returned, |
| 42 | + // otherwise `std::nullopt`. |
| 43 | + static std::optional<PrefixRegexExpression> |
| 44 | + makePrefixRegexExpressionIfPossible(Ptr& string, |
| 45 | + const SparqlExpression& regex); |
37 | 46 |
|
| 47 | + // ___________________________________________________________________________ |
38 | 48 | ExpressionResult evaluate(EvaluationContext* context) const override;
|
39 | 49 |
|
40 |
| - // _________________________________________________________________________ |
41 |
| - [[nodiscard]] string getCacheKey( |
| 50 | + // ___________________________________________________________________________ |
| 51 | + [[nodiscard]] std::string getCacheKey( |
42 | 52 | const VariableToColumnMap& varColMap) const override;
|
43 | 53 |
|
44 |
| - // _________________________________________________________________________ |
45 |
| - [[nodiscard]] bool isPrefixExpression() const; |
46 |
| - |
47 |
| - // _________________________________________________________________________ |
| 54 | + // ___________________________________________________________________________ |
48 | 55 | Estimates getEstimatesForFilterExpression(
|
49 | 56 | uint64_t inputSize,
|
50 | 57 | const std::optional<Variable>& firstSortedVariable) const override;
|
51 | 58 |
|
52 | 59 | private:
|
53 |
| - std::span<SparqlExpression::Ptr> childrenImpl() override; |
54 |
| - |
55 |
| - // Evaluate for the special case, where the expression is a variable and we |
56 |
| - // have a simple prefix regex (in which case the regex match translates to a |
57 |
| - // simple range check). |
58 |
| - ExpressionResult evaluatePrefixRegex( |
59 |
| - const Variable& variable, |
60 |
| - sparqlExpression::EvaluationContext* context) const; |
61 |
| - |
62 |
| - // Evaluate for the general case. |
63 |
| - CPP_template(typename T)(requires SingleExpressionResult<T>) ExpressionResult |
64 |
| - evaluateGeneralCase(T&& input, |
65 |
| - sparqlExpression::EvaluationContext* context) const; |
| 60 | + std::span<Ptr> childrenImpl() override; |
66 | 61 |
|
67 | 62 | // Check if the `CancellationHandle` of `context` has been cancelled and throw
|
68 | 63 | // an exception if this is the case.
|
69 |
| - static void checkCancellation( |
70 |
| - const sparqlExpression::EvaluationContext* context, |
71 |
| - ad_utility::source_location location = |
72 |
| - ad_utility::source_location::current()); |
| 64 | + static void checkCancellation(const EvaluationContext* context, |
| 65 | + ad_utility::source_location location = |
| 66 | + ad_utility::source_location::current()); |
| 67 | + |
| 68 | + // Check if `regex` is a prefix regex which means that it starts with `^` and |
| 69 | + // contains no other "special" regex characters like `*` or `.`. If this check |
| 70 | + // succeeds, the prefix is returned without the leading `^` and with all |
| 71 | + // escaping undone. Else, `std::nullopt` is returned. |
| 72 | + static std::optional<std::string> getPrefixRegex(std::string regex); |
| 73 | + |
| 74 | + FRIEND_TEST(RegexExpression, getPrefixRegex); |
73 | 75 | };
|
74 |
| -namespace detail { |
75 |
| -// Check if `regex` is a prefix regex which means that it starts with `^` and |
76 |
| -// contains no other "special" regex characters like `*` or `.`. If this check |
77 |
| -// succeeds, the prefix is returned without the leading `^` and with all |
78 |
| -// escaping undone. Else, `std::nullopt` is returned. |
79 |
| -std::optional<std::string> getPrefixRegex(std::string regex); |
80 |
| -} // namespace detail |
| 76 | + |
| 77 | +SparqlExpression::Ptr makeRegexExpression(SparqlExpression::Ptr string, |
| 78 | + SparqlExpression::Ptr regex, |
| 79 | + SparqlExpression::Ptr flags); |
81 | 80 | } // namespace sparqlExpression
|
0 commit comments