1313#include < string_utils/constants.hpp>
1414#include < string_utils/string_utils.hpp>
1515
16- #include " Defs.h"
17- #include " EncodedVariableInterpreter.hpp"
18- #include " ir/parsing.hpp"
19- #include " ir/types.hpp"
20- #include " LogTypeDictionaryReaderReq.hpp"
21- #include " Query.hpp"
22- #include " QueryToken.hpp"
23- #include " VariableDictionaryReaderReq.hpp"
16+ #include < clp/Defs.h>
17+ #include < clp/EncodedVariableInterpreter.hpp>
18+ #include < clp/ir/parsing.hpp>
19+ #include < clp/ir/types.hpp>
20+ #include < clp/LogTypeDictionaryReaderReq.hpp>
21+ #include < clp/Query.hpp>
22+ #include < clp/QueryToken.hpp>
23+ #include < clp/SchemaSearcher.hpp>
24+ #include < clp/VariableDictionaryReaderReq.hpp>
2425
2526namespace clp {
2627class GrepCore {
@@ -75,24 +76,6 @@ class GrepCore {
7576 bool & is_var
7677 );
7778
78- /* *
79- * Returns bounds of next potential variable (either a definite variable or a token with
80- * wildcards)
81- * @param value String containing token
82- * @param begin_pos Begin position of last token, changes to begin position of next token
83- * @param end_pos End position of last token, changes to end position of next token
84- * @param is_var Whether the token is definitely a variable
85- * @param lexer DFA for determining if input is in the schema
86- * @return true if another potential variable was found, false otherwise
87- */
88- static bool get_bounds_of_next_potential_var (
89- std::string const & value,
90- size_t & begin_pos,
91- size_t & end_pos,
92- bool & is_var,
93- log_surgeon::lexers::ByteLexer& lexer
94- );
95-
9679private:
9780 // Types
9881 enum class SubQueryMatchabilityResult : uint8_t {
@@ -163,13 +146,18 @@ std::optional<Query> GrepCore::process_raw_query(
163146 log_surgeon::lexers::ByteLexer& lexer,
164147 bool use_heuristic
165148) {
166- // Split search_string into tokens with wildcards
167- std::vector<QueryToken> query_tokens;
168- size_t begin_pos = 0 ;
169- size_t end_pos = 0 ;
170- bool is_var;
171- std::string search_string_for_sub_queries{search_string};
172- if (use_heuristic) {
149+ std::vector<SubQuery> sub_queries;
150+ if (false == use_heuristic) {
151+ sub_queries
152+ = SchemaSearcher::search (search_string, lexer, logtype_dict, var_dict, ignore_case);
153+ } else {
154+ // Split search_string into tokens with wildcards
155+ std::vector<QueryToken> query_tokens;
156+ size_t begin_pos{0 };
157+ size_t end_pos{0 };
158+ bool is_var{false };
159+ std::string search_string_for_sub_queries{search_string};
160+
173161 // Replace unescaped '?' wildcards with '*' wildcards since we currently have no support for
174162 // generating sub-queries with '?' wildcards. The final wildcard match on the decompressed
175163 // message uses the original wildcards, so correctness will be maintained.
@@ -192,70 +180,55 @@ std::optional<Query> GrepCore::process_raw_query(
192180 {
193181 query_tokens.emplace_back (search_string_for_sub_queries, begin_pos, end_pos, is_var);
194182 }
195- } else {
196- while (get_bounds_of_next_potential_var (
197- search_string_for_sub_queries,
198- begin_pos,
199- end_pos,
200- is_var,
201- lexer
202- ))
203- {
204- query_tokens.emplace_back (search_string_for_sub_queries, begin_pos, end_pos, is_var);
205- }
206- }
207-
208- // Get pointers to all ambiguous tokens. Exclude tokens with wildcards in the middle since we
209- // fall-back to decompression + wildcard matching for those.
210- std::vector<QueryToken*> ambiguous_tokens;
211- for (auto & query_token : query_tokens) {
212- if (!query_token.has_greedy_wildcard_in_middle () && query_token.is_ambiguous_token ()) {
213- ambiguous_tokens.push_back (&query_token);
183+ // Get pointers to all ambiguous tokens. Exclude tokens with wildcards in the middle since
184+ // we fall back to decompression + wildcard matching for those.
185+ std::vector<QueryToken*> ambiguous_tokens;
186+ for (auto & query_token : query_tokens) {
187+ if (false == query_token.has_greedy_wildcard_in_middle ()
188+ && query_token.is_ambiguous_token ())
189+ {
190+ ambiguous_tokens.push_back (&query_token);
191+ }
214192 }
215- }
216193
217- // Generate a sub-query for each combination of ambiguous tokens
218- // E.g., if there are two ambiguous tokens each of which could be a logtype or variable, we need
219- // to create:
220- // - (token1 as logtype) (token2 as logtype)
221- // - (token1 as logtype) (token2 as var)
222- // - (token1 as var) (token2 as logtype)
223- // - (token1 as var) (token2 as var)
224- std::vector<SubQuery> sub_queries;
225- std::string logtype;
226- bool type_of_one_token_changed = true ;
227- while (type_of_one_token_changed) {
228- SubQuery sub_query;
229-
230- // Compute logtypes and variables for query
231- auto matchability = generate_logtypes_and_vars_for_subquery (
232- logtype_dict,
233- var_dict,
234- search_string_for_sub_queries,
235- query_tokens,
236- ignore_case,
237- sub_query
238- );
239- switch (matchability) {
240- case SubQueryMatchabilityResult::SupercedesAllSubQueries:
241- // Since other sub-queries will be superceded by this one, we can stop processing
242- // now
243- return Query{search_begin_ts, search_end_ts, ignore_case, search_string, {}};
244- case SubQueryMatchabilityResult::MayMatch:
245- sub_queries.push_back (std::move (sub_query));
246- break ;
247- case SubQueryMatchabilityResult::WontMatch:
248- default :
249- // Do nothing
250- break ;
251- }
194+ // Generate a sub-query for each combination of ambiguous tokens
195+ // E.g., if there are two ambiguous tokens each of which could be a logtype or variable, we
196+ // need to create:
197+ // - (token1 as logtype) (token2 as logtype)
198+ // - (token1 as logtype) (token2 as var)
199+ // - (token1 as var) (token2 as logtype)
200+ // - (token1 as var) (token2 as var)
201+ bool type_of_one_token_changed{true };
202+ while (type_of_one_token_changed) {
203+ SubQuery sub_query;
204+ auto matchability{generate_logtypes_and_vars_for_subquery (
205+ logtype_dict,
206+ var_dict,
207+ search_string_for_sub_queries,
208+ query_tokens,
209+ ignore_case,
210+ sub_query
211+ )};
212+ switch (matchability) {
213+ case SubQueryMatchabilityResult::SupercedesAllSubQueries:
214+ // Since other sub-queries will be superseded by this one, we can stop
215+ // processing now.
216+ return Query{search_begin_ts, search_end_ts, ignore_case, search_string, {}};
217+ case SubQueryMatchabilityResult::MayMatch:
218+ sub_queries.push_back (std::move (sub_query));
219+ break ;
220+ case SubQueryMatchabilityResult::WontMatch:
221+ default :
222+ break ;
223+ }
252224
253- // Update combination of ambiguous tokens
254- type_of_one_token_changed = false ;
255- for (auto * ambiguous_token : ambiguous_tokens) {
256- if (ambiguous_token->change_to_next_possible_type ()) {
257- type_of_one_token_changed = true ;
258- break ;
225+ // Update combination of ambiguous tokens
226+ type_of_one_token_changed = false ;
227+ for (auto * ambiguous_token : ambiguous_tokens) {
228+ if (ambiguous_token->change_to_next_possible_type ()) {
229+ type_of_one_token_changed = true ;
230+ break ;
231+ }
259232 }
260233 }
261234 }
0 commit comments