Skip to content

Commit

Permalink
Support "only named captures" for pipeline grok function (#65)
Browse files Browse the repository at this point in the history
The server cache is necessary because named-capture support needs a separately compiled regex.
So far, the cache is only used by the grok function in the pipeline processor.

Closes #59
  • Loading branch information
kroepke authored and joschi committed Aug 1, 2016
1 parent 56b801b commit 84a9d68
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ public class GrokMatch extends AbstractFunction<GrokMatch.GrokResult> {

private final ParameterDescriptor<String, String> valueParam;
private final ParameterDescriptor<String, String> patternParam;
private final ParameterDescriptor<Boolean, Boolean> namedOnly;

private final GrokPatternRegistry grokPatternRegistry;

@Inject
Expand All @@ -45,17 +47,20 @@ public GrokMatch(GrokPatternRegistry grokPatternRegistry) {

valueParam = ParameterDescriptor.string("value").build();
patternParam = ParameterDescriptor.string("pattern").build();
namedOnly = ParameterDescriptor.bool("only_named_captures").optional().build();
}

@Override
public GrokResult evaluate(FunctionArgs args, EvaluationContext context) {
final String value = valueParam.required(args, context);
final String pattern = patternParam.required(args, context);
final boolean onlyNamedCaptures = namedOnly.optional(args, context).orElse(false);

if (value == null || pattern == null) {
return null;
}

final Grok grok = grokPatternRegistry.cachedGrokForPattern(pattern);
final Grok grok = grokPatternRegistry.cachedGrokForPattern(pattern, onlyNamedCaptures);

final Match match = grok.match(value);
match.captures();
Expand All @@ -67,7 +72,7 @@ public FunctionDescriptor<GrokResult> descriptor() {
return FunctionDescriptor.<GrokResult>builder()
.name(NAME)
.returnType(GrokResult.class)
.params(of(patternParam, valueParam))
.params(of(patternParam, valueParam, namedOnly))
.build();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,8 @@ public static void registerFunctions() {
Set<GrokPattern> patterns = Sets.newHashSet(
GrokPattern.create("GREEDY", ".*"),
GrokPattern.create("BASE10NUM", "(?<![0-9.+-])(?>[+-]?(?:(?:[0-9]+(?:\\.[0-9]+)?)|(?:\\.[0-9]+)))"),
GrokPattern.create("NUMBER", "(?:%{BASE10NUM:UNWANTED})")
GrokPattern.create("NUMBER", "(?:%{BASE10NUM:UNWANTED})"),
GrokPattern.create("NUM", "%{BASE10NUM}")
);
when(grokPatternService.loadAll()).thenReturn(patterns);
final EventBus clusterBus = new EventBus();
Expand Down Expand Up @@ -370,8 +371,11 @@ public void grok() {
final Message message = evaluateRule(rule);

assertThat(message).isNotNull();
assertThat(message.getFieldCount()).isEqualTo(4);
assertThat(message.getFieldCount()).isEqualTo(5);
assertThat(message.getTimestamp()).isEqualTo(DateTime.parse("2015-07-31T10:05:36.773Z"));
// named captures only
assertThat(message.hasField("num")).isTrue();
assertThat(message.hasField("BASE10NUM")).isFalse();
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,8 @@ when true
then
let matches = grok(pattern: "%{GREEDY:timestamp;date;yyyy-MM-dd'T'HH:mm:ss.SSSX}", value: "2015-07-31T10:05:36.773Z");
set_fields(matches);

// only named captures
let matches1 = grok("%{NUM:num}", "10", true);
set_fields(matches1);
end

0 comments on commit 84a9d68

Please sign in to comment.