Skip to content

Commit 4816ef3

Browse files
Added yaml lexer for yaml file analysis (#4409)
Added yaml lexer for yaml file analysis fixes #4353 --------- Signed-off-by: Gino Augustine <ginoaugustine@gmail.com> Co-authored-by: Vladimir Kotal <vladimir.kotal@oracle.com>
1 parent ab70822 commit 4816ef3

File tree

13 files changed

+680
-3
lines changed

13 files changed

+680
-3
lines changed

Dockerfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ RUN echo 'deb https://package.perforce.com/apt/ubuntu jammy release' > /etc/apt/
5959
RUN apt-get update && \
6060
apt-get install --no-install-recommends -y git subversion mercurial cvs cssc bzr rcs rcs-blame helix-p4d \
6161
unzip inotify-tools python3 python3-pip \
62-
python3-venv python3-setuptools openssh-client
62+
python3-venv python3-setuptools openssh-client libyaml-dev
6363

6464
# compile and install universal-ctags
6565
# hadolint ignore=DL3003,DL3008

opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/AnalyzerGuru.java

+4-2
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@
109109
import org.opengrok.indexer.analysis.uue.UuencodeAnalyzerFactory;
110110
import org.opengrok.indexer.analysis.vb.VBAnalyzerFactory;
111111
import org.opengrok.indexer.analysis.verilog.VerilogAnalyzerFactory;
112+
import org.opengrok.indexer.analysis.yaml.YamlAnalyzerFactory;
112113
import org.opengrok.indexer.configuration.Project;
113114
import org.opengrok.indexer.configuration.RuntimeEnvironment;
114115
import org.opengrok.indexer.history.Annotation;
@@ -259,6 +260,7 @@ public class AnalyzerGuru {
259260
new IgnorantAnalyzerFactory(),
260261
new BZip2AnalyzerFactory(),
261262
new XMLAnalyzerFactory(),
263+
YamlAnalyzerFactory.DEFAULT_INSTANCE,
262264
MandocAnalyzerFactory.DEFAULT_INSTANCE,
263265
TroffAnalyzerFactory.DEFAULT_INSTANCE,
264266
new ELFAnalyzerFactory(),
@@ -340,15 +342,15 @@ public class AnalyzerGuru {
340342
* {@link FileAnalyzerFactory} subclasses are revised to target more or
341343
* different files.
342344
* @return a value whose lower 32-bits are a static value
343-
* 20201003_00
345+
* 20230921_00
344346
* for the current implementation and whose higher-32 bits are non-zero if
345347
* {@link #addExtension(java.lang.String, AnalyzerFactory)}
346348
* or
347349
* {@link #addPrefix(java.lang.String, AnalyzerFactory)}
348350
* has been called.
349351
*/
350352
public static long getVersionNo() {
351-
final int ver32 = 20201003_00; // Edit comment above too!
353+
final int ver32 = 20230921_00; // Edit comment above too!
352354
long ver = ver32;
353355
if (customizationHashCode != 0) {
354356
ver |= (long) customizationHashCode << 32;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2023, Oracle and/or its affiliates.
22+
*/
23+
package org.opengrok.indexer.analysis.yaml;
24+
25+
import java.util.HashSet;
26+
import java.util.Set;
27+
28+
/**
29+
* Holds static hash set containing the YAML keywords (the boolean and null literals in lower, capitalized and upper case).
30+
*/
31+
public class Consts {
32+
33+
static final Set<String> kwd = new HashSet<>(); // filled once by the static initializer below
34+
static {
35+
kwd.add("true"); // lower-case spellings
36+
kwd.add("false");
37+
kwd.add("null");
38+
kwd.add("True"); // capitalized spellings
39+
kwd.add("False");
40+
kwd.add("Null");
41+
kwd.add("TRUE"); // upper-case spellings
42+
kwd.add("FALSE");
43+
kwd.add("NULL");
44+
}
45+
46+
private Consts() { // private and never called in this class: the class only serves as a holder for kwd
47+
}
48+
49+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2023, Oracle and/or its affiliates.
22+
* Portions Copyright (c) 2023, Gino Augustine <gino.augustine@oracle.com>.
23+
*/
24+
package org.opengrok.indexer.analysis.yaml;
25+
26+
import org.opengrok.indexer.analysis.AbstractAnalyzer;
27+
import org.opengrok.indexer.analysis.AnalyzerFactory;
28+
import org.opengrok.indexer.analysis.JFlexTokenizer;
29+
import org.opengrok.indexer.analysis.JFlexXref;
30+
import org.opengrok.indexer.analysis.plain.AbstractSourceCodeAnalyzer;
31+
32+
import java.io.Reader;
33+
34+
35+
/**
36+
* Analyzer for YAML files: symbols are produced by {@code YamlSymbolTokenizer} and cross-references by {@code YamlXref}.
37+
* @author Gino Augustine
38+
*/
39+
public class YamlAnalyzer extends AbstractSourceCodeAnalyzer {
40+
41+
42+
/**
43+
* Creates a new instance of YamlAnalyzer whose symbol tokenizer is a {@code JFlexTokenizer} wrapping a {@code YamlSymbolTokenizer}.
44+
* @param factory defined instance for the analyzer, passed through to the superclass constructor
45+
*/
46+
protected YamlAnalyzer(AnalyzerFactory factory) {
47+
super(factory, () -> new JFlexTokenizer(new YamlSymbolTokenizer(
48+
AbstractAnalyzer.DUMMY_READER))); // the tokenizer starts on a placeholder reader; presumably the real input is attached later - confirm in JFlexTokenizer
49+
}
50+
51+
/**
52+
* @return {@code "Yaml"}
53+
*/
54+
@Override
55+
public String getCtagsLang() {
56+
return "Yaml";
57+
}
58+
59+
/**
60+
* Gets a version number to be used to tag processed documents so that
61+
* re-analysis can be re-done later if a stored version number is different
62+
* from the current implementation.
63+
* @return 20230921_00
64+
*/
65+
@Override
66+
protected int getSpecializedVersionNo() {
67+
return 20230921_00; // Edit comment above too!
68+
}
69+
70+
71+
/**
72+
* Create an xref for the language supported by this analyzer: a {@code JFlexXref} wrapping a {@code YamlXref} over the given reader.
73+
*
74+
* @param reader the data to produce xref for
75+
* @return an xref instance
76+
*/
77+
@Override
78+
protected JFlexXref newXref(Reader reader) {
79+
return new JFlexXref(new YamlXref(reader));
80+
}
81+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2023, Oracle and/or its affiliates.
22+
* Portions Copyright (c) 2023, Gino Augustine <gino.augustine@oracle.com>.
23+
*/
24+
package org.opengrok.indexer.analysis.yaml;
25+
26+
import org.opengrok.indexer.analysis.AbstractAnalyzer;
27+
import org.opengrok.indexer.analysis.FileAnalyzerFactory;
28+
29+
public class YamlAnalyzerFactory extends FileAnalyzerFactory { // factory that creates YamlAnalyzer instances for the file suffixes listed below
30+
31+
private static final String NAME = "Yaml"; // name handed to the superclass constructor
32+
33+
private static final String[] SUFFIXES = { // NOTE(review): upper-case on purpose, presumably compared against upper-cased file extensions - confirm in AnalyzerGuru
34+
"YAML",
35+
"YML"
36+
};
37+
38+
public static final YamlAnalyzerFactory DEFAULT_INSTANCE =
39+
new YamlAnalyzerFactory(); // the constructor is private, so this is the only instance this class creates
40+
41+
42+
private YamlAnalyzerFactory() {
43+
super(null, null, SUFFIXES, null, null, "text/plain", // only the suffix list is supplied; the null arguments presumably mean no names, prefixes, magics or matcher - confirm against FileAnalyzerFactory
44+
AbstractAnalyzer.Genre.PLAIN, NAME);
45+
}
46+
47+
@Override
48+
protected AbstractAnalyzer newAnalyzer() {
49+
return new YamlAnalyzer(this); // every call builds a fresh analyzer bound to this factory
50+
}
51+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2023, Oracle and/or its affiliates.
22+
* Portions Copyright (c) 2023, Gino Augustine <gino.augustine@oracle.com>.
23+
*/
24+
25+
26+
27+
Identifier = [a-zA-Z0-9_-]+
28+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2006, 2023, Oracle and/or its affiliates. All rights reserved.
22+
* Portions Copyright (c) 2023, Gino Augustine <gino.augustine@oracle.com>.
23+
*/
24+
25+
/*
26+
* Gets YAML symbols (anchor and alias names) - ignores comments, strings, keywords
27+
*/
28+
29+
package org.opengrok.indexer.analysis.yaml;
30+
31+
import org.opengrok.indexer.analysis.JFlexSymbolMatcher;
32+
%%
33+
%public
34+
%class YamlSymbolTokenizer
35+
%extends JFlexSymbolMatcher
36+
%unicode
37+
%buffer 32766
38+
%int
39+
%include ../CommonLexer.lexh
40+
%include ../Common.lexh
41+
%include Yaml.lexh
42+
%char
43+
44+
%state STRING QSTRING SCOMMENT ALIAS_ANCHOR
45+
46+
ANCHOR_ALIAS_START = [-?:]{WhspChar}+[*\&]
47+
%%
48+
49+
50+
<YYINITIAL> {
51+
52+
{ANCHOR_ALIAS_START} { yybegin(ALIAS_ANCHOR); /* an indicator, whitespace and then an anchor or alias sigil: the name that follows is read in ALIAS_ANCHOR */ }
53+
\" { yybegin(STRING); /* opening of a double-quoted string */ }
54+
\' { yybegin(QSTRING); /* opening of a single-quoted string */ }
55+
"#" { yybegin(SCOMMENT); /* start of a comment */ }
56+
}
57+
58+
<ALIAS_ANCHOR> {
59+
{Identifier} {
60+
String id = yytext(); // the anchor or alias name that follows the sigil
61+
onSymbolMatched(id, yychar); // report the name together with its character offset
62+
return yystate(); // hand control back to the caller with the current lexical state as the int result
63+
}
64+
[^] {yybegin(YYINITIAL); /* any other character ends the name and resumes normal scanning */}
65+
}
66+
67+
<STRING> {
68+
\\[\"\\] { /* escaped double quote or escaped backslash: consumed so that it cannot close the string */ }
69+
\" { yybegin(YYINITIAL); /* unescaped double quote closes the string */ }
70+
}
71+
72+
<QSTRING> {
73+
\'\' { /* a doubled single quote is the only escape inside a YAML single-quoted scalar, and backslash is an ordinary character there: consume the pair and stay in QSTRING */ }
74+
\' { yybegin(YYINITIAL); /* a lone single quote closes the scalar */ }
75+
}
76+
77+
78+
<SCOMMENT> {
79+
{EOL} { yybegin(YYINITIAL); /* the comment ends at the line break */}
80+
}
81+
82+
<YYINITIAL, STRING, SCOMMENT, QSTRING> {
83+
{WhspChar}+ { /* whitespace runs are skipped without reporting anything */ }
84+
[^] { /* every other character is skipped too: only ALIAS_ANCHOR reports symbols */ }
85+
}

0 commit comments

Comments
 (0)