Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added yaml lexer for yaml file analysis #4409

Merged
merged 9 commits into from
Sep 21, 2023
Merged
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ RUN echo 'deb https://package.perforce.com/apt/ubuntu jammy release' > /etc/apt/
RUN apt-get update && \
apt-get install --no-install-recommends -y git subversion mercurial cvs cssc bzr rcs rcs-blame helix-p4d \
unzip inotify-tools python3 python3-pip \
python3-venv python3-setuptools openssh-client
python3-venv python3-setuptools openssh-client libyaml-dev

# compile and install universal-ctags
# hadolint ignore=DL3003,DL3008
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@
import org.opengrok.indexer.analysis.uue.UuencodeAnalyzerFactory;
import org.opengrok.indexer.analysis.vb.VBAnalyzerFactory;
import org.opengrok.indexer.analysis.verilog.VerilogAnalyzerFactory;
import org.opengrok.indexer.analysis.yaml.YamlAnalyzerFactory;
import org.opengrok.indexer.configuration.Project;
import org.opengrok.indexer.configuration.RuntimeEnvironment;
import org.opengrok.indexer.history.Annotation;
Expand Down Expand Up @@ -259,6 +260,7 @@ public class AnalyzerGuru {
new IgnorantAnalyzerFactory(),
new BZip2AnalyzerFactory(),
new XMLAnalyzerFactory(),
YamlAnalyzerFactory.DEFAULT_INSTANCE,
MandocAnalyzerFactory.DEFAULT_INSTANCE,
TroffAnalyzerFactory.DEFAULT_INSTANCE,
new ELFAnalyzerFactory(),
Expand Down Expand Up @@ -340,15 +342,15 @@ public class AnalyzerGuru {
* {@link FileAnalyzerFactory} subclasses are revised to target more or
* different files.
* @return a value whose lower 32-bits are a static value
* 20201003_00
* 20230921_00
* for the current implementation and whose higher-32 bits are non-zero if
* {@link #addExtension(java.lang.String, AnalyzerFactory)}
* or
* {@link #addPrefix(java.lang.String, AnalyzerFactory)}
* has been called.
*/
public static long getVersionNo() {
final int ver32 = 20201003_00; // Edit comment above too!
final int ver32 = 20230921_00; // Edit comment above too!
long ver = ver32;
if (customizationHashCode != 0) {
ver |= (long) customizationHashCode << 32;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* See LICENSE.txt included in this distribution for the specific
* language governing permissions and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at LICENSE.txt.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/

/*
* Copyright (c) 2023, Oracle and/or its affiliates.
*/
package org.opengrok.indexer.analysis.yaml;

import java.util.HashSet;
import java.util.Set;

/**
 * Holds the static hash set of YAML keywords: the {@code true},
 * {@code false} and {@code null} literals in their lowercase, capitalized
 * and uppercase spellings.
 */
public class Consts {

    /** Keyword lookup set, filled once by the static initializer below. */
    static final Set<String> kwd = new HashSet<>();

    static {
        // Same nine literals, added in the same order as before.
        final String[] literals = {
            "true", "false", "null",
            "True", "False", "Null",
            "TRUE", "FALSE", "NULL"
        };
        for (String literal : literals) {
            kwd.add(literal);
        }
    }

    /** Not instantiable: the class only carries static data. */
    private Consts() {
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* See LICENSE.txt included in this distribution for the specific
* language governing permissions and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at LICENSE.txt.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/

/*
* Copyright (c) 2023, Oracle and/or its affiliates.
* Portions Copyright (c) 2023, Gino Augustine <gino.augustine@oracle.com>.
*/
package org.opengrok.indexer.analysis.yaml;

import org.opengrok.indexer.analysis.AbstractAnalyzer;
import org.opengrok.indexer.analysis.AnalyzerFactory;
import org.opengrok.indexer.analysis.JFlexTokenizer;
import org.opengrok.indexer.analysis.JFlexXref;
import org.opengrok.indexer.analysis.plain.AbstractSourceCodeAnalyzer;

import java.io.Reader;


/**
 * Analyzer for YAML files. Symbols are tokenized with
 * {@link YamlSymbolTokenizer} and cross-references are produced with
 * {@link YamlXref}.
 *
 * @author Gino Augustine
 */
public class YamlAnalyzer extends AbstractSourceCodeAnalyzer {

    /**
     * Creates a new instance of YamlAnalyzer.
     * <p>
     * The superclass is handed a supplier that builds a fresh
     * {@link JFlexTokenizer} around a {@link YamlSymbolTokenizer} which is
     * initially bound to {@link AbstractAnalyzer#DUMMY_READER}.
     * @param factory defined instance for the analyzer
     */
    protected YamlAnalyzer(AnalyzerFactory factory) {
        super(factory, () -> new JFlexTokenizer(new YamlSymbolTokenizer(
                AbstractAnalyzer.DUMMY_READER)));
    }

    /**
     * @return {@code "Yaml"}
     */
    @Override
    public String getCtagsLang() {
        return "Yaml";
    }

    /**
     * Gets a version number to be used to tag processed documents so that
     * re-analysis can be re-done later if a stored version number is different
     * from the current implementation.
     * @return 20230921_00
     */
    @Override
    protected int getSpecializedVersionNo() {
        return 20230921_00; // Edit comment above too!
    }

    /**
     * Create an xref for the language supported by this analyzer.
     *
     * @param reader the data to produce xref for
     * @return a {@link JFlexXref} wrapping a new {@link YamlXref} over
     * {@code reader}
     */
    @Override
    protected JFlexXref newXref(Reader reader) {
        return new JFlexXref(new YamlXref(reader));
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* See LICENSE.txt included in this distribution for the specific
* language governing permissions and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at LICENSE.txt.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/

/*
* Copyright (c) 2023, Oracle and/or its affiliates.
* Portions Copyright (c) 2023, Gino Augustine <gino.augustine@oracle.com>.
*/
package org.opengrok.indexer.analysis.yaml;

import org.opengrok.indexer.analysis.AbstractAnalyzer;
import org.opengrok.indexer.analysis.FileAnalyzerFactory;

/**
 * Factory that produces {@link YamlAnalyzer} instances. It is registered
 * under the name {@code "Yaml"} for the {@code YAML} and {@code YML}
 * suffixes, with content type {@code text/plain} and the
 * {@link AbstractAnalyzer.Genre#PLAIN} genre.
 */
public class YamlAnalyzerFactory extends FileAnalyzerFactory {

    /** Name passed to the superclass constructor. */
    private static final String ANALYZER_NAME = "Yaml";

    /** Suffixes passed to the superclass constructor. */
    private static final String[] FILE_SUFFIXES = {"YAML", "YML"};

    /** The single shared instance; the constructor is private. */
    public static final YamlAnalyzerFactory DEFAULT_INSTANCE = new YamlAnalyzerFactory();

    /**
     * Passes the suffixes, content type, genre and name to the superclass;
     * every other superclass constructor argument is left {@code null}.
     */
    private YamlAnalyzerFactory() {
        super(null, null, FILE_SUFFIXES, null, null, "text/plain",
                AbstractAnalyzer.Genre.PLAIN, ANALYZER_NAME);
    }

    /**
     * @return a new {@link YamlAnalyzer} bound to this factory
     */
    @Override
    protected AbstractAnalyzer newAnalyzer() {
        return new YamlAnalyzer(this);
    }
}
28 changes: 28 additions & 0 deletions opengrok-indexer/src/main/jflex/analysis/yaml/Yaml.lexh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* See LICENSE.txt included in this distribution for the specific
* language governing permissions and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at LICENSE.txt.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/

/*
* Copyright (c) 2023, Oracle and/or its affiliates.
* Portions Copyright (c) 2023, Gino Augustine <gino.augustine@oracle.com>.
*/



/*
 * Identifier: one or more ASCII letters, digits, underscores or hyphens.
 */
Identifier = [a-zA-Z0-9_-]+

Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* See LICENSE.txt included in this distribution for the specific
* language governing permissions and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at LICENSE.txt.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/

/*
* Copyright (c) 2006, 2023, Oracle and/or its affiliates. All rights reserved.
* Portions Copyright (c) 2023, Gino Augustine <gino.augustine@oracle.com>.
*/

/*
* Gets YAML symbols - ignores comments, strings, keywords
*/

package org.opengrok.indexer.analysis.yaml;

import org.opengrok.indexer.analysis.JFlexSymbolMatcher;
%%
%public
%class YamlSymbolTokenizer
%extends JFlexSymbolMatcher
%unicode
%buffer 32766
%int
%include ../CommonLexer.lexh
%include ../Common.lexh
%include Yaml.lexh
%char

/*
 * Lexical states used by the rules of this specification: STRING is entered
 * on a double quote, QSTRING on a single quote, SCOMMENT on '#', and
 * ALIAS_ANCHOR on a match of ANCHOR_ALIAS_START.
 */
%state STRING QSTRING SCOMMENT ALIAS_ANCHOR

/*
 * One of '-', '?' or ':' followed by one or more {WhspChar} and then '*' or
 * '&'. WhspChar is not defined in this file -- presumably it comes from one
 * of the included lexh files (TODO confirm).
 */
ANCHOR_ALIAS_START = [-?:]{WhspChar}+[*\&]
%%


// Default state: only looks for the openers that switch to a dedicated state.
<YYINITIAL> {

// An indicator, whitespace and then '&' or '*': an identifier may follow.
{ANCHOR_ALIAS_START} { yybegin(ALIAS_ANCHOR); }
// Opening double quote.
\" { yybegin(STRING); }
// Opening single quote.
\' { yybegin(QSTRING); }
// '#' switches to SCOMMENT.
"#" { yybegin(SCOMMENT); }
}

// Entered right after ANCHOR_ALIAS_START was matched in YYINITIAL.
<ALIAS_ANCHOR> {
// Report the identifier text as a symbol at offset yychar. The lexer state is
// not changed here, so the next token is still matched in ALIAS_ANCHOR.
{Identifier} {
String id = yytext();
onSymbolMatched(id, yychar);
return yystate();
}
// Any other single character goes back to YYINITIAL.
[^] {yybegin(YYINITIAL);}
}

// Inside a double-quoted string: nothing is reported as a symbol.
<STRING> {
// Skip a backslash-escaped double quote or backslash so that it cannot be
// mistaken for the closing quote.
\\[\"\\] {}
// Closing double quote.
\" { yybegin(YYINITIAL); }
}

// Inside a single-quoted string: nothing is reported as a symbol.
<QSTRING> {
/*
 * YAML single-quoted scalars have no backslash escapes; the only escape is a
 * doubled single quote (''). Consume the pair as one token so that it does
 * not end the string, and let a lone backslash fall through to the catch-all
 * rule so that a value such as 'C:\' is closed by its final quote.
 */
\'\' {}
// Closing single quote.
\' { yybegin(YYINITIAL); }
}


// Inside a '#' comment: an end-of-line match returns to YYINITIAL.
// EOL is not defined in this file -- presumably it comes from an included
// lexh file (TODO confirm).
<SCOMMENT> {
{EOL} { yybegin(YYINITIAL);}
}

// Catch-all rules for every state except ALIAS_ANCHOR: whitespace runs and any
// other single character are consumed without any action.
<YYINITIAL, STRING, SCOMMENT, QSTRING> {
{WhspChar}+ {}
[^] {}
}
Loading