Skip to content

Commit 9e95bec

Browse files
committed
0.19.0 - improve annotation parsing to handle nameof(...) and default(...) arguments and some refactoring toward more functional code
1 parent 88ab130 commit 9e95bec

19 files changed

+381
-385
lines changed

CHANGELOG.md

+19-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,25 @@ This project does its best to adhere to [Semantic Versioning](http://semver.org/
44

55

66
--------
7-
### [0.18.1](N/A) - 2019-07-02
7+
### [0.19.0](N/A) - 2019-07-04
8+
#### Changed
9+
* `IdentifierTokenizer.createIdentifierWithGenericTypeTokenizer()` now takes one parameter `int maxGenericTypeDepth`
10+
* Changed `CsFileTokenizer.createFileParser()` -> `createCsTokenizers()` and `JavaFileTokenizer.createFileParser()` -> `createJavaTokenizers()`
11+
* Added some private constructors that throw AssertionError to static classes
12+
13+
#### Removed
14+
* Changed `GenericTypeTokenizer._createGenericTypeTokenizer()` from public to private
15+
* Removed `IdentifierTokenizer` field `static int genericTypeDepth` in favor of callers explicitly passing the argument to `createIdentifierWithGenericTypeTokenizer()` which now takes one parameter `int maxGenericTypeDepth`
16+
* Removed `CodeTokenizerBuilder` in favor of `CodeTokenizer` static methods
17+
* Manually build a tokenizer list of type `PairList<CharParserFactory, TextTransformer<CodeTokenType>>`
18+
* Call `CodeTokenizer.createTokenizer()` with the language you used to pass to the `CodeTokenizerBuilder` constructor and the list of tokenizers you manually created
19+
20+
#### Fixed
21+
* `AnnotationExtractor` to handle all C# keyword-followed-by-a-block annotation arguments like `default(T)`, `nameof(T)`, and `typeof(T)`
22+
23+
24+
--------
25+
### [0.18.1](https://github.com/TeamworkGuy2/JParseCode/commit/88ab130b4a6e79bdefa3f071ec64c19e316e91af) - 2019-07-02
826
#### Fixed
927
* `AnnotationExtractor` to handle C# `typeof(T)` annotation arguments
1028

bin/jparse_code-with-tests.jar

-1.08 KB
Binary file not shown.

bin/jparse_code.jar

-1.43 KB
Binary file not shown.

package-lib.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"version" : "0.18.1",
2+
"version" : "0.19.0",
33
"name" : "jparse-code",
44
"description" : "An in-progress suite of parsing/transpilation tools for C#, Java, and TypeScript code. Generates simple JSON ASTs.",
55
"homepage" : "https://github.com/TeamworkGuy2/JParseCode",

src/twg2/parser/codeParser/csharp/CsFileTokenizer.java

+31-19
Original file line numberDiff line numberDiff line change
@@ -1,46 +1,58 @@
11
package twg2.parser.codeParser.csharp;
22

3+
import twg2.collections.dataStructures.PairList;
34
import twg2.parser.Inclusion;
45
import twg2.parser.codeParser.CommentStyle;
56
import twg2.parser.fragment.CodeTokenType;
67
import twg2.parser.language.CodeLanguageOptions;
8+
import twg2.parser.textFragment.TextTransformer;
79
import twg2.parser.tokenizers.CodeBlockTokenizer;
810
import twg2.parser.tokenizers.CodeStringTokenizer;
9-
import twg2.parser.tokenizers.CodeTokenizerBuilder;
11+
import twg2.parser.tokenizers.CodeTokenizer;
1012
import twg2.parser.tokenizers.CommentTokenizer;
1113
import twg2.parser.tokenizers.IdentifierTokenizer;
1214
import twg2.parser.tokenizers.NumberTokenizer;
1315
import twg2.text.tokenizer.CharParserFactory;
1416
import twg2.text.tokenizer.StringBoundedParserBuilder;
1517
import twg2.text.tokenizer.StringParserBuilder;
1618

19+
import static twg2.parser.tokenizers.CodeTokenizer.ofType;
20+
1721
/**
1822
* @author TeamworkGuy2
1923
* @since 2015-2-9
2024
*/
2125
public class CsFileTokenizer {
26+
public static int maxGenericTypeDepth = 3;
2227
public static int cnt = 0;
2328

24-
public static CodeTokenizerBuilder<CodeLanguageOptions.CSharp> createFileParser() {
25-
var identifierParser = IdentifierTokenizer.createIdentifierWithGenericTypeTokenizer();
29+
30+
public static CodeTokenizer createCsTokenizer() {
31+
return CodeTokenizer.createTokenizer(CodeLanguageOptions.C_SHARP, createCsTokenizers());
32+
}
33+
34+
35+
public static PairList<CharParserFactory, TextTransformer<CodeTokenType>> createCsTokenizers() {
36+
var identifierParser = IdentifierTokenizer.createIdentifierWithGenericTypeTokenizer(maxGenericTypeDepth);
2637
var numericLiteralParser = NumberTokenizer.createNumericLiteralTokenizer();
2738

28-
var parser = new CodeTokenizerBuilder<>(CodeLanguageOptions.C_SHARP)
29-
.addParser(CommentTokenizer.createCommentTokenizer(CommentStyle.multiAndSingleLine()), CodeTokenType.COMMENT)
30-
.addParser(CodeStringTokenizer.createStringTokenizerForCSharp(), CodeTokenType.STRING)
31-
.addParser(CodeBlockTokenizer.createBlockTokenizer('{', '}'), CodeTokenType.BLOCK)
32-
.addParser(CodeBlockTokenizer.createBlockTokenizer('(', ')'), CodeTokenType.BLOCK)
33-
.addParser(createAnnotationTokenizer(), CodeTokenType.BLOCK)
34-
.addParser(identifierParser, (text, off, len) -> {
35-
cnt++;
36-
// TODO performance
37-
return CsKeyword.check.isKeyword(text.toString()) ? CodeTokenType.KEYWORD : CodeTokenType.IDENTIFIER; // possible bad performance
38-
})
39-
.addParser(createOperatorTokenizer(), CodeTokenType.OPERATOR)
40-
.addParser(createSeparatorTokenizer(), CodeTokenType.SEPARATOR)
41-
.addParser(numericLiteralParser, CodeTokenType.NUMBER);
42-
43-
return parser;
39+
var parsers = new PairList<CharParserFactory, TextTransformer<CodeTokenType>>();
40+
41+
parsers.add(CommentTokenizer.createCommentTokenizer(CommentStyle.multiAndSingleLine()), ofType(CodeTokenType.COMMENT));
42+
parsers.add(CodeStringTokenizer.createStringTokenizerForCSharp(), ofType(CodeTokenType.STRING));
43+
parsers.add(CodeBlockTokenizer.createBlockTokenizer('{', '}'), ofType(CodeTokenType.BLOCK));
44+
parsers.add(CodeBlockTokenizer.createBlockTokenizer('(', ')'), ofType(CodeTokenType.BLOCK));
45+
parsers.add(createAnnotationTokenizer(), ofType(CodeTokenType.BLOCK));
46+
parsers.add(identifierParser, (text, off, len) -> {
47+
cnt++;
48+
// TODO performance
49+
return CsKeyword.check.isKeyword(text.toString()) ? CodeTokenType.KEYWORD : CodeTokenType.IDENTIFIER; // possible bad performance
50+
});
51+
parsers.add(createOperatorTokenizer(), ofType(CodeTokenType.OPERATOR));
52+
parsers.add(createSeparatorTokenizer(), ofType(CodeTokenType.SEPARATOR));
53+
parsers.add(numericLiteralParser, ofType(CodeTokenType.NUMBER));
54+
55+
return parsers;
4456
}
4557

4658

src/twg2/parser/codeParser/extractors/AnnotationExtractor.java

+46-41
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import java.util.HashMap;
44

55
import twg2.ast.interm.annotation.AnnotationSig;
6-
import twg2.parser.codeParser.csharp.CsKeyword;
6+
import twg2.collections.interfaces.ListReadOnly;
77
import twg2.parser.codeParser.tools.NameUtil;
88
import twg2.parser.fragment.CodeToken;
99
import twg2.parser.fragment.CodeTokenType;
@@ -64,46 +64,7 @@ public static AnnotationSig parseAnnotationBlock(CodeLanguage lang, CodeTokenTyp
6464
}
6565

6666
// parse the annotation argument value
67-
// number: 'Annotation(1)' or 'Annotation(-15)'
68-
int num;
69-
if((num = DataTypeExtractor.isNumber(param, (i + 1 < size ? paramChilds.get(i + 1).getData() : null))) > 0) {
70-
String paramValue = param.getText() + (i + 1 < size && num > 1 ? paramChilds.get(i + 1).getData().getText() : "");
71-
params.put(paramName, paramValue);
72-
i += (num - 1);
73-
}
74-
// string: 'Annotation("str")'
75-
else if(paramType == CodeTokenType.STRING) {
76-
String valueStr = StringTrim.trimQuotes(param.getText());
77-
78-
// handles concatenated strings 'Annotation(name = 'a' + 'b')
79-
if(i + 2 < size && operatorUtil.concatOperators().is(paramChilds.get(i + 1).getData()) && paramChilds.get(i + 2).getData().getTokenType() == CodeTokenType.STRING) {
80-
valueStr = valueStr + StringTrim.trimQuotes(paramChilds.get(i + 2).getData().getText());
81-
i += 2;
82-
}
83-
84-
params.put(paramName, valueStr);
85-
}
86-
else if(paramType == CodeTokenType.KEYWORD) {
87-
if(param.getText().toUpperCase().contains("TYPEOF")) {
88-
System.out.println("test");
89-
}
90-
// type-literal-keyword: 'Annotation(true)'
91-
if(lang.getKeywordUtil().typeLiterals().is(param)) {
92-
params.put(paramName, param.getText());
93-
}
94-
// hack for C# typeof(T) in annotation parameter lists
95-
else if(CsKeyword.TYPEOF.toSrc().equals(param.getText()) && i + 1 < size && CodeTokenType.BLOCK == paramChilds.get(i + 1).getData().getTokenType()) {
96-
params.put(paramName, param.getText() + paramChilds.get(i + 1).getData().getText());
97-
i++;
98-
}
99-
}
100-
// catches other things like 'Annotation(Integer.TYPE)' or 'Annotation(String.class)'
101-
else if(paramType == CodeTokenType.IDENTIFIER) {
102-
params.put(paramName, param.getText());
103-
}
104-
else {
105-
throw new IllegalArgumentException("annotation param expected to start with identifier, string, number, or boolean, found " + paramType + " '" + param.getText() + "'");
106-
}
67+
i += parseAnnotationArgument(lang, param, paramName, paramType, i, size, paramChilds, params);
10768
}
10869
}
10970
// contains just an annotation name, no (arguments...), e.g. 'Annotation'
@@ -117,4 +78,48 @@ else if(paramType == CodeTokenType.IDENTIFIER) {
11778
return new AnnotationSig(annotName, NameUtil.splitFqName(annotName), params);
11879
}
11980

81+
82+
private static int parseAnnotationArgument(CodeLanguage lang, CodeToken param, String paramName, CodeTokenType paramType, int i, int size, ListReadOnly<SimpleTree<CodeToken>> paramChilds, HashMap<String, String> dstParams) {
83+
// number: 'Annotation(1)' or 'Annotation(-15)'
84+
int num;
85+
if((num = DataTypeExtractor.isNumber(param, (i + 1 < size ? paramChilds.get(i + 1).getData() : null))) > 0) {
86+
String paramValue = param.getText() + (i + 1 < size && num > 1 ? paramChilds.get(i + 1).getData().getText() : "");
87+
dstParams.put(paramName, paramValue);
88+
return (num - 1);
89+
}
90+
// string: 'Annotation("str")'
91+
else if(paramType == CodeTokenType.STRING) {
92+
String valueStr = StringTrim.trimQuotes(param.getText());
93+
94+
// handle concatenated strings 'Annotation(name = 'a' + 'b')
95+
if(i + 2 < size && lang.getOperatorUtil().concatOperators().is(paramChilds.get(i + 1).getData()) && paramChilds.get(i + 2).getData().getTokenType() == CodeTokenType.STRING) {
96+
valueStr = valueStr + StringTrim.trimQuotes(paramChilds.get(i + 2).getData().getText());
97+
dstParams.put(paramName, valueStr);
98+
return 2;
99+
}
100+
else {
101+
dstParams.put(paramName, valueStr);
102+
return 0;
103+
}
104+
}
105+
// keyword-or-identifier followed by constant-block-expression: 'Annotation(typeof(String))' (for C# default(T), nameof(T), and typeof(T))
106+
else if((paramType == CodeTokenType.KEYWORD || paramType == CodeTokenType.IDENTIFIER) && i + 1 < size && CodeTokenType.BLOCK == paramChilds.get(i + 1).getData().getTokenType()) {
107+
dstParams.put(paramName, param.getText() + paramChilds.get(i + 1).getData().getText());
108+
return 1;
109+
}
110+
// type-literal-keyword: 'Annotation(true)'
111+
else if(paramType == CodeTokenType.KEYWORD && lang.getKeywordUtil().typeLiterals().is(param)) {
112+
dstParams.put(paramName, param.getText());
113+
return 0;
114+
}
115+
// catches other things like 'Annotation(Integer.TYPE)' or 'Annotation(String.class)'
116+
else if(paramType == CodeTokenType.IDENTIFIER) {
117+
dstParams.put(paramName, param.getText());
118+
return 0;
119+
}
120+
else {
121+
throw new IllegalArgumentException("annotation param expected to start with identifier, string, number, or boolean, found " + paramType + " '" + param.getText() + "'");
122+
}
123+
}
124+
120125
}

src/twg2/parser/codeParser/extractors/CommentAndWhitespaceExtractor.java

+8-5
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import java.util.EnumSet;
66
import java.util.List;
77

8+
import twg2.collections.dataStructures.PairList;
89
import twg2.collections.primitiveCollections.IntArrayList;
910
import twg2.collections.primitiveCollections.IntListSorted;
1011
import twg2.parser.codeParser.CommentStyle;
@@ -13,8 +14,9 @@
1314
import twg2.parser.fragment.CodeTokenType;
1415
import twg2.parser.language.CodeLanguage;
1516
import twg2.parser.textFragment.TextFragmentRef;
17+
import twg2.parser.textFragment.TextTransformer;
1618
import twg2.parser.tokenizers.CodeStringTokenizer;
17-
import twg2.parser.tokenizers.CodeTokenizerBuilder;
19+
import twg2.parser.tokenizers.CodeTokenizer;
1820
import twg2.parser.tokenizers.CommentTokenizer;
1921
import twg2.parser.workflow.CodeFileSrc;
2022
import twg2.text.stringUtils.StringCheck;
@@ -41,10 +43,11 @@ public static CodeFileSrc buildCommentsAndWhitespaceTree(EnumSet<CommentStyle> s
4143
CharParserFactory stringParser = CodeStringTokenizer.createStringTokenizerForJavascript();
4244
CharParserFactory commentParser = CommentTokenizer.createCommentTokenizer(style);
4345

44-
var parser = new CodeTokenizerBuilder<>((CodeLanguage)null)
45-
.addParser(commentParser, CodeTokenType.COMMENT)
46-
.addParser(stringParser, CodeTokenType.STRING)
47-
.build();
46+
var parsers = new PairList<CharParserFactory, TextTransformer<CodeTokenType>>();
47+
parsers.add(commentParser, CodeTokenizer.ofType(CodeTokenType.COMMENT));
48+
parsers.add(stringParser, CodeTokenizer.ofType(CodeTokenType.STRING));
49+
50+
var parser = CodeTokenizer.createTokenizer((CodeLanguage)null, parsers);
4851
return parser.tokenizeDocument(src, srcOff, srcLen, srcName, null);
4952
}
5053

src/twg2/parser/codeParser/java/JavaFileTokenizer.java

+30-17
Original file line numberDiff line numberDiff line change
@@ -3,39 +3,52 @@
33
import twg2.parser.codeParser.CommentStyle;
44
import twg2.parser.fragment.CodeTokenType;
55
import twg2.parser.language.CodeLanguageOptions;
6+
import twg2.parser.textFragment.TextTransformer;
67
import twg2.parser.tokenizers.CodeBlockTokenizer;
78
import twg2.parser.tokenizers.CodeStringTokenizer;
8-
import twg2.parser.tokenizers.CodeTokenizerBuilder;
9+
import twg2.parser.tokenizers.CodeTokenizer;
910
import twg2.parser.tokenizers.CommentTokenizer;
1011
import twg2.parser.tokenizers.IdentifierTokenizer;
1112
import twg2.parser.tokenizers.NumberTokenizer;
1213
import twg2.text.tokenizer.CharParserFactory;
1314
import twg2.text.tokenizer.StringParserBuilder;
1415

16+
import static twg2.parser.tokenizers.CodeTokenizer.ofType;
17+
18+
import twg2.collections.dataStructures.PairList;
19+
1520
/**
1621
* @author TeamworkGuy2
1722
* @since 2015-2-9
1823
*/
1924
public class JavaFileTokenizer {
25+
public static int maxGenericTypeDepth = 3;
26+
27+
28+
public static CodeTokenizer createJavaTokenizer() {
29+
return CodeTokenizer.createTokenizer(CodeLanguageOptions.C_SHARP, createJavaTokenizers());
30+
}
31+
2032

21-
public static CodeTokenizerBuilder<CodeLanguageOptions.Java> createFileParser() {
22-
var identifierParser = IdentifierTokenizer.createIdentifierWithGenericTypeTokenizer();
33+
public static PairList<CharParserFactory, TextTransformer<CodeTokenType>> createJavaTokenizers() {
34+
var identifierParser = IdentifierTokenizer.createIdentifierWithGenericTypeTokenizer(maxGenericTypeDepth);
2335
var numericLiteralParser = NumberTokenizer.createNumericLiteralTokenizer();
2436

25-
var parser = new CodeTokenizerBuilder<>(CodeLanguageOptions.JAVA)
26-
.addParser(CommentTokenizer.createCommentTokenizer(CommentStyle.multiAndSingleLine()), CodeTokenType.COMMENT)
27-
.addParser(CodeStringTokenizer.createStringTokenizerForJava(), CodeTokenType.STRING)
28-
.addParser(CodeBlockTokenizer.createBlockTokenizer('{', '}'), CodeTokenType.BLOCK)
29-
.addParser(CodeBlockTokenizer.createBlockTokenizer('(', ')'), CodeTokenType.BLOCK)
30-
// no annotation parser, instead we parse
31-
.addParser(identifierParser, (text, off, len) -> {
32-
return JavaKeyword.check.isKeyword(text.toString()) ? CodeTokenType.KEYWORD : CodeTokenType.IDENTIFIER; // possible bad performance
33-
})
34-
.addParser(createOperatorTokenizer(), CodeTokenType.OPERATOR)
35-
.addParser(createSeparatorTokenizer(), CodeTokenType.SEPARATOR)
36-
.addParser(numericLiteralParser, CodeTokenType.NUMBER);
37-
38-
return parser;
37+
var parsers = new PairList<CharParserFactory, TextTransformer<CodeTokenType>>();
38+
39+
parsers.add(CommentTokenizer.createCommentTokenizer(CommentStyle.multiAndSingleLine()), ofType(CodeTokenType.COMMENT));
40+
parsers.add(CodeStringTokenizer.createStringTokenizerForJava(), ofType(CodeTokenType.STRING));
41+
parsers.add(CodeBlockTokenizer.createBlockTokenizer('{', '}'), ofType(CodeTokenType.BLOCK));
42+
parsers.add(CodeBlockTokenizer.createBlockTokenizer('(', ')'), ofType(CodeTokenType.BLOCK));
43+
// no annotation parser, instead we parse
44+
parsers.add(identifierParser, (text, off, len) -> {
45+
return JavaKeyword.check.isKeyword(text.toString()) ? CodeTokenType.KEYWORD : CodeTokenType.IDENTIFIER; // possible bad performance
46+
});
47+
parsers.add(createOperatorTokenizer(), ofType(CodeTokenType.OPERATOR));
48+
parsers.add(createSeparatorTokenizer(), ofType(CodeTokenType.SEPARATOR));
49+
parsers.add(numericLiteralParser, ofType(CodeTokenType.NUMBER));
50+
51+
return parsers;
3952
}
4053

4154

src/twg2/parser/language/CodeLanguageOptions.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
import twg2.parser.codeParser.java.JavaFileTokenizer;
2929
import twg2.parser.codeParser.java.JavaKeyword;
3030
import twg2.parser.codeParser.java.JavaOperator;
31-
import twg2.parser.tokenizers.CodeTokenizerBuilder;
31+
import twg2.parser.tokenizers.CodeTokenizer;
3232
import twg2.parser.workflow.CodeFileSrc;
3333
import twg2.parser.workflow.ParseInput;
3434

@@ -160,12 +160,12 @@ public static class Java extends CodeLanguageImpl<JavaBlock, JavaKeyword, Java,
160160

161161
public static final CSharp C_SHARP = registerCodeLanguage(
162162
new CSharp("C#", new CsBlockUtil(), new CsAstUtil(), CsKeyword.check, CsOperator.check,
163-
CodeTokenizerBuilder.createTokenizerWithTimer(() -> CsFileTokenizer.createFileParser().build()), new CsBlockParser(), Arrays.asList("cs"))
163+
CodeTokenizer.createTokenizerWithTimer(() -> CsFileTokenizer.createCsTokenizer()), new CsBlockParser(), Arrays.asList("cs"))
164164
);
165165

166166
public static final Java JAVA = registerCodeLanguage(
167167
new Java("Java", new JavaBlockUtil(), new JavaAstUtil(), JavaKeyword.check, JavaOperator.check,
168-
CodeTokenizerBuilder.createTokenizerWithTimer(() -> JavaFileTokenizer.createFileParser().build()), new JavaBlockParser(), Arrays.asList("java"))
168+
CodeTokenizer.createTokenizerWithTimer(() -> JavaFileTokenizer.createJavaTokenizer()), new JavaBlockParser(), Arrays.asList("java"))
169169
);
170170

171171
public static final CodeLanguageImpl<BlockType, Keyword, CodeLanguage, Operator, AstUtil<BlockType, Keyword>, OperatorUtil<Operator>, AstExtractor<BlockType>> JAVASCRIPT = registerCodeLanguage(

src/twg2/parser/miscellaneous/MiscellaneousTest.java

-49
This file was deleted.

0 commit comments

Comments
 (0)