Skip to content

Commit 9967d65

Browse files
committed
0.19.2 - Add varargs '...' parser support and update 'jtext-parser' and 'jtext-tokenizer' dependencies.
1 parent 691c019 commit 9967d65

22 files changed

+163
-126
lines changed

CHANGELOG.md

+17-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,23 @@ This project does its best to adhere to [Semantic Versioning](http://semver.org/
44

55

66
--------
7-
### [0.19.1](N/A) - 2020-04-20
7+
### [0.19.2](N/A) - 2020-05-23
8+
__Parameter varargs parsing support__ (i.e. 'int...' in Java).
9+
#### Changed
10+
* Update dependency `jtext-parser@0.16.0` and `jtext-tokenizer@0.4.0`
11+
* Classes combined, class names simplified, and unused classes and methods removed from libraries
12+
* Code identifier parser now provided by `jtext-tokenizer`
13+
* Several bug fixes around compound optional parser conditions
14+
* Added `char[] src, int srcOff, int srcLen` parameters to `CodeTokenizer.tokenizeDocument()`
15+
* Renamed `IdentifierTokenizer` `newIdentifierTokenizer()` to `createIdentifierTokenizer()`
16+
* Improved unit tests
17+
18+
#### Removed
19+
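* The previous `IdentifierTokenizer.createIdentifierTokenizer()` which returned a `CharParserFactory` (the name is now used by the renamed `newIdentifierTokenizer()`)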
20+
21+
22+
--------
23+
### [0.19.1](https://github.com/TeamworkGuy2/JParseCode/commit/691c019ee2b8a889bd44a8048957fdf86a02bcd4) - 2020-04-20
824
#### Changed
925
* Finish `CommentAndWhitespaceExtractor` and tests for it
1026
* `TextToken` interface now includes `hashCode()` and `equals(Object)`

README.md

+6-6
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ JSON Result (printed to System.out):
177177

178178
## Command Line Interface (CLI)
179179

180-
A command line call looks like:
180+
A command line call looks like this:
181181
```
182182
path/to/java -jar path/to/jparse-code.jar
183183
-sources './src/java/Server/Services=1,[cs];./src/java/Server/Models=3,[cs]'
@@ -190,7 +190,7 @@ Where `./src/java/Server/**` is where source files are kept
190190
And the files in `./src/java/Server/Services` belong to the C# namespace `App.Services` and `./src/java/Server/Models` belong to the C# namespace `App.Entities`
191191

192192

193-
### Sources
193+
### -sources
194194
A semicolon-separated list of paths, each set equal to a directory depth followed by a comma and a comma-separated, bracket-wrapped list of file extensions.
195195
The path, child directory depth, and file extensions are used to create a file system filter and all matching files are parsed.
196196
The following formats are valid:
@@ -202,26 +202,26 @@ Example: ```/project/myApp/Models=3,[java,json]```
202202
Note: the brackets around the '[java,json]' file extension list are literal.
203203

204204

205-
### Destinations
205+
### -destinations
206206
A semicolon separated list of output file names associated with lists of namespaces. Each parsed file whose namespace falls into one of these lists is written to that file.
207207
The following format is valid:
208208
'path=[namespace,namespace,...]'
209209

210210
Example: ```/project/output/models.json=[MyApp.Models]```
211211

212212

213-
### Log
213+
### -log
214214
An optional log file name to write parser information to, in the format:
215215
'path'
216216

217217
Example: ```/project/output/parser-log.log```
218218

219219

220-
### Threads
220+
### -threads
221221
An optional number of threads to run parsing in parallel. 0 uses the number of logical processors on the current machine. The default is 1.
222222

223223

224-
### Debug
224+
### -debug
225225
An optional flag which causes extra debug and performance information to be logged
226226

227227

bin/jparse_code-with-tests.jar

-205 Bytes
Binary file not shown.

bin/jparse_code.jar

-4.24 KB
Binary file not shown.

package-lib.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"version" : "0.19.1",
2+
"version" : "0.19.2",
33
"name" : "jparse-code",
44
"description" : "An in-progress suite of parsing/transpilation tools for C#, Java, and TypeScript code. Generates simple JSON ASTs.",
55
"homepage" : "https://github.com/TeamworkGuy2/JParseCode",

rsc/csharp/ParserExamples/Models/AlbumInfo.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ public class AlbumInfo {
1616
public string AlbumName { get; set; }
1717

1818
/// <value>The track duration in milliseconds</value>
19-
public IList<TrackInfo> Tracks { get; set }
19+
public IList<TrackInfo> Tracks { get; set; }
2020

2121
}
2222

rsc/java/ParserExamples/Models/AlbumInfo.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
/// <threadsafety>
99
/// This class is mutable. And it is not thread-safe.
1010
/// </threadsafety>
11-
[DataContract]
11+
@DataContract
1212
public class AlbumInfo {
1313

1414
/// <value>The track name.</value>

rsc/java/ParserExamples/Models/TrackInfo.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
/// <threadsafety>
1111
/// This class is mutable. And it is not thread-safe.
1212
/// </threadsafety>
13-
[DataContract]
13+
@DataContract
1414
public class TrackInfo implements Serializable, Comparable<TrackInfo> {
1515

1616
/// <value>The track name.</value>

src/twg2/parser/codeParser/csharp/CsFileTokenizer.java

+2-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
package twg2.parser.codeParser.csharp;
22

33
import twg2.collections.dataStructures.PairList;
4-
import twg2.parser.Inclusion;
54
import twg2.parser.codeParser.CommentStyle;
65
import twg2.parser.fragment.CodeTokenType;
76
import twg2.parser.language.CodeLanguageOptions;
@@ -13,7 +12,7 @@
1312
import twg2.parser.tokenizers.IdentifierTokenizer;
1413
import twg2.parser.tokenizers.NumberTokenizer;
1514
import twg2.text.tokenizer.CharParserFactory;
16-
import twg2.text.tokenizer.StringBoundedParserBuilder;
15+
import twg2.text.tokenizer.Inclusion;
1716
import twg2.text.tokenizer.StringParserBuilder;
1817

1918
import static twg2.parser.tokenizers.CodeTokenizer.ofType;
@@ -57,7 +56,7 @@ public static PairList<CharParserFactory, TextTransformer<CodeTokenType>> create
5756

5857

5958
public static CharParserFactory createAnnotationTokenizer() {
60-
CharParserFactory annotationParser = new StringBoundedParserBuilder("C# annotation")
59+
CharParserFactory annotationParser = new StringParserBuilder("C# annotation")
6160
.addStartEndNotPrecededByMarkers("block [ ]", '[', '[', ']', Inclusion.INCLUDE)
6261
.isCompound(true)
6362
.build();

src/twg2/parser/tokenizers/CodeBlockTokenizer.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
package twg2.parser.tokenizers;
22

3-
import twg2.parser.Inclusion;
43
import twg2.text.tokenizer.CharParserFactory;
5-
import twg2.text.tokenizer.StringBoundedParserBuilder;
4+
import twg2.text.tokenizer.Inclusion;
5+
import twg2.text.tokenizer.StringParserBuilder;
66

77
/**
88
* @author TeamworkGuy2
@@ -14,7 +14,7 @@ public class CodeBlockTokenizer {
1414

1515

1616
public static CharParserFactory createBlockTokenizer(char startChar, char endChar) {
17-
CharParserFactory commentParser = new StringBoundedParserBuilder("block")
17+
CharParserFactory commentParser = new StringParserBuilder("block")
1818
.addStartEndMarkers("block " + startChar + " " + endChar, startChar, endChar, Inclusion.INCLUDE)
1919
.isCompound(true)
2020
.build();

src/twg2/parser/tokenizers/CodeStringTokenizer.java

+5-5
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
package twg2.parser.tokenizers;
22

3-
import twg2.parser.Inclusion;
43
import twg2.text.tokenizer.CharParserFactory;
5-
import twg2.text.tokenizer.StringBoundedParserBuilder;
4+
import twg2.text.tokenizer.Inclusion;
5+
import twg2.text.tokenizer.StringParserBuilder;
66

77
/**
88
* @author TeamworkGuy2
@@ -14,7 +14,7 @@ public final class CodeStringTokenizer {
1414

1515

1616
public static final CharParserFactory createStringTokenizerForJava() {
17-
CharParserFactory stringParser = new StringBoundedParserBuilder("Java string")
17+
CharParserFactory stringParser = new StringParserBuilder("Java string")
1818
.addStartEndNotPrecededByMarkers("string literal", '"', '\\', '"', Inclusion.INCLUDE)
1919
.addStartEndNotPrecededByMarkers("char literal", '\'', '\\', '\'', Inclusion.INCLUDE)
2020
.build();
@@ -24,7 +24,7 @@ public static final CharParserFactory createStringTokenizerForJava() {
2424

2525
// TODO make parser work with all types of C# string literals
2626
public static final CharParserFactory createStringTokenizerForCSharp() {
27-
CharParserFactory stringParser = new StringBoundedParserBuilder("C# string")
27+
CharParserFactory stringParser = new StringParserBuilder("C# string")
2828
.addStartEndNotPrecededByMarkers("string literal", '"', '\\', '"', Inclusion.INCLUDE)
2929
.addStartEndNotPrecededByMarkers("char literal", '\'', '\\', '\'', Inclusion.INCLUDE)
3030
.build();
@@ -33,7 +33,7 @@ public static final CharParserFactory createStringTokenizerForCSharp() {
3333

3434

3535
public static final CharParserFactory createStringTokenizerForJavascript() {
36-
CharParserFactory stringParser = new StringBoundedParserBuilder("JS string")
36+
CharParserFactory stringParser = new StringParserBuilder("JS string")
3737
.addStartEndNotPrecededByMarkers("string literal", '"', '\\', '"', Inclusion.INCLUDE)
3838
.addStartEndNotPrecededByMarkers("char literal", '\'', '\\', '\'', Inclusion.INCLUDE)
3939
.build();

src/twg2/parser/tokenizers/CodeTokenizer.java

+9-6
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414
import twg2.parser.fragment.CodeTokenType;
1515
import twg2.parser.fragment.TextToken;
1616
import twg2.parser.language.CodeLanguage;
17-
import twg2.parser.textFragment.TextConsumer;
17+
import twg2.parser.textFragment.TextFragmentConsumer;
18+
import twg2.parser.textFragment.TextFragmentRef;
1819
import twg2.parser.textFragment.TextFragmentRefImpl;
1920
import twg2.parser.textFragment.TextFragmentRefImplMut;
2021
import twg2.parser.textFragment.TextTransformer;
@@ -100,10 +101,10 @@ public static <_T_LANG extends CodeLanguage> CodeFileSrc tokenizeCodeFile(PairLi
100101
var input = TextCharsParser.of(src, srcOff, srcLen);
101102

102103
var docTextFragment = new TextFragmentRefImplMut(srcOff, srcOff + srcLen, 0, 0, -1, -1);
103-
var docRoot = new CodeToken(CodeTokenType.DOCUMENT, docTextFragment, docTextFragment.getText(src, srcOff, srcLen).toString());
104+
var docRoot = new CodeToken(CodeTokenType.DOCUMENT, docTextFragment, docTextFragment.getText(0, src, srcOff, srcLen).toString());
104105

105-
SimpleTree<CodeToken> docTree = tokenizeDocument(srcName, input, stepsDetails, tokenizers, docRoot,
106-
(type, frag) -> new CodeToken(type, frag, frag.getText(src, srcOff, srcLen).toString()),
106+
SimpleTree<CodeToken> docTree = tokenizeDocument(srcName, input, src, srcOff, srcLen, stepsDetails, tokenizers, docRoot,
107+
(type, frag) -> new CodeToken(type, frag, frag.getText(0, src, srcOff, srcLen).toString()),
107108
(docFrag) -> docFrag.getTokenType().isCompound(),
108109
(parent, child) -> parent != child && parent.getToken().contains(child.getToken()));
109110

@@ -126,6 +127,7 @@ public static <_T_LANG extends CodeLanguage> CodeFileSrc tokenizeCodeFile(PairLi
126127
public static <D extends TextToken<S, T>, T, S> SimpleTree<D> tokenizeDocument(
127128
String srcName,
128129
TextParser input,
130+
char[] src, int srcOff, int srcLen,
129131
ParserActionLogger stepsDetails,
130132
PairList<? extends CharParserFactory, ? extends TextTransformer<T>> tokenizers,
131133
D root,
@@ -135,12 +137,13 @@ public static <D extends TextToken<S, T>, T, S> SimpleTree<D> tokenizeDocument(
135137
) {
136138
SimpleTreeImpl<D> tree = new SimpleTreeImpl<>(root);
137139

138-
List<Entry<CharParserFactory, TextConsumer>> conditions = new ArrayList<>();
140+
List<Entry<CharParserFactory, TextFragmentConsumer>> conditions = new ArrayList<>();
139141

140142
for(int i = 0, size = tokenizers.size(); i < size; i++) {
141143
TextTransformer<T> transformer = tokenizers.getValue(i);
142144

143-
conditions.add(Tuples.of(tokenizers.getKey(i), (text, off, len, lineStart, columnStart, lineEnd, columnEnd) -> {
145+
conditions.add(Tuples.of(tokenizers.getKey(i), (off, len, lineStart, columnStart, lineEnd, columnEnd) -> {
146+
var text = TextFragmentRef.getText(srcOff, src, srcOff, srcLen, off, off + len);
144147
T elemType = transformer.apply(text, off, len);
145148
var textFragment = new TextFragmentRefImpl(off, off + len, lineStart, columnStart, lineEnd, columnEnd);
146149

src/twg2/parser/tokenizers/CommentTokenizer.java

+4-4
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22

33
import java.util.EnumSet;
44

5-
import twg2.parser.Inclusion;
65
import twg2.parser.codeParser.CommentStyle;
76
import twg2.text.tokenizer.CharParserFactory;
8-
import twg2.text.tokenizer.StringBoundedParserBuilder;
7+
import twg2.text.tokenizer.Inclusion;
8+
import twg2.text.tokenizer.StringParserBuilder;
99

1010
/**
1111
* @author TeamworkGuy2
@@ -17,7 +17,7 @@ public final class CommentTokenizer {
1717

1818

1919
public static final CharParserFactory createCommentTokenizerForJava() {
20-
CharParserFactory commentParser = new StringBoundedParserBuilder("comment")
20+
CharParserFactory commentParser = new StringParserBuilder("comment")
2121
.addStartEndMarkers("multi-line comment", "/*", "*/", Inclusion.INCLUDE)
2222
.addStartEndMarkers("single-line comment", "//", '\n', Inclusion.EXCLUDE)
2323
.build();
@@ -26,7 +26,7 @@ public static final CharParserFactory createCommentTokenizerForJava() {
2626

2727

2828
public static final CharParserFactory createCommentTokenizer(EnumSet<CommentStyle> style) {
29-
StringBoundedParserBuilder commentParser = new StringBoundedParserBuilder("comment " + style);
29+
StringParserBuilder commentParser = new StringParserBuilder("comment " + style);
3030
int markerCount = 0;
3131

3232
if(style.contains(CommentStyle.MULTILINE_C_STYLE)) {

src/twg2/parser/tokenizers/GenericTypeTokenizer.java

+12-15
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@
44
import java.util.function.Supplier;
55

66
import twg2.collections.primitiveCollections.CharArrayList;
7-
import twg2.parser.Inclusion;
87
import twg2.parser.condition.text.CharParserMatchable;
98
import twg2.text.tokenizer.CharConditionPipe;
109
import twg2.text.tokenizer.CharConditions;
10+
import twg2.text.tokenizer.Inclusion;
1111
import twg2.text.tokenizer.StringConditions;
1212

1313
/** Static methods for creating generic type tokenizers that support nesting (i.e. for tokenizing '{@code HashMap<Entry<String, Integer>, List<String>>}').
@@ -35,20 +35,17 @@ private static CharParserMatchable _createGenericTypeTokenizer(int recursionDept
3535

3636
var typeIdentifierParser = Arrays.asList(singleIdentifierParserConstructor.get());
3737
// TODO only matches generic types in the format '<a, b>', allow whitespace between '<'/'>' and after ','
38-
var genericParamsParser = Arrays.asList(CharConditionPipe.createPipeOptionalSuffixesAny("generic type and array dimensions", Arrays.asList(
39-
CharConditionPipe.createPipeAllRequired("generic type signature", Arrays.asList(
40-
new CharConditions.Literal("<", CharArrayList.of('<'), Inclusion.INCLUDE),
41-
CharConditionPipe.createPipeRepeatableSeparator("generic type params",
42-
Arrays.asList(nestedGenericTypeIdentifierCond),
43-
Arrays.asList(new StringConditions.Literal("separator", new String[] { ", " }, Inclusion.INCLUDE))
44-
),
45-
new CharConditions.Literal(">", CharArrayList.of('>'), Inclusion.INCLUDE)
46-
))), Arrays.asList(
47-
CharConditionPipe.createPipeRepeatableSeparator("array dimensions '[]'...", Arrays.asList(new StringConditions.Literal("array dimension '[]'", new String[] { "[]" }, Inclusion.INCLUDE)), null)
48-
)
49-
));
50-
51-
return CharConditionPipe.createPipeOptionalSuffix("type parser", typeIdentifierParser, genericParamsParser);
38+
var genericParamsParser = CharConditionPipe.createPipeAllRequired("generic type signature",
39+
new CharConditions.Literal("<", CharArrayList.of('<'), Inclusion.INCLUDE),
40+
CharConditionPipe.createPipeRepeatableSeparator("generic type params",
41+
Arrays.asList(nestedGenericTypeIdentifierCond),
42+
Arrays.asList(new StringConditions.Literal("separator", new String[] { ", " }, Inclusion.INCLUDE))
43+
),
44+
new CharConditions.Literal(">", CharArrayList.of('>'), Inclusion.INCLUDE)
45+
);
46+
var arrayDimensionsParser = CharConditionPipe.createPipeRepeatableSeparator("array dimensions '[]'...", Arrays.asList(new StringConditions.Literal("array dimension '[]'", new String[] { "[]" }, Inclusion.INCLUDE)), null);
47+
48+
return CharConditionPipe.createPipeOptionalSuffix("type parser", typeIdentifierParser, Arrays.asList(genericParamsParser, arrayDimensionsParser));
5249
}
5350

5451
}

src/twg2/parser/tokenizers/IdentifierTokenizer.java

+9-33
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,13 @@
33
import java.util.Arrays;
44

55
import twg2.collections.primitiveCollections.CharArrayList;
6-
import twg2.functions.predicates.CharPredicate;
7-
import twg2.parser.Inclusion;
86
import twg2.parser.condition.text.CharParserMatchable;
9-
import twg2.parser.condition.text.CharParserPredicate;
10-
import twg2.parser.textParser.TextParser;
11-
import twg2.ranges.CharSearchSet;
127
import twg2.text.tokenizer.CharConditionPipe;
138
import twg2.text.tokenizer.CharConditions;
149
import twg2.text.tokenizer.CharParserFactory;
1510
import twg2.text.tokenizer.CharParserMatchableFactory;
16-
import twg2.text.tokenizer.StringParserBuilder;
11+
import twg2.text.tokenizer.Inclusion;
12+
import twg2.text.tokenizer.StringConditions;
1713
import twg2.tuple.Tuples;
1814

1915
/** Static methods for creating C language like identifier parsers (i.e. parsing strings '_myVar', '$num', 'camelCaseStr', etc.)
@@ -31,44 +27,24 @@ public static CharParserFactory createIdentifierWithGenericTypeTokenizer(int max
3127
}
3228

3329

34-
public static CharParserFactory createIdentifierTokenizer() {
35-
CharParserFactory identifierParser = new StringParserBuilder("identifier")
36-
.addConditionMatcher(newIdentifierTokenizer())
37-
.build();
38-
return identifierParser;
39-
}
40-
41-
4230
/**
43-
* @return a basic parser for a string of contiguous characters matching those allowed in identifiers (i.e. 'mySpecialLoopCount', '$thing', or '_stspr')
31+
* @return a basic parser for a string of contiguous characters matching those allowed in identifiers (i.e. 'FancyObject.LoopCount', '$thing', or '_stspr')
4432
*/
45-
public static CharConditions.BaseCharParserMatchable newIdentifierTokenizer() {
46-
CharSearchSet firstCharSet = new CharSearchSet();
47-
firstCharSet.addChar('$');
48-
firstCharSet.addChar('_');
49-
firstCharSet.addRange('a', 'z');
50-
firstCharSet.addRange('A', 'Z');
51-
CharParserPredicate firstCharCheck = (char ch, TextParser parser) -> (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ch == '_' || ch == '$';
52-
53-
CharSearchSet charSet = firstCharSet.copy();
54-
charSet.addRange('0', '9');
55-
CharPredicate charCheck = (char ch) -> (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ch == '_' || ch == '$';
56-
57-
var cond = new CharConditions.ContainsFirstSpecial("identifier", charCheck, firstCharCheck, firstCharSet.toCharList().toArray(), Inclusion.INCLUDE);
58-
return cond;
33+
public static CharConditions.BaseCharParserMatchable createIdentifierTokenizer() {
34+
return CharConditions.Identifier.newInstance("identifier");
5935
}
6036

6137

6238
/**
6339
* @return a compound identifier parser (i.e. can parse 'Aa.Bb.Cc' as one identifier token)
6440
*/
6541
public static CharParserMatchable createCompoundIdentifierTokenizer() {
66-
var identifierParser = Arrays.asList(newIdentifierTokenizer());
67-
var separatorParser = Arrays.asList(new CharConditions.Literal("identifier namespace separator", CharArrayList.of('.'), Inclusion.INCLUDE));
42+
var identifierParser = createIdentifierTokenizer();
6843

6944
return CharConditionPipe.createPipeOptionalSuffix("compound identifier (nullable)",
70-
Arrays.asList(CharConditionPipe.createPipeRepeatableSeparator("compound identifier", identifierParser, separatorParser)),
71-
Arrays.asList(new CharConditions.Literal("nullable '?' type", CharArrayList.of('?'), Inclusion.INCLUDE))
45+
Arrays.asList(identifierParser),
46+
Arrays.asList(new CharConditions.Literal("nullable '?' suffix", CharArrayList.of('?'), Inclusion.INCLUDE)
47+
, new StringConditions.Literal("params '...' suffix", new String[] { "..." }, Inclusion.INCLUDE))
7248
);
7349
}
7450

0 commit comments

Comments
 (0)