Skip to content

Commit

Permalink
Merge pull request #1973 from guwirth/fix-1903
Browse files Browse the repository at this point in the history
improve lexer preprocessor channel
  • Loading branch information
guwirth authored Nov 3, 2020
2 parents 2e3f0c7 + db4f088 commit cc91d8a
Show file tree
Hide file tree
Showing 7 changed files with 130 additions and 87 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -90,14 +90,14 @@ private void readPrefix(CodeReader code) {

private void readUdSuffix(CodeReader code) {
for (int start_index = index, len = 0;; index++) {
char c = code.charAt(index);
if (c == EOF) {
char charAt = code.charAt(index);
if (charAt == EOF) {
return;
}
if (Character.isLowerCase(c) || Character.isUpperCase(c) || (c == '_')) {
if (Character.isLowerCase(charAt) || Character.isUpperCase(charAt) || (charAt == '_')) {
len++;
} else {
if (Character.isDigit(c)) {
if (Character.isDigit(charAt)) {
if (len > 0) {
len++;
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,45 @@
public class PreprocessorChannel extends Channel<Lexer> {

private static final char EOF = (char) -1;
private final StringLiteralsChannel stringLiteralsChannel = new StringLiteralsChannel();
private final StringBuilder sb = new StringBuilder(256);

private static String read(CodeReader code) {
var sb = new StringBuilder(256);
char ch;
@Override
public boolean consume(CodeReader code, Lexer output) {
int line = code.getLinePosition();
int column = code.getColumnPosition();

char charAt = code.charAt(0);
if ((charAt != '#')) {
return false;
}
read(code);

output.addToken(Token.builder()
.setLine(line)
.setColumn(column)
.setURI(output.getURI())
.setValueAndOriginalValue(sb.toString())
.setType(CxxTokenType.PREPROCESSOR)
.build());
sb.setLength(0);
return true;
}

private void read(CodeReader code) {
while (true) {
ch = (char) code.pop();
char ch = code.charAt(0);
if (isNewline(ch) || ch == EOF) {
code.pop();
break;
} else if (stringLiteralsChannel.read(code, sb)) {
continue;
}
if (ch == '/' && code.charAt(0) == '*') {
consumeComment(code);
ch = (char) code.pop();
if (ch == '/' && code.charAt(0) == '/') {
consumeSingleLineComment(code);
} else if (ch == '/' && code.charAt(0) == '*') {
consumeMultiLineComment(code);
} else if (ch == '\\' && isNewline((char) code.peek())) {
// the newline is escaped: we have a the multi line preprocessor directive
// consume both the backslash and the newline, insert a space instead
Expand All @@ -49,7 +76,6 @@ private static String read(CodeReader code) {
sb.append(ch);
}
}
return sb.toString();
}

private static void consumeNewline(CodeReader code) {
Expand All @@ -63,12 +89,21 @@ private static void consumeNewline(CodeReader code) {
}
}

private static void consumeComment(CodeReader code) {
char ch;
private static void consumeSingleLineComment(CodeReader code) {
code.pop(); // initial '/'
while (true) {
char charAt = code.charAt(0);
if (isNewline(charAt) || charAt == EOF) {
break;
}
code.pop();
}
}

private static void consumeMultiLineComment(CodeReader code) {
code.pop(); // initial '*'
while (true) {
ch = (char) code.pop();
char ch = (char) code.pop();
if (ch == EOF) {
break;
}
Expand All @@ -83,26 +118,4 @@ private static boolean isNewline(char ch) {
return (ch == '\n') || (ch == '\r');
}

@Override
public boolean consume(CodeReader code, Lexer output) {
int line = code.getLinePosition();
int column = code.getColumnPosition();

char ch = code.charAt(0);
if ((ch != '#')) {
return false;
}

String tokenValue = read(code);
output.addToken(Token.builder()
.setLine(line)
.setColumn(column)
.setURI(output.getURI())
.setValueAndOriginalValue(tokenValue)
.setType(CxxTokenType.PREPROCESSOR)
.build());

return true;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,7 @@ public class StringLiteralsChannel extends Channel<Lexer> {

private static final char EOF = (char) -1;

private final StringBuilder sb = new StringBuilder(256);

private final StringBuilder csb = new StringBuilder(256);
private int index = 0;
private char ch = ' ';
private boolean isRawString = false;
Expand All @@ -42,13 +41,28 @@ public class StringLiteralsChannel extends Channel<Lexer> {
public boolean consume(CodeReader code, Lexer output) {
int line = code.getLinePosition();
int column = code.getColumnPosition();
if (!read(code, csb)) {
return false;
}
output.addToken(Token.builder()
.setLine(line)
.setColumn(column)
.setURI(output.getURI())
.setValueAndOriginalValue(csb.toString())
.setType(CxxTokenType.STRING)
.build());
csb.setLength(0);
return true;
}

public boolean read(CodeReader code, StringBuilder sb) {
index = 0;
readStringPrefix(code);
if (ch != '\"') {
return false;
}
if (isRawString) {
if (!readRawString(code)) {
if (!readRawString(code, sb)) {
return false;
}
} else {
Expand All @@ -60,34 +74,30 @@ public boolean consume(CodeReader code, Lexer output) {
for (var i = 0; i < index; i++) {
sb.append((char) code.pop());
}
output.addToken(Token.builder()
.setLine(line)
.setColumn(column)
.setURI(output.getURI())
.setValueAndOriginalValue(sb.toString())
.setType(CxxTokenType.STRING)
.build());
sb.setLength(0);
return true;
}

private boolean readString(CodeReader code) {
index++;
while (code.charAt(index) != ch) {
if (code.charAt(index) == EOF) {
return false;
private void readStringPrefix(CodeReader code) {
ch = code.charAt(index);
isRawString = false;
if ((ch == 'u') || (ch == 'U') || ch == 'L') {
index++;
if (ch == 'u' && code.charAt(index) == '8') {
index++;
}
if (code.charAt(index) == '\\') {
// escape
if (code.charAt(index) == ' ') {
index++;
}
ch = code.charAt(index);
}
if (ch == 'R') {
index++;
isRawString = true;
ch = code.charAt(index);
}
index++;
return true;
}

private boolean readRawString(CodeReader code) {
private boolean readRawString(CodeReader code, StringBuilder sb) {
// "delimiter( raw_character* )delimiter"
char charAt;
index++;
Expand Down Expand Up @@ -127,36 +137,33 @@ private boolean readRawString(CodeReader code) {
return true;
}

private void readStringPrefix(CodeReader code) {
ch = code.charAt(index);
isRawString = false;
if ((ch == 'u') || (ch == 'U') || ch == 'L') {
index++;
if (ch == 'u' && code.charAt(index) == '8') {
index++;
private boolean readString(CodeReader code) {
index++;
char charAt;
while ((charAt = code.charAt(index)) != ch) {
if (charAt == EOF) {
return false;
}
if (code.charAt(index) == ' ') {
if (charAt == '\\') {
// escape
index++;
}
ch = code.charAt(index);
}
if (ch == 'R') {
index++;
isRawString = true;
ch = code.charAt(index);
}
index++;
return true;
}

private void readUdSuffix(CodeReader code) {
for (int start_index = index, len = 0;; index++) {
char c = code.charAt(index);
if (c == EOF) {
char charAt = code.charAt(index);
if (charAt == EOF) {
return;
}
if (Character.isLowerCase(c) || Character.isUpperCase(c) || (c == '_')) {
if (Character.isLowerCase(charAt) || Character.isUpperCase(charAt) || (charAt == '_')) {
len++;
} else {
if (Character.isDigit(c)) {
if (Character.isDigit(charAt)) {
if (len > 0) {
len++;
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -891,7 +891,20 @@ public void hashhash_operator_problem() {
List<Token> tokens = lexer.lex("#define A B(cf)\n"
+ "#define B(n) 0x##n\n"
+ "A");
assertThat(tokens).anySatisfy(token -> assertThat(token).isValue("0xcf").hasType(CxxKeyword.INT));
assertThat(tokens).anySatisfy(token -> assertThat(token)
.isValue("0xcf")
.hasType(CxxKeyword.INT));
}

@Test
public void string_problem_1903() {
List<Token> tokens = lexer.lex("void f1() {}\n"
+ "#define BROKEN_DEFINE \" /a/path/*\"\n"
+ "void f2() {}");

var softly = new SoftAssertions();
softly.assertThat(tokens).hasSize(13);
softly.assertAll();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -67,16 +67,27 @@ public void preprocessor_continued_define() {
}

@Test
public void preprocessor_directive_with_multiline_comment() {
public void preprocessor_directive_with_comment() {
var softly = new SoftAssertions();
softly.assertThat(lexer.lex("#define A B/*CCC*/\n")).anySatisfy(token -> assertThat(token).isValue("#define A B")
softly.assertThat(lexer.lex("#define A B*/\n")).anySatisfy(token -> assertThat(token)
.isValue("#define A B*/")
.hasType(CxxTokenType.PREPROCESSOR));
softly.assertThat(lexer.lex("#define A B/**/C\n")).anySatisfy(token -> assertThat(token).isValue("#define A BC")
softly.assertThat(lexer.lex("#define A B/*CCC*/\n")).anySatisfy(token -> assertThat(token)
.isValue("#define A B")
.hasType(CxxTokenType.PREPROCESSOR));
softly.assertThat(lexer.lex("#define A B/*C\n\n\nC*/\n")).anySatisfy(token -> assertThat(token).isValue(
"#define A B").hasType(CxxTokenType.PREPROCESSOR));
softly.assertThat(lexer.lex("#define A B*/\n")).anySatisfy(token -> assertThat(token).isValue("#define A B*/")
softly.assertThat(lexer.lex("#define A B/**/C\n")).anySatisfy(token -> assertThat(token)
.isValue("#define A BC")
.hasType(CxxTokenType.PREPROCESSOR));
softly.assertThat(lexer.lex("#define A B/*C\n\n\nC*/D\n")).anySatisfy(token -> assertThat(token)
.isValue("#define A BD").hasType(CxxTokenType.PREPROCESSOR));
softly.assertThat(lexer.lex("#define A \"a/*\" B\n")).anySatisfy(token -> assertThat(token)
.isValue("#define A \"a/*\" B").hasType(CxxTokenType.PREPROCESSOR));
softly.assertThat(lexer.lex("#define A \"-str/*\"-/*CCC*/\n")).anySatisfy(token -> assertThat(token)
.isValue("#define A \"-str/*\"-").hasType(CxxTokenType.PREPROCESSOR));
softly.assertThat(lexer.lex("#define A B/*-\"str\"-*/C\n")).anySatisfy(token -> assertThat(token)
.isValue("#define A BC").hasType(CxxTokenType.PREPROCESSOR));
softly.assertThat(lexer.lex("#define A B//-/*-\"str\"-*/\n")).anySatisfy(token -> assertThat(token)
.isValue("#define A B").hasType(CxxTokenType.PREPROCESSOR));
softly.assertAll();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,10 @@ public void object_like_macros() {
+ "A;")))
.isEqualTo("a ; EOF");

//@todo
// assert (serialize(p.parse(
// "#define A_B A/*Comment*/B\n"
// +" A_B;"))
// .equals("A B ; EOF"));
assertThat(serialize(p.parse(
"#define A_B A/*Comment*/B\n"
+ " A_B;"))
.equals("A B ; EOF"));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,7 @@ private void saveHighlighting(InputFile inputFile, SourceFile sourceFile) {
try {
newHighlighting.highlight(item.startLine, item.startLineOffset, item.endLine, item.endLineOffset,
TypeOfText.forCssClass(item.typeOfText));
} catch (IllegalArgumentException e) {
} catch (IllegalArgumentException | IllegalStateException e) {
// ignore highlight errors: parsing errors could lead to wrong location data
LOG.debug("Highlighting error in file '{}' at start:{}:{} end:{}:{}", inputFile.filename(),
item.startLine, item.startLineOffset, item.endLine, item.endLineOffset);
Expand Down

0 comments on commit cc91d8a

Please sign in to comment.