From db4f08894247c92d52581d97c1be66c0efd1754c Mon Sep 17 00:00:00 2001 From: guwirth Date: Tue, 3 Nov 2020 10:42:08 +0100 Subject: [PATCH] improve lexer preprocessor channel - better support special cases like: ```C++ #define A B/**/C #define A "/**/" #define A B // comment ``` - minor refactoring - close #1903 --- .../channels/CharacterLiteralsChannel.java | 8 +- .../cxx/channels/PreprocessorChannel.java | 77 ++++++++++------- .../cxx/channels/StringLiteralsChannel.java | 83 ++++++++++--------- .../lexer/CxxLexerWithPreprocessingTest.java | 15 +++- .../CxxLexer_PreprocessorDisabled_Test.java | 23 +++-- .../parser/PreprocessorDirectivesTest.java | 9 +- .../org/sonar/plugins/cxx/CxxSquidSensor.java | 2 +- 7 files changed, 130 insertions(+), 87 deletions(-) diff --git a/cxx-squid/src/main/java/org/sonar/cxx/channels/CharacterLiteralsChannel.java b/cxx-squid/src/main/java/org/sonar/cxx/channels/CharacterLiteralsChannel.java index cd96fb0bfb..9a25a1bf64 100644 --- a/cxx-squid/src/main/java/org/sonar/cxx/channels/CharacterLiteralsChannel.java +++ b/cxx-squid/src/main/java/org/sonar/cxx/channels/CharacterLiteralsChannel.java @@ -90,14 +90,14 @@ private void readPrefix(CodeReader code) { private void readUdSuffix(CodeReader code) { for (int start_index = index, len = 0;; index++) { - char c = code.charAt(index); - if (c == EOF) { + char charAt = code.charAt(index); + if (charAt == EOF) { return; } - if (Character.isLowerCase(c) || Character.isUpperCase(c) || (c == '_')) { + if (Character.isLowerCase(charAt) || Character.isUpperCase(charAt) || (charAt == '_')) { len++; } else { - if (Character.isDigit(c)) { + if (Character.isDigit(charAt)) { if (len > 0) { len++; } else { diff --git a/cxx-squid/src/main/java/org/sonar/cxx/channels/PreprocessorChannel.java b/cxx-squid/src/main/java/org/sonar/cxx/channels/PreprocessorChannel.java index 8a325f70a4..3d9842624c 100644 --- a/cxx-squid/src/main/java/org/sonar/cxx/channels/PreprocessorChannel.java +++ b/cxx-squid/src/main/java/org/sonar/cxx/channels/PreprocessorChannel.java @@ -28,18 +28,45 @@ public class PreprocessorChannel extends Channel { private static final char EOF = (char) -1; + private final StringLiteralsChannel stringLiteralsChannel = new StringLiteralsChannel(); + private final StringBuilder sb = new StringBuilder(256); - private static String read(CodeReader code) { - var sb = new StringBuilder(256); - char ch; + @Override + public boolean consume(CodeReader code, Lexer output) { + int line = code.getLinePosition(); + int column = code.getColumnPosition(); + + char charAt = code.charAt(0); + if ((charAt != '#')) { + return false; + } + read(code); + + output.addToken(Token.builder() + .setLine(line) + .setColumn(column) + .setURI(output.getURI()) + .setValueAndOriginalValue(sb.toString()) + .setType(CxxTokenType.PREPROCESSOR) + .build()); + sb.setLength(0); + return true; + } + private void read(CodeReader code) { while (true) { - ch = (char) code.pop(); + char ch = code.charAt(0); if (isNewline(ch) || ch == EOF) { + code.pop(); break; + } else if (stringLiteralsChannel.read(code, sb)) { + continue; } - if (ch == '/' && code.charAt(0) == '*') { - consumeComment(code); + ch = (char) code.pop(); + if (ch == '/' && code.charAt(0) == '/') { + consumeSingleLineComment(code); + } else if (ch == '/' && code.charAt(0) == '*') { + consumeMultiLineComment(code); } else if (ch == '\\' && isNewline((char) code.peek())) { // the newline is escaped: we have a the multi line preprocessor directive // consume both the backslash and the newline, insert a space instead @@ -49,7 +76,6 @@ private static String read(CodeReader code) { sb.append(ch); } } - return sb.toString(); } private static void consumeNewline(CodeReader code) { @@ -63,12 +89,21 @@ private static void consumeNewline(CodeReader code) { } } - private static void consumeComment(CodeReader code) { - char ch; + private static void consumeSingleLineComment(CodeReader code) { + code.pop(); // initial '/' + while (true) { + char charAt = code.charAt(0); + if (isNewline(charAt) || charAt == EOF) { + break; + } + code.pop(); + } + } + private static void consumeMultiLineComment(CodeReader code) { code.pop(); // initial '*' while (true) { - ch = (char) code.pop(); + char ch = (char) code.pop(); if (ch == EOF) { break; } @@ -83,26 +118,4 @@ private static boolean isNewline(char ch) { return (ch == '\n') || (ch == '\r'); } - @Override - public boolean consume(CodeReader code, Lexer output) { - int line = code.getLinePosition(); - int column = code.getColumnPosition(); - - char ch = code.charAt(0); - if ((ch != '#')) { - return false; - } - - String tokenValue = read(code); - output.addToken(Token.builder() - .setLine(line) - .setColumn(column) - .setURI(output.getURI()) - .setValueAndOriginalValue(tokenValue) - .setType(CxxTokenType.PREPROCESSOR) - .build()); - - return true; - } - } diff --git a/cxx-squid/src/main/java/org/sonar/cxx/channels/StringLiteralsChannel.java b/cxx-squid/src/main/java/org/sonar/cxx/channels/StringLiteralsChannel.java index 9d0539c277..4a863bdf13 100644 --- a/cxx-squid/src/main/java/org/sonar/cxx/channels/StringLiteralsChannel.java +++ b/cxx-squid/src/main/java/org/sonar/cxx/channels/StringLiteralsChannel.java @@ -32,8 +32,7 @@ public class StringLiteralsChannel extends Channel { private static final char EOF = (char) -1; - private final StringBuilder sb = new StringBuilder(256); - + private final StringBuilder csb = new StringBuilder(256); private int index = 0; private char ch = ' '; private boolean isRawString = false; @@ -42,13 +41,28 @@ public class StringLiteralsChannel extends Channel { public boolean consume(CodeReader code, Lexer output) { int line = code.getLinePosition(); int column = code.getColumnPosition(); + if (!read(code, csb)) { + return false; + } + output.addToken(Token.builder() + .setLine(line) + .setColumn(column) + .setURI(output.getURI()) + .setValueAndOriginalValue(csb.toString()) + .setType(CxxTokenType.STRING) + .build()); + csb.setLength(0); + return true; + } + + public boolean read(CodeReader code, StringBuilder sb) { index = 0; readStringPrefix(code); if (ch != '\"') { return false; } if (isRawString) { - if (!readRawString(code)) { + if (!readRawString(code, sb)) { return false; } } else { @@ -60,34 +74,30 @@ public boolean consume(CodeReader code, Lexer output) { for (var i = 0; i < index; i++) { sb.append((char) code.pop()); } - output.addToken(Token.builder() - .setLine(line) - .setColumn(column) - .setURI(output.getURI()) - .setValueAndOriginalValue(sb.toString()) - .setType(CxxTokenType.STRING) - .build()); - sb.setLength(0); return true; } - private boolean readString(CodeReader code) { - index++; - while (code.charAt(index) != ch) { - if (code.charAt(index) == EOF) { - return false; + private void readStringPrefix(CodeReader code) { + ch = code.charAt(index); + isRawString = false; + if ((ch == 'u') || (ch == 'U') || ch == 'L') { + index++; + if (ch == 'u' && code.charAt(index) == '8') { + index++; } - if (code.charAt(index) == '\\') { - // escape + if (code.charAt(index) == ' ') { index++; } + ch = code.charAt(index); + } + if (ch == 'R') { index++; + isRawString = true; + ch = code.charAt(index); } - index++; - return true; } - private boolean readRawString(CodeReader code) { + private boolean readRawString(CodeReader code, StringBuilder sb) { // "delimiter( raw_character* )delimiter" char charAt; index++; @@ -127,36 +137,33 @@ private boolean readRawString(CodeReader code) { return true; } - private void readStringPrefix(CodeReader code) { - ch = code.charAt(index); - isRawString = false; - if ((ch == 'u') || (ch == 'U') || ch == 'L') { - index++; - if (ch == 'u' && code.charAt(index) == '8') { - index++; + private boolean readString(CodeReader code) { + index++; + char charAt; + while ((charAt = code.charAt(index)) != ch) { + if (charAt == EOF) { + return false; } - if (code.charAt(index) == ' ') { + if (charAt == '\\') { + // escape index++; } - ch = code.charAt(index); - } - if (ch == 'R') { index++; - isRawString = true; - ch = code.charAt(index); } + index++; + return true; } private void readUdSuffix(CodeReader code) { for (int start_index = index, len = 0;; index++) { - char c = code.charAt(index); - if (c == EOF) { + char charAt = code.charAt(index); + if (charAt == EOF) { return; } - if (Character.isLowerCase(c) || Character.isUpperCase(c) || (c == '_')) { + if (Character.isLowerCase(charAt) || Character.isUpperCase(charAt) || (charAt == '_')) { len++; } else { - if (Character.isDigit(c)) { + if (Character.isDigit(charAt)) { if (len > 0) { len++; } else { diff --git a/cxx-squid/src/test/java/org/sonar/cxx/lexer/CxxLexerWithPreprocessingTest.java b/cxx-squid/src/test/java/org/sonar/cxx/lexer/CxxLexerWithPreprocessingTest.java index 3892cfda84..2631030bbd 100644 --- a/cxx-squid/src/test/java/org/sonar/cxx/lexer/CxxLexerWithPreprocessingTest.java +++ b/cxx-squid/src/test/java/org/sonar/cxx/lexer/CxxLexerWithPreprocessingTest.java @@ -891,7 +891,20 @@ public void hashhash_operator_problem() { List tokens = lexer.lex("#define A B(cf)\n" + "#define B(n) 0x##n\n" + "A"); - assertThat(tokens).anySatisfy(token -> assertThat(token).isValue("0xcf").hasType(CxxKeyword.INT)); + assertThat(tokens).anySatisfy(token -> assertThat(token) + .isValue("0xcf") + .hasType(CxxKeyword.INT)); + } + + @Test + public void string_problem_1903() { + List tokens = lexer.lex("void f1() {}\n" + + "#define BROKEN_DEFINE \" /a/path/*\"\n" + + "void f2() {}"); + + var softly = new SoftAssertions(); + softly.assertThat(tokens).hasSize(13); + softly.assertAll(); } } diff --git a/cxx-squid/src/test/java/org/sonar/cxx/lexer/CxxLexer_PreprocessorDisabled_Test.java b/cxx-squid/src/test/java/org/sonar/cxx/lexer/CxxLexer_PreprocessorDisabled_Test.java index 9168b5d55f..02300d3ac7 100644 --- a/cxx-squid/src/test/java/org/sonar/cxx/lexer/CxxLexer_PreprocessorDisabled_Test.java +++ b/cxx-squid/src/test/java/org/sonar/cxx/lexer/CxxLexer_PreprocessorDisabled_Test.java @@ -67,16 +67,27 @@ public void preprocessor_continued_define() { } @Test - public void preprocessor_directive_with_multiline_comment() { + public void preprocessor_directive_with_comment() { var softly = new SoftAssertions(); - softly.assertThat(lexer.lex("#define A B/*CCC*/\n")).anySatisfy(token -> assertThat(token).isValue("#define A B") + softly.assertThat(lexer.lex("#define A B*/\n")).anySatisfy(token -> assertThat(token) + .isValue("#define A B*/") .hasType(CxxTokenType.PREPROCESSOR)); - softly.assertThat(lexer.lex("#define A B/**/C\n")).anySatisfy(token -> assertThat(token).isValue("#define A BC") + softly.assertThat(lexer.lex("#define A B/*CCC*/\n")).anySatisfy(token -> assertThat(token) + .isValue("#define A B") .hasType(CxxTokenType.PREPROCESSOR)); - softly.assertThat(lexer.lex("#define A B/*C\n\n\nC*/\n")).anySatisfy(token -> assertThat(token).isValue( - "#define A B").hasType(CxxTokenType.PREPROCESSOR)); - softly.assertThat(lexer.lex("#define A B*/\n")).anySatisfy(token -> assertThat(token).isValue("#define A B*/") + softly.assertThat(lexer.lex("#define A B/**/C\n")).anySatisfy(token -> assertThat(token) + .isValue("#define A BC") .hasType(CxxTokenType.PREPROCESSOR)); + softly.assertThat(lexer.lex("#define A B/*C\n\n\nC*/D\n")).anySatisfy(token -> assertThat(token) + .isValue("#define A BD").hasType(CxxTokenType.PREPROCESSOR)); + softly.assertThat(lexer.lex("#define A \"a/*\" B\n")).anySatisfy(token -> assertThat(token) + .isValue("#define A \"a/*\" B").hasType(CxxTokenType.PREPROCESSOR)); + softly.assertThat(lexer.lex("#define A \"-str/*\"-/*CCC*/\n")).anySatisfy(token -> assertThat(token) + .isValue("#define A \"-str/*\"-").hasType(CxxTokenType.PREPROCESSOR)); + softly.assertThat(lexer.lex("#define A B/*-\"str\"-*/C\n")).anySatisfy(token -> assertThat(token) + .isValue("#define A BC").hasType(CxxTokenType.PREPROCESSOR)); + softly.assertThat(lexer.lex("#define A B//-/*-\"str\"-*/\n")).anySatisfy(token -> assertThat(token) + .isValue("#define A B").hasType(CxxTokenType.PREPROCESSOR)); softly.assertAll(); } diff --git a/cxx-squid/src/test/java/org/sonar/cxx/parser/PreprocessorDirectivesTest.java b/cxx-squid/src/test/java/org/sonar/cxx/parser/PreprocessorDirectivesTest.java index 58ffaa3dc1..08b1251d4d 100644 --- a/cxx-squid/src/test/java/org/sonar/cxx/parser/PreprocessorDirectivesTest.java +++ b/cxx-squid/src/test/java/org/sonar/cxx/parser/PreprocessorDirectivesTest.java @@ -91,11 +91,10 @@ public void object_like_macros() { + "A;"))) .isEqualTo("a ; EOF"); - //@todo - // assert (serialize(p.parse( - // "#define A_B A/*Comment*/B\n" - // +" A_B;")) - // .equals("A B ; EOF")); + assertThat(serialize(p.parse( + "#define A_B A/*Comment*/B\n" + + " A_B;")) + .equals("A B ; EOF")); } @Test diff --git a/sonar-cxx-plugin/src/main/java/org/sonar/plugins/cxx/CxxSquidSensor.java b/sonar-cxx-plugin/src/main/java/org/sonar/plugins/cxx/CxxSquidSensor.java index 2531dcad2a..5542ec86d4 100644 --- a/sonar-cxx-plugin/src/main/java/org/sonar/plugins/cxx/CxxSquidSensor.java +++ b/sonar-cxx-plugin/src/main/java/org/sonar/plugins/cxx/CxxSquidSensor.java @@ -465,7 +465,7 @@ private void saveHighlighting(InputFile inputFile, SourceFile sourceFile) { try { newHighlighting.highlight(item.startLine, item.startLineOffset, item.endLine, item.endLineOffset, TypeOfText.forCssClass(item.typeOfText)); - } catch (IllegalArgumentException e) { + } catch (IllegalArgumentException | IllegalStateException e) { // ignore highlight errors: parsing errors could lead to wrong location data LOG.debug("Highlighting error in file '{}' at start:{}:{} end:{}:{}", inputFile.filename(), item.startLine, item.startLineOffset, item.endLine, item.endLineOffset);