From 244c69df8f12270ff8f29bb819943748c6d8cc32 Mon Sep 17 00:00:00 2001 From: Ivan Galkin Date: Sun, 29 Apr 2018 22:38:47 +0200 Subject: [PATCH] regex refactoring 2 * precompiled regex pattern are faster than String.split() (https://shipilev.net/talks/joker-Oct2014-string-catechism.pdf, from page 72) * there are visitors, which use splitting for every ASTNode -> perceptible improvement I used example from #1415 and could measure some spead-up (10%?) The basic profiling with `-agentlib:hprof=cpu=samples,depth=100` was however not enough to find a root cause --- .../cxx/checks/CommentContainsPatternChecker.java | 4 +++- .../java/org/sonar/cxx/checks/MagicNumberCheck.java | 2 +- .../java/org/sonar/cxx/checks/NoSonarCheck.java | 9 +++++++-- .../org/sonar/cxx/checks/ReservedNamesCheck.java | 10 +++++++--- .../cxx/checks/UnnamedNamespaceInHeaderCheck.java | 13 +++++++++---- .../cxx/checks/UsingNamespaceInHeaderCheck.java | 13 +++++++++---- .../sonar/cxx/sensors/drmemory/DrMemoryParser.java | 3 ++- .../java/org/sonar/cxx/sensors/utils/CxxUtils.java | 3 +++ .../cxx/sensors/visitors/CxxHighlighterVisitor.java | 8 +++++--- .../src/main/java/org/sonar/cxx/CxxLanguage.java | 5 ++++- .../sonar/cxx/visitors/CxxCommentLinesVisitor.java | 7 +++++-- .../sonar/cxx/visitors/CxxLinesOfCodeVisitor.java | 6 +++++- 12 files changed, 60 insertions(+), 23 deletions(-) diff --git a/cxx-checks/src/main/java/org/sonar/cxx/checks/CommentContainsPatternChecker.java b/cxx-checks/src/main/java/org/sonar/cxx/checks/CommentContainsPatternChecker.java index 2e56f40fc5..8893d5d15f 100644 --- a/cxx-checks/src/main/java/org/sonar/cxx/checks/CommentContainsPatternChecker.java +++ b/cxx-checks/src/main/java/org/sonar/cxx/checks/CommentContainsPatternChecker.java @@ -58,6 +58,8 @@ public class CommentContainsPatternChecker { private Pattern p; + private static final Pattern EOLPattern = Pattern.compile("\\R"); + /** * CommentContainsPatternChecker * @@ -83,7 +85,7 @@ public void visitToken(Token token) { String comment = triviaToken.getOriginalValue(); int line = triviaToken.getLine(); if (indexOfIgnoreCase(comment) != -1) { - String[] lines = comment.split("\r\n?|\n"); + String[] lines = EOLPattern.split(comment); for (int i = 0; i < lines.length; i++) { int start = indexOfIgnoreCase(lines[i]); diff --git a/cxx-checks/src/main/java/org/sonar/cxx/checks/MagicNumberCheck.java b/cxx-checks/src/main/java/org/sonar/cxx/checks/MagicNumberCheck.java index 1e24bfbe2b..8819902b6d 100755 --- a/cxx-checks/src/main/java/org/sonar/cxx/checks/MagicNumberCheck.java +++ b/cxx-checks/src/main/java/org/sonar/cxx/checks/MagicNumberCheck.java @@ -62,7 +62,7 @@ public class MagicNumberCheck extends SquidCheck { @Override public void init() { subscribeTo(CxxTokenType.NUMBER); - for (String magicNumber : Arrays.asList(exceptions.split(","))) { + for (String magicNumber : exceptions.split(",")) { magicNumber = magicNumber.trim(); if (!magicNumber.isEmpty()) { exceptionsSet.add(magicNumber); diff --git a/cxx-checks/src/main/java/org/sonar/cxx/checks/NoSonarCheck.java b/cxx-checks/src/main/java/org/sonar/cxx/checks/NoSonarCheck.java index 8c1215e949..1f2daa0f8d 100644 --- a/cxx-checks/src/main/java/org/sonar/cxx/checks/NoSonarCheck.java +++ b/cxx-checks/src/main/java/org/sonar/cxx/checks/NoSonarCheck.java @@ -23,6 +23,9 @@ import com.sonar.sslr.api.Grammar; import com.sonar.sslr.api.Token; import com.sonar.sslr.api.Trivia; + +import java.util.regex.Pattern; + import org.sonar.check.Priority; import org.sonar.check.Rule; import org.sonar.squidbridge.annotations.ActivatedByDefault; @@ -40,12 +43,14 @@ @NoSqale public class NoSonarCheck extends SquidCheck implements AstAndTokenVisitor { + private static final Pattern EOLPattern = Pattern.compile("\\R"); + @Override public void visitToken(Token token) { for (Trivia trivia : token.getTrivia()) { if (trivia.isComment()) { - String[] commentLines = getContext().getCommentAnalyser() - .getContents(trivia.getToken().getOriginalValue()).split("(\r)?\n|\r", -1); + String[] commentLines = EOLPattern + .split(getContext().getCommentAnalyser().getContents(trivia.getToken().getOriginalValue()), -1); int line = trivia.getToken().getLine(); for (String commentLine : commentLines) { diff --git a/cxx-checks/src/main/java/org/sonar/cxx/checks/ReservedNamesCheck.java b/cxx-checks/src/main/java/org/sonar/cxx/checks/ReservedNamesCheck.java index 17818c103c..21eb4aeb19 100644 --- a/cxx-checks/src/main/java/org/sonar/cxx/checks/ReservedNamesCheck.java +++ b/cxx-checks/src/main/java/org/sonar/cxx/checks/ReservedNamesCheck.java @@ -26,6 +26,9 @@ import java.nio.charset.Charset; import java.util.List; import java.util.Locale; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + import org.sonar.check.Priority; import org.sonar.check.Rule; import org.sonar.cxx.api.CxxKeyword; @@ -50,6 +53,7 @@ public class ReservedNamesCheck extends SquidCheck implements CxxCharse private static volatile String[] keywords = CxxKeyword.keywordValues(); private Charset charset = Charset.forName("UTF-8"); + private static final Pattern defineDeclarationPattern = Pattern.compile("^\\s*#define\\s+([^\\s(]+).*$"); @Override public void init() { @@ -67,9 +71,9 @@ public void visitFile(AstNode astNode) { int nr = 0; for (String line : lines) { nr++; - String[] sub = line.split("^\\s*#define\\s+", 2); - if (sub.length > 1) { - String name = sub[1].split("[\\s(]", 2)[0]; + Matcher matcher = defineDeclarationPattern.matcher(line); + if (matcher.matches()) { + String name = matcher.group(1); if (name.startsWith("_") && name.length() > 1 && Character.isUpperCase(name.charAt(1))) { getContext().createLineViolation(this, "Reserved name used for macro (begins with underscore followed by a capital letter)", nr); diff --git a/cxx-checks/src/main/java/org/sonar/cxx/checks/UnnamedNamespaceInHeaderCheck.java b/cxx-checks/src/main/java/org/sonar/cxx/checks/UnnamedNamespaceInHeaderCheck.java index 173fd05233..daf2003832 100644 --- a/cxx-checks/src/main/java/org/sonar/cxx/checks/UnnamedNamespaceInHeaderCheck.java +++ b/cxx-checks/src/main/java/org/sonar/cxx/checks/UnnamedNamespaceInHeaderCheck.java @@ -39,7 +39,8 @@ //similar Vera++ rule T017 public class UnnamedNamespaceInHeaderCheck extends SquidCheck { - private static final String DEFAULT_NAME_SUFFIX = ".h,.hh,.hpp,.H"; + private static final String[] DEFAULT_NAME_SUFFIX = new String[] { ".h", ".hh", ".hpp", ".H" }; + private Boolean isHeader; @Override public void init() { @@ -48,16 +49,20 @@ public void init() { // settings.getStringArray(CxxPlugin.INCLUDE_DIRECTORIES_KEY) } + @Override + public void visitFile(AstNode astNode) { + isHeader = isHeader(getContext().getFile().getName()); + } + @Override public void visitNode(AstNode node) { - if (isHeader(getContext().getFile().getName())) { + if (isHeader) { getContext().createFileViolation(this, "Unnamed namespaces are not allowed in header files.", node); } } private static boolean isHeader(String name) { - String[] suffixes = DEFAULT_NAME_SUFFIX.split(","); - for (String suff : suffixes) { + for (String suff : DEFAULT_NAME_SUFFIX) { if (name.endsWith(suff)) { return true; } diff --git a/cxx-checks/src/main/java/org/sonar/cxx/checks/UsingNamespaceInHeaderCheck.java b/cxx-checks/src/main/java/org/sonar/cxx/checks/UsingNamespaceInHeaderCheck.java index 1c566c33f5..6163107759 100644 --- a/cxx-checks/src/main/java/org/sonar/cxx/checks/UsingNamespaceInHeaderCheck.java +++ b/cxx-checks/src/main/java/org/sonar/cxx/checks/UsingNamespaceInHeaderCheck.java @@ -39,24 +39,29 @@ //similar Vera++ rule T018 public class UsingNamespaceInHeaderCheck extends SquidCheck { - private static final String DEFAULT_NAME_SUFFIX = ".h,.hh,.hpp,.H"; + private static final String[] DEFAULT_NAME_SUFFIX = new String[] { ".h", ".hh", ".hpp", ".H" }; + private Boolean isHeader; @Override public void init() { subscribeTo(CxxGrammarImpl.blockDeclaration); } + @Override + public void visitFile(AstNode astNode) { + isHeader = isHeader(getContext().getFile().getName()); + } + @Override public void visitNode(AstNode node) { - if (isHeader(getContext().getFile().getName()) + if (isHeader && "using".equals(node.getTokenValue()) && node.getFirstChild().getChildren().toString().contains("namespace")) { getContext().createLineViolation(this, "Using namespace are not allowed in header files.", node); } } private static boolean isHeader(String name) { - String[] suffixes = DEFAULT_NAME_SUFFIX.split(","); - for (String suff : suffixes) { + for (String suff : DEFAULT_NAME_SUFFIX) { if (name.endsWith(suff)) { return true; } diff --git a/cxx-sensors/src/main/java/org/sonar/cxx/sensors/drmemory/DrMemoryParser.java b/cxx-sensors/src/main/java/org/sonar/cxx/sensors/drmemory/DrMemoryParser.java index 13f9e22933..7ee9941985 100644 --- a/cxx-sensors/src/main/java/org/sonar/cxx/sensors/drmemory/DrMemoryParser.java +++ b/cxx-sensors/src/main/java/org/sonar/cxx/sensors/drmemory/DrMemoryParser.java @@ -31,6 +31,7 @@ import org.sonar.api.utils.log.Logger; import org.sonar.api.utils.log.Loggers; import org.sonar.cxx.sensors.drmemory.DrMemoryParser.DrMemoryError.Location; +import org.sonar.cxx.sensors.utils.CxxUtils; public final class DrMemoryParser { @@ -146,7 +147,7 @@ public static List parse(File file, String charset) { if (m.find()) { DrMemoryError error = new DrMemoryError(); error.type = extractErrorType(m.group(1)); - String[] elementSplitted = element.split("\\r?\\n"); + String[] elementSplitted = CxxUtils.EOLPattern.split(element); error.message = elementSplitted[0]; for (String elementPart : elementSplitted) { Matcher locationMatcher = rx_file_finder.matcher(elementPart); diff --git a/cxx-sensors/src/main/java/org/sonar/cxx/sensors/utils/CxxUtils.java b/cxx-sensors/src/main/java/org/sonar/cxx/sensors/utils/CxxUtils.java index ff36571099..b1e8ccecbd 100644 --- a/cxx-sensors/src/main/java/org/sonar/cxx/sensors/utils/CxxUtils.java +++ b/cxx-sensors/src/main/java/org/sonar/cxx/sensors/utils/CxxUtils.java @@ -23,6 +23,8 @@ import java.io.PrintWriter; import java.io.StringWriter; import java.util.Optional; +import java.util.regex.Pattern; + import javax.xml.transform.OutputKeys; import javax.xml.transform.Source; import javax.xml.transform.Transformer; @@ -40,6 +42,7 @@ public final class CxxUtils { public static final String ERROR_RECOVERY_KEY = "errorRecoveryEnabled"; + public static final Pattern EOLPattern = Pattern.compile("\\R"); private static final Logger LOG = Loggers.get(CxxUtils.class); private CxxUtils() { diff --git a/cxx-sensors/src/main/java/org/sonar/cxx/sensors/visitors/CxxHighlighterVisitor.java b/cxx-sensors/src/main/java/org/sonar/cxx/sensors/visitors/CxxHighlighterVisitor.java index 97dd4aeddd..961efe85e6 100644 --- a/cxx-sensors/src/main/java/org/sonar/cxx/sensors/visitors/CxxHighlighterVisitor.java +++ b/cxx-sensors/src/main/java/org/sonar/cxx/sensors/visitors/CxxHighlighterVisitor.java @@ -35,6 +35,7 @@ import org.sonar.api.utils.log.Loggers; import org.sonar.cxx.api.CxxKeyword; import org.sonar.cxx.api.CxxTokenType; +import org.sonar.cxx.sensors.utils.CxxUtils; import org.sonar.squidbridge.SquidAstVisitor; public class CxxHighlighterVisitor extends SquidAstVisitor implements AstAndTokenVisitor { @@ -91,7 +92,7 @@ private static class CommentLocation extends TokenLocation { public CommentLocation(Token token) { super(token); String value = token.getValue(); - String[] lines = value.split("\r\n|\n|\r", -1); + String[] lines = CxxUtils.EOLPattern.split(value, -1); if (lines.length > 1) { endLine = token.getLine() + lines.length - 1; @@ -102,10 +103,11 @@ public CommentLocation(Token token) { private static class PreprocessorDirectiveLocation extends TokenLocation { + public static final Pattern preprocessorPattern = Pattern.compile("^[ \t]*#[ \t]*\\w+"); + PreprocessorDirectiveLocation(Token token) { super(token); - Pattern r = Pattern.compile("^[ \t]*#[ \t]*\\w+"); - Matcher m = r.matcher(token.getValue()); + Matcher m = preprocessorPattern.matcher(token.getValue()); if (m.find()) { endLineOffset = startLineOffset + (m.end() - m.start()); } else { diff --git a/cxx-squid/src/main/java/org/sonar/cxx/CxxLanguage.java b/cxx-squid/src/main/java/org/sonar/cxx/CxxLanguage.java index a28ade7dbc..f3507b706d 100644 --- a/cxx-squid/src/main/java/org/sonar/cxx/CxxLanguage.java +++ b/cxx-squid/src/main/java/org/sonar/cxx/CxxLanguage.java @@ -24,6 +24,8 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.regex.Pattern; + import org.sonar.api.config.Configuration; import org.sonar.api.measures.Metric; import org.sonar.api.resources.AbstractLanguage; @@ -36,6 +38,7 @@ public abstract class CxxLanguage extends AbstractLanguage { public static final String ERROR_RECOVERY_KEY = "errorRecoveryEnabled"; private final Configuration settings; private final Map MetricsCache; + public static final Pattern EOLPattern = Pattern.compile("\\R"); public CxxLanguage(String key, Configuration settings) { super(key); @@ -89,7 +92,7 @@ public Optional IsRecoveryEnabled() { public String[] getStringLinesOption(String key) { Optional value = this.settings.get(getPluginProperty(key)); if (value.isPresent()) { - return value.get().split("\r?\n|\r", -1); + return EOLPattern.split(value.get(), -1); } return new String[0]; } diff --git a/cxx-squid/src/main/java/org/sonar/cxx/visitors/CxxCommentLinesVisitor.java b/cxx-squid/src/main/java/org/sonar/cxx/visitors/CxxCommentLinesVisitor.java index f8b980dcb3..1eac1ddfcc 100644 --- a/cxx-squid/src/main/java/org/sonar/cxx/visitors/CxxCommentLinesVisitor.java +++ b/cxx-squid/src/main/java/org/sonar/cxx/visitors/CxxCommentLinesVisitor.java @@ -26,6 +26,8 @@ import com.sonar.sslr.api.Trivia; import java.util.HashSet; import java.util.Set; +import java.util.regex.Pattern; + import org.sonar.cxx.api.CxxMetric; import org.sonar.cxx.api.CxxPunctuator; import org.sonar.squidbridge.SquidAstVisitor; @@ -42,6 +44,7 @@ public class CxxCommentLinesVisitor extends SquidAstVis private final Set comments = new HashSet<>(); private boolean seenFirstToken; + public static final Pattern EOLPattern = Pattern.compile("\\R"); @Override public void init() { @@ -59,8 +62,8 @@ public void visitToken(Token token) { for (Trivia trivia : token.getTrivia()) { if (trivia.isComment()) { if (seenFirstToken) { - String[] commentLines = getContext().getCommentAnalyser().getContents(trivia.getToken().getOriginalValue()) - .split("(\r)?\n|\r", -1); + String[] commentLines = EOLPattern + .split(getContext().getCommentAnalyser().getContents(trivia.getToken().getOriginalValue()), -1); int line = trivia.getToken().getLine(); for (String commentLine : commentLines) { if (!commentLine.contains("NOSONAR") && !getContext().getCommentAnalyser().isBlank(commentLine)) { diff --git a/cxx-squid/src/main/java/org/sonar/cxx/visitors/CxxLinesOfCodeVisitor.java b/cxx-squid/src/main/java/org/sonar/cxx/visitors/CxxLinesOfCodeVisitor.java index a47e32414f..8b2156d2bb 100644 --- a/cxx-squid/src/main/java/org/sonar/cxx/visitors/CxxLinesOfCodeVisitor.java +++ b/cxx-squid/src/main/java/org/sonar/cxx/visitors/CxxLinesOfCodeVisitor.java @@ -22,6 +22,9 @@ import com.sonar.sslr.api.AstAndTokenVisitor; import com.sonar.sslr.api.AstNode; import static com.sonar.sslr.api.GenericTokenType.EOF; + +import java.util.regex.Pattern; + import com.sonar.sslr.api.Grammar; import com.sonar.sslr.api.Token; import org.sonar.squidbridge.SquidAstVisitor; @@ -37,6 +40,7 @@ public class CxxLinesOfCodeVisitor private final MetricDef metric; private int lastTokenLine; + public static final Pattern EOLPattern = Pattern.compile("\\R"); public CxxLinesOfCodeVisitor(MetricDef metric) { this.metric = metric; @@ -57,7 +61,7 @@ public void visitFile(AstNode node) { public void visitToken(Token token) { if (!token.getType().equals(EOF)) { /* Handle all the lines of the token */ - String[] tokenLines = token.getValue().split("\n", -1); + String[] tokenLines = EOLPattern.split(token.getValue(), -1); int firstLineAlreadyCounted = lastTokenLine == token.getLine() ? 1 : 0; getContext().peekSourceCode().add(metric, (double) tokenLines.length - firstLineAlreadyCounted);