Skip to content

Commit

Permalink
regex refactoring 2
Browse files Browse the repository at this point in the history
* precompiled regex pattern are faster than String.split()
  (https://shipilev.net/talks/joker-Oct2014-string-catechism.pdf, from page 72)
* there are visitors, which use splitting for every ASTNode ->
  perceptible improvement

I used example from SonarOpenCommunity#1415 and could measure some spead-up (10%?)
The basic profiling with `-agentlib:hprof=cpu=samples,depth=100` was
however not enough to find a root cause
  • Loading branch information
ivangalkin committed Apr 29, 2018
1 parent 64098d6 commit 244c69d
Show file tree
Hide file tree
Showing 12 changed files with 60 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ public class CommentContainsPatternChecker {

private Pattern p;

private static final Pattern EOLPattern = Pattern.compile("\\R");

/**
* CommentContainsPatternChecker
*
Expand All @@ -83,7 +85,7 @@ public void visitToken(Token token) {
String comment = triviaToken.getOriginalValue();
int line = triviaToken.getLine();
if (indexOfIgnoreCase(comment) != -1) {
String[] lines = comment.split("\r\n?|\n");
String[] lines = EOLPattern.split(comment);

for (int i = 0; i < lines.length; i++) {
int start = indexOfIgnoreCase(lines[i]);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ public class MagicNumberCheck extends SquidCheck<Grammar> {
@Override
public void init() {
subscribeTo(CxxTokenType.NUMBER);
for (String magicNumber : Arrays.asList(exceptions.split(","))) {
for (String magicNumber : exceptions.split(",")) {
magicNumber = magicNumber.trim();
if (!magicNumber.isEmpty()) {
exceptionsSet.add(magicNumber);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
import com.sonar.sslr.api.Grammar;
import com.sonar.sslr.api.Token;
import com.sonar.sslr.api.Trivia;

import java.util.regex.Pattern;

import org.sonar.check.Priority;
import org.sonar.check.Rule;
import org.sonar.squidbridge.annotations.ActivatedByDefault;
Expand All @@ -40,12 +43,14 @@
@NoSqale
public class NoSonarCheck extends SquidCheck<Grammar> implements AstAndTokenVisitor {

private static final Pattern EOLPattern = Pattern.compile("\\R");

@Override
public void visitToken(Token token) {
for (Trivia trivia : token.getTrivia()) {
if (trivia.isComment()) {
String[] commentLines = getContext().getCommentAnalyser()
.getContents(trivia.getToken().getOriginalValue()).split("(\r)?\n|\r", -1);
String[] commentLines = EOLPattern
.split(getContext().getCommentAnalyser().getContents(trivia.getToken().getOriginalValue()), -1);
int line = trivia.getToken().getLine();

for (String commentLine : commentLines) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@
import java.nio.charset.Charset;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.sonar.check.Priority;
import org.sonar.check.Rule;
import org.sonar.cxx.api.CxxKeyword;
Expand All @@ -50,6 +53,7 @@ public class ReservedNamesCheck extends SquidCheck<Grammar> implements CxxCharse

private static volatile String[] keywords = CxxKeyword.keywordValues();
private Charset charset = Charset.forName("UTF-8");
private static final Pattern defineDeclarationPattern = Pattern.compile("^\\s*#define\\s+([^\\s(]+).*$");

@Override
public void init() {
Expand All @@ -67,9 +71,9 @@ public void visitFile(AstNode astNode) {
int nr = 0;
for (String line : lines) {
nr++;
String[] sub = line.split("^\\s*#define\\s+", 2);
if (sub.length > 1) {
String name = sub[1].split("[\\s(]", 2)[0];
Matcher matcher = defineDeclarationPattern.matcher(line);
if (matcher.matches()) {
String name = matcher.group(1);
if (name.startsWith("_") && name.length() > 1 && Character.isUpperCase(name.charAt(1))) {
getContext().createLineViolation(this,
"Reserved name used for macro (begins with underscore followed by a capital letter)", nr);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@
//similar Vera++ rule T017
public class UnnamedNamespaceInHeaderCheck extends SquidCheck<Grammar> {

private static final String DEFAULT_NAME_SUFFIX = ".h,.hh,.hpp,.H";
private static final String[] DEFAULT_NAME_SUFFIX = new String[] { ".h", ".hh", ".hpp", ".H" };
private Boolean isHeader;

@Override
public void init() {
Expand All @@ -48,16 +49,20 @@ public void init() {
// settings.getStringArray(CxxPlugin.INCLUDE_DIRECTORIES_KEY)
}

@Override
public void visitFile(AstNode astNode) {
isHeader = isHeader(getContext().getFile().getName());
}

@Override
public void visitNode(AstNode node) {
if (isHeader(getContext().getFile().getName())) {
if (isHeader) {
getContext().createFileViolation(this, "Unnamed namespaces are not allowed in header files.", node);
}
}

private static boolean isHeader(String name) {
String[] suffixes = DEFAULT_NAME_SUFFIX.split(",");
for (String suff : suffixes) {
for (String suff : DEFAULT_NAME_SUFFIX) {
if (name.endsWith(suff)) {
return true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,24 +39,29 @@
//similar Vera++ rule T018
public class UsingNamespaceInHeaderCheck extends SquidCheck<Grammar> {

private static final String DEFAULT_NAME_SUFFIX = ".h,.hh,.hpp,.H";
private static final String[] DEFAULT_NAME_SUFFIX = new String[] { ".h", ".hh", ".hpp", ".H" };
private Boolean isHeader;

@Override
public void init() {
subscribeTo(CxxGrammarImpl.blockDeclaration);
}

@Override
public void visitFile(AstNode astNode) {
isHeader = isHeader(getContext().getFile().getName());
}

@Override
public void visitNode(AstNode node) {
if (isHeader(getContext().getFile().getName())
if (isHeader
&& "using".equals(node.getTokenValue()) && node.getFirstChild().getChildren().toString().contains("namespace")) {
getContext().createLineViolation(this, "Using namespace are not allowed in header files.", node);
}
}

private static boolean isHeader(String name) {
String[] suffixes = DEFAULT_NAME_SUFFIX.split(",");
for (String suff : suffixes) {
for (String suff : DEFAULT_NAME_SUFFIX) {
if (name.endsWith(suff)) {
return true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import org.sonar.api.utils.log.Logger;
import org.sonar.api.utils.log.Loggers;
import org.sonar.cxx.sensors.drmemory.DrMemoryParser.DrMemoryError.Location;
import org.sonar.cxx.sensors.utils.CxxUtils;

public final class DrMemoryParser {

Expand Down Expand Up @@ -146,7 +147,7 @@ public static List<DrMemoryError> parse(File file, String charset) {
if (m.find()) {
DrMemoryError error = new DrMemoryError();
error.type = extractErrorType(m.group(1));
String[] elementSplitted = element.split("\\r?\\n");
String[] elementSplitted = CxxUtils.EOLPattern.split(element);
error.message = elementSplitted[0];
for (String elementPart : elementSplitted) {
Matcher locationMatcher = rx_file_finder.matcher(elementPart);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.Optional;
import java.util.regex.Pattern;

import javax.xml.transform.OutputKeys;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
Expand All @@ -40,6 +42,7 @@
public final class CxxUtils {

public static final String ERROR_RECOVERY_KEY = "errorRecoveryEnabled";
public static final Pattern EOLPattern = Pattern.compile("\\R");
private static final Logger LOG = Loggers.get(CxxUtils.class);

private CxxUtils() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import org.sonar.api.utils.log.Loggers;
import org.sonar.cxx.api.CxxKeyword;
import org.sonar.cxx.api.CxxTokenType;
import org.sonar.cxx.sensors.utils.CxxUtils;
import org.sonar.squidbridge.SquidAstVisitor;

public class CxxHighlighterVisitor extends SquidAstVisitor<Grammar> implements AstAndTokenVisitor {
Expand Down Expand Up @@ -91,7 +92,7 @@ private static class CommentLocation extends TokenLocation {
public CommentLocation(Token token) {
super(token);
String value = token.getValue();
String[] lines = value.split("\r\n|\n|\r", -1);
String[] lines = CxxUtils.EOLPattern.split(value, -1);

if (lines.length > 1) {
endLine = token.getLine() + lines.length - 1;
Expand All @@ -102,10 +103,11 @@ public CommentLocation(Token token) {

private static class PreprocessorDirectiveLocation extends TokenLocation {

public static final Pattern preprocessorPattern = Pattern.compile("^[ \t]*#[ \t]*\\w+");

PreprocessorDirectiveLocation(Token token) {
super(token);
Pattern r = Pattern.compile("^[ \t]*#[ \t]*\\w+");
Matcher m = r.matcher(token.getValue());
Matcher m = preprocessorPattern.matcher(token.getValue());
if (m.find()) {
endLineOffset = startLineOffset + (m.end() - m.start());
} else {
Expand Down
5 changes: 4 additions & 1 deletion cxx-squid/src/main/java/org/sonar/cxx/CxxLanguage.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.regex.Pattern;

import org.sonar.api.config.Configuration;
import org.sonar.api.measures.Metric;
import org.sonar.api.resources.AbstractLanguage;
Expand All @@ -36,6 +38,7 @@ public abstract class CxxLanguage extends AbstractLanguage {
public static final String ERROR_RECOVERY_KEY = "errorRecoveryEnabled";
private final Configuration settings;
private final Map<String, Metric> MetricsCache;
public static final Pattern EOLPattern = Pattern.compile("\\R");

public CxxLanguage(String key, Configuration settings) {
super(key);
Expand Down Expand Up @@ -89,7 +92,7 @@ public Optional<Boolean> IsRecoveryEnabled() {
public String[] getStringLinesOption(String key) {
Optional<String> value = this.settings.get(getPluginProperty(key));
if (value.isPresent()) {
return value.get().split("\r?\n|\r", -1);
return EOLPattern.split(value.get(), -1);
}
return new String[0];
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
import com.sonar.sslr.api.Trivia;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;

import org.sonar.cxx.api.CxxMetric;
import org.sonar.cxx.api.CxxPunctuator;
import org.sonar.squidbridge.SquidAstVisitor;
Expand All @@ -42,6 +44,7 @@ public class CxxCommentLinesVisitor<GRAMMAR extends Grammar> extends SquidAstVis

private final Set<Integer> comments = new HashSet<>();
private boolean seenFirstToken;
public static final Pattern EOLPattern = Pattern.compile("\\R");

@Override
public void init() {
Expand All @@ -59,8 +62,8 @@ public void visitToken(Token token) {
for (Trivia trivia : token.getTrivia()) {
if (trivia.isComment()) {
if (seenFirstToken) {
String[] commentLines = getContext().getCommentAnalyser().getContents(trivia.getToken().getOriginalValue())
.split("(\r)?\n|\r", -1);
String[] commentLines = EOLPattern
.split(getContext().getCommentAnalyser().getContents(trivia.getToken().getOriginalValue()), -1);
int line = trivia.getToken().getLine();
for (String commentLine : commentLines) {
if (!commentLine.contains("NOSONAR") && !getContext().getCommentAnalyser().isBlank(commentLine)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
import com.sonar.sslr.api.AstAndTokenVisitor;
import com.sonar.sslr.api.AstNode;
import static com.sonar.sslr.api.GenericTokenType.EOF;

import java.util.regex.Pattern;

import com.sonar.sslr.api.Grammar;
import com.sonar.sslr.api.Token;
import org.sonar.squidbridge.SquidAstVisitor;
Expand All @@ -37,6 +40,7 @@ public class CxxLinesOfCodeVisitor<GRAMMAR extends Grammar>

private final MetricDef metric;
private int lastTokenLine;
public static final Pattern EOLPattern = Pattern.compile("\\R");

public CxxLinesOfCodeVisitor(MetricDef metric) {
this.metric = metric;
Expand All @@ -57,7 +61,7 @@ public void visitFile(AstNode node) {
public void visitToken(Token token) {
if (!token.getType().equals(EOF)) {
/* Handle all the lines of the token */
String[] tokenLines = token.getValue().split("\n", -1);
String[] tokenLines = EOLPattern.split(token.getValue(), -1);

int firstLineAlreadyCounted = lastTokenLine == token.getLine() ? 1 : 0;
getContext().peekSourceCode().add(metric, (double) tokenLines.length - firstLineAlreadyCounted);
Expand Down

0 comments on commit 244c69d

Please sign in to comment.