Skip to content

Commit bd1e3ae

Browse files
committed
Add HitFormatter and {Search}Hit substring properties
Addresses the source-code-related part of #2612, since `OGKUnifiedHighlighter` allows straightforward alternate formatters. `HistoryContext` still uses custom highlighting, so it will still return HTML-like content.
1 parent 801a11b commit bd1e3ae

File tree

10 files changed

+521
-157
lines changed

10 files changed

+521
-157
lines changed

opengrok-indexer/src/main/java/org/opengrok/indexer/search/Hit.java

+38
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,16 @@ public class Hit {
6868
*/
6969
private final String path;
7070

71+
/**
 * A phrase match's left offset (inclusive) within the line. Read through
 * {@link #getLeft()} and written through {@link #setLeft(Integer)}.
 * NOTE(review): no initializer is visible here, so this is presumably
 * {@code null} until the setter is called -- confirm against the
 * constructors.
 */
74+
private Integer left;
75+
76+
/**
 * A phrase match's right offset (exclusive) within the line. Read through
 * {@link #getRight()} and written through {@link #setRight(Integer)}.
 * NOTE(review): no initializer is visible here, so this is presumably
 * {@code null} until the setter is called -- confirm against the
 * constructors.
 */
79+
private Integer right;
80+
7181
/**
7282
* Creates a new, possibly-defined instance.
7383
*
@@ -166,4 +176,32 @@ public void setTag(String tag) {
166176
public boolean getAlt() {
167177
// Plain accessor: returns the alt field as-is.
return alt;
168178
}
179+
180+
/**
 * Gets the left line offset (inclusive) of a phrase match.
 * @return the stored left offset, returned as-is; NOTE(review): presumably
 * {@code null} when {@link #setLeft(Integer)} was never called -- confirm
 */
183+
public Integer getLeft() {
184+
return this.left;
185+
}
186+
187+
/**
 * Sets the left line offset (inclusive) of a phrase match.
 * @param left the offset to store; it is assigned as-is, with no
 * validation performed here
 */
190+
public void setLeft(Integer left) {
191+
this.left = left;
192+
}
193+
194+
/**
 * Gets the right line offset (exclusive) of a phrase match.
 * @return the stored right offset, returned as-is; NOTE(review):
 * presumably {@code null} when {@link #setRight(Integer)} was never
 * called -- confirm
 */
197+
public Integer getRight() {
198+
return this.right;
199+
}
200+
201+
/**
 * Sets the right line offset (exclusive) of a phrase match.
 * @param right the offset to store; it is assigned as-is, with no
 * validation performed here
 */
204+
public void setRight(Integer right) {
205+
this.right = right;
206+
}
169207
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2018-2019, Chris Fraire <cfraire@me.com>.
22+
*/
23+
24+
package org.opengrok.indexer.search;
25+
26+
import org.apache.lucene.search.uhighlight.Passage;
27+
import org.apache.lucene.search.uhighlight.PassageFormatter;
28+
import org.opengrok.indexer.analysis.Definitions;
29+
import org.opengrok.indexer.search.context.ContextArgs;
30+
import org.opengrok.indexer.search.context.LineHighlight;
31+
import org.opengrok.indexer.search.context.PassageConverter;
32+
import org.opengrok.indexer.search.context.PhraseHighlight;
33+
import org.opengrok.indexer.util.SourceSplitter;
34+
import org.opengrok.indexer.util.StringUtils;
35+
36+
import java.util.ArrayList;
37+
import java.util.List;
38+
import java.util.SortedMap;
39+
import java.util.regex.Matcher;
40+
41+
/**
42+
* Represents a subclass of {@link PassageFormatter} that uses
43+
* {@link PassageConverter} to produce {@link Hit} instances.
44+
*/
45+
public class HitFormatter extends SearchFormatterBase {
46+
47+
private String filename;
48+
49+
/**
50+
* Initializes a formatter for the specified arguments.
51+
*/
52+
public HitFormatter() {
53+
super(new PassageConverter(new ContextArgs((short) 0, Short.MAX_VALUE)));
54+
}
55+
56+
/**
57+
* Gets the source code file name, including optional path.
58+
* @return the full path or {@code null}
59+
*/
60+
public String getFilename() {
61+
return filename;
62+
}
63+
64+
/**
65+
* Sets the source code file name.
66+
* @param value the file name to use
67+
*/
68+
public void setFilename(String value) {
69+
this.filename = value;
70+
}
71+
72+
/**
73+
* Splits {@code originalText} using {@link SourceSplitter}, converts
74+
* passages using {@link PassageConverter}, and formats for returning hits
75+
* through the search API.
76+
* @param passages a required instance
77+
* @param originalText a required instance
78+
* @return a defined list of {@link Hit} instances, which might be empty
79+
*/
80+
@Override
81+
public Object format(Passage[] passages, String originalText) {
82+
83+
updateOriginalText(originalText);
84+
85+
SortedMap<Integer, LineHighlight> lines = cvt.convert(passages, splitter);
86+
List<Hit> res = new ArrayList<>();
87+
for (LineHighlight lhi : lines.values()) {
88+
final int lineOffset = lhi.getLineno();
89+
90+
String line = splitter.getLine(lineOffset);
91+
Matcher eolMatcher = StringUtils.STANDARD_EOL.matcher(line);
92+
if (eolMatcher.find()) {
93+
line = line.substring(0, eolMatcher.start());
94+
}
95+
96+
for (int i = 0; i < lhi.countMarkups(); ++i) {
97+
marks.clear();
98+
PhraseHighlight phi = lhi.getMarkup(i);
99+
checkIfMark(line, phi);
100+
101+
Hit hit = new Hit(filename);
102+
// `binary' is false
103+
hit.setLine(line);
104+
hit.setLineno(String.valueOf(lineOffset + 1)); // to 1-offset
105+
hit.setLeft(phi.getLineStart());
106+
hit.setRight(phi.getLineEnd());
107+
108+
if (defs != null) {
109+
// N.b. use ctags 1-offset vs 0-offset.
110+
List<Definitions.Tag> lineTags = defs.getTags(lineOffset + 1);
111+
if (lineTags != null) {
112+
Definitions.Tag pickedTag = findTagForMark(lineTags, marks);
113+
if (pickedTag != null) {
114+
hit.setTag(pickedTag.type);
115+
}
116+
}
117+
}
118+
119+
res.add(hit);
120+
}
121+
}
122+
123+
return res;
124+
}
125+
}

0 commit comments

Comments
 (0)