Skip to content

Commit 7d6798b

Browse files
committed
同步个bug修复
同步个修改过来 infinilabs#998
1 parent fb56f05 commit 7d6798b

File tree

3 files changed

+23
-2
lines changed

3 files changed

+23
-2
lines changed

src/main/java/org/wltea/analyzer/core/AnalyzeContext.java

+15-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,8 @@ class AnalyzeContext {
5050
private int cursor;
5151
//最近一次读入的,可处理的字串长度
5252
private int available;
53-
53+
//末尾非CJK字符数目
54+
private int lastUselessCharNum;
5455

5556
//子分词器锁
5657
//该集合非空,说明有子分词器在占用segmentBuff
@@ -107,6 +108,7 @@ int fillBuffer(Reader reader) throws IOException {
107108
if (this.buffOffset == 0) {
108109
//首次读取reader
109110
readCount = reader.read(segmentBuff);
111+
this.lastUselessCharNum = 0;
110112
} else {
111113
int offset = this.available - this.cursor;
112114
if (offset > 0) {
@@ -242,8 +244,11 @@ void outputToResult() {
242244
//跳过非CJK字符
243245
if (CharacterUtil.CHAR_USELESS == this.charTypes[index]) {
244246
index++;
247+
this.lastUselessCharNum++;
245248
continue;
246249
}
250+
// 清空数值
251+
this.lastUselessCharNum = 0;
247252
//从pathMap找出对应index位置的LexemePath
248253
LexemePath path = this.pathMap.get(index);
249254
if (path != null) {
@@ -354,6 +359,15 @@ Lexeme getNextLexeme() {
354359
return result;
355360
}
356361

362+
363+
/**
364+
* Returns the number of trailing non-CJK ("useless") characters, i.e. the
* current value of {@code lastUselessCharNum}.
*
* @return the value of the {@code lastUselessCharNum} counter
365+
*/
366+
public int getLastUselessCharNum() {
367+
return this.lastUselessCharNum;
368+
}
369+
370+
357371
/**
358372
* 重置分词上下文状态
359373
*/

src/main/java/org/wltea/analyzer/core/IKSegmenter.java

+7
Original file line numberDiff line numberDiff line change
@@ -139,4 +139,11 @@ public synchronized void reset(Reader input) {
139139
segmenter.reset();
140140
}
141141
}
142+
143+
/**
144+
* Returns the number of trailing non-CJK ("useless") characters, as reported
* by the analysis context held in {@code context}.
*
* @return the value returned by {@code context.getLastUselessCharNum()}
145+
*/
146+
public int getLastUselessCharNum() {
147+
return this.context.getLastUselessCharNum();
148+
}
142149
}

src/main/java/org/wltea/analyzer/lucene/IKTokenizer.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ public void reset() throws IOException {
109109
public void end() throws IOException {
110110
super.end();
111111
// set final offset
112-
int finalOffset = correctOffset(this.endPosition);
112+
int finalOffset = correctOffset(this.endPosition + _IKImplement.getLastUselessCharNum());
113113
offsetAtt.setOffset(finalOffset, finalOffset);
114114
posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
115115
}

0 commit comments

Comments
 (0)