File tree 3 files changed +23
-2
lines changed
src/main/java/org/wltea/analyzer
3 files changed +23
-2
lines changed Original file line number Diff line number Diff line change @@ -50,7 +50,8 @@ class AnalyzeContext {
50
50
private int cursor ;
51
51
//最近一次读入的,可处理的字串长度
52
52
private int available ;
53
-
53
+ //末尾非CJK字符数目
54
+ private int lastUselessCharNum ;
54
55
55
56
//子分词器锁
56
57
//该集合非空,说明有子分词器在占用segmentBuff
@@ -107,6 +108,7 @@ int fillBuffer(Reader reader) throws IOException {
107
108
if (this .buffOffset == 0 ) {
108
109
//首次读取reader
109
110
readCount = reader .read (segmentBuff );
111
+ this .lastUselessCharNum = 0 ;
110
112
} else {
111
113
int offset = this .available - this .cursor ;
112
114
if (offset > 0 ) {
@@ -242,8 +244,11 @@ void outputToResult() {
242
244
//跳过非CJK字符
243
245
if (CharacterUtil .CHAR_USELESS == this .charTypes [index ]) {
244
246
index ++;
247
+ this .lastUselessCharNum ++;
245
248
continue ;
246
249
}
250
+ // 清空数值
251
+ this .lastUselessCharNum = 0 ;
247
252
//从pathMap找出对应index位置的LexemePath
248
253
LexemePath path = this .pathMap .get (index );
249
254
if (path != null ) {
@@ -354,6 +359,15 @@ Lexeme getNextLexeme() {
354
359
return result ;
355
360
}
356
361
362
+
363
+ /**
364
+ * 返回末尾非CJK字符数目
365
+ */
366
+ public int getLastUselessCharNum () {
367
+ return this .lastUselessCharNum ;
368
+ }
369
+
370
+
357
371
/**
358
372
* 重置分词上下文状态
359
373
*/
Original file line number Diff line number Diff line change @@ -139,4 +139,11 @@ public synchronized void reset(Reader input) {
139
139
segmenter .reset ();
140
140
}
141
141
}
142
+
143
+ /**
144
+ * 返回末尾非CJK字符数目
145
+ */
146
+ public int getLastUselessCharNum () {
147
+ return this .context .getLastUselessCharNum ();
148
+ }
142
149
}
Original file line number Diff line number Diff line change @@ -109,7 +109,7 @@ public void reset() throws IOException {
109
109
public void end () throws IOException {
110
110
super .end ();
111
111
// set final offset
112
- int finalOffset = correctOffset (this .endPosition );
112
+ int finalOffset = correctOffset (this .endPosition + _IKImplement . getLastUselessCharNum () );
113
113
offsetAtt .setOffset (finalOffset , finalOffset );
114
114
posIncrAtt .setPositionIncrement (posIncrAtt .getPositionIncrement () + skippedPositions );
115
115
}
You can’t perform that action at this time.
0 commit comments