代码、注释格式化;

This commit is contained in:
Magese 2021-12-31 17:14:07 +08:00
parent df29bdc4df
commit f9bc7a12fa

View File

@ -27,102 +27,102 @@
*/ */
package org.wltea.analyzer.core; package org.wltea.analyzer.core;
import java.util.LinkedList;
import java.util.List;
import org.wltea.analyzer.dic.Dictionary; import org.wltea.analyzer.dic.Dictionary;
import org.wltea.analyzer.dic.Hit; import org.wltea.analyzer.dic.Hit;
import java.util.LinkedList;
import java.util.List;
/** /**
* 中文-日韩文子分词器 * 中文-日韩文子分词器
*/ */
class CJKSegmenter implements ISegmenter { class CJKSegmenter implements ISegmenter {
//子分词器标签 // 子分词器标签
private static final String SEGMENTER_NAME = "CJK_SEGMENTER"; private static final String SEGMENTER_NAME = "CJK_SEGMENTER";
//待处理的分词hit队列 // 待处理的分词hit队列
private List<Hit> tmpHits; private final List<Hit> tmpHits;
CJKSegmenter(){ CJKSegmenter() {
this.tmpHits = new LinkedList<>(); this.tmpHits = new LinkedList<>();
} }
/* (non-Javadoc) /* (non-Javadoc)
* @see org.wltea.analyzer.core.ISegmenter#analyze(org.wltea.analyzer.core.AnalyzeContext) * @see org.wltea.analyzer.core.ISegmenter#analyze(org.wltea.analyzer.core.AnalyzeContext)
*/ */
public void analyze(AnalyzeContext context) { public void analyze(AnalyzeContext context) {
if(CharacterUtil.CHAR_USELESS != context.getCurrentCharType()){ if (CharacterUtil.CHAR_USELESS != context.getCurrentCharType()) {
//优先处理tmpHits中的hit // 优先处理tmpHits中的hit
if(!this.tmpHits.isEmpty()){ if (!this.tmpHits.isEmpty()) {
//处理词段队列 // 处理词段队列
Hit[] tmpArray = this.tmpHits.toArray(new Hit[0]); Hit[] tmpArray = this.tmpHits.toArray(new Hit[0]);
for(Hit hit : tmpArray){ for (Hit hit : tmpArray) {
hit = Dictionary.getSingleton().matchWithHit(context.getSegmentBuff(), context.getCursor() , hit); hit = Dictionary.getSingleton().matchWithHit(context.getSegmentBuff(), context.getCursor(), hit);
if(hit.isMatch()){ if (hit.isMatch()) {
//输出当前的词 // 输出当前的词
Lexeme newLexeme = new Lexeme(context.getBufferOffset() , hit.getBegin() , context.getCursor() - hit.getBegin() + 1 , Lexeme.TYPE_CNWORD); Lexeme newLexeme = new Lexeme(context.getBufferOffset(), hit.getBegin(), context.getCursor() - hit.getBegin() + 1, Lexeme.TYPE_CNWORD);
context.addLexeme(newLexeme); context.addLexeme(newLexeme);
if(!hit.isPrefix()){//不是词前缀hit不需要继续匹配移除 if (!hit.isPrefix()) {// 不是词前缀hit不需要继续匹配移除
this.tmpHits.remove(hit); this.tmpHits.remove(hit);
} }
}else if(hit.isUnmatch()){ } else if (hit.isUnmatch()) {
//hit不是词移除 // hit不是词移除
this.tmpHits.remove(hit); this.tmpHits.remove(hit);
} }
} }
} }
//********************************* // *********************************
//再对当前指针位置的字符进行单字匹配 // 再对当前指针位置的字符进行单字匹配
Hit singleCharHit = Dictionary.getSingleton().matchInMainDict(context.getSegmentBuff(), context.getCursor(), 1); Hit singleCharHit = Dictionary.getSingleton().matchInMainDict(context.getSegmentBuff(), context.getCursor(), 1);
if(singleCharHit.isMatch()){//首字成词 if (singleCharHit.isMatch()) {// 首字成词
//输出当前的词 // 输出当前的词
Lexeme newLexeme = new Lexeme(context.getBufferOffset() , context.getCursor() , 1 , Lexeme.TYPE_CNWORD); Lexeme newLexeme = new Lexeme(context.getBufferOffset(), context.getCursor(), 1, Lexeme.TYPE_CNWORD);
context.addLexeme(newLexeme); context.addLexeme(newLexeme);
//同时也是词前缀 // 同时也是词前缀
if(singleCharHit.isPrefix()){ if (singleCharHit.isPrefix()) {
//前缀匹配则放入hit列表 // 前缀匹配则放入hit列表
this.tmpHits.add(singleCharHit); this.tmpHits.add(singleCharHit);
} }
}else if(singleCharHit.isPrefix()){//首字为词前缀 } else if (singleCharHit.isPrefix()) {// 首字为词前缀
//前缀匹配则放入hit列表 // 前缀匹配则放入hit列表
this.tmpHits.add(singleCharHit); this.tmpHits.add(singleCharHit);
} }
}else{ } else {
//遇到CHAR_USELESS字符 // 遇到CHAR_USELESS字符
//清空队列 // 清空队列
this.tmpHits.clear(); this.tmpHits.clear();
} }
//判断缓冲区是否已经读完 // 判断缓冲区是否已经读完
if(context.isBufferConsumed()){ if (context.isBufferConsumed()) {
//清空队列 // 清空队列
this.tmpHits.clear(); this.tmpHits.clear();
} }
//判断是否锁定缓冲区 // 判断是否锁定缓冲区
if(this.tmpHits.size() == 0){ if (this.tmpHits.size() == 0) {
context.unlockBuffer(SEGMENTER_NAME); context.unlockBuffer(SEGMENTER_NAME);
}else{ } else {
context.lockBuffer(SEGMENTER_NAME); context.lockBuffer(SEGMENTER_NAME);
} }
} }
/* (non-Javadoc) /* (non-Javadoc)
* @see org.wltea.analyzer.core.ISegmenter#reset() * @see org.wltea.analyzer.core.ISegmenter#reset()
*/ */
public void reset() { public void reset() {
//清空队列 // 清空队列
this.tmpHits.clear(); this.tmpHits.clear();
} }
} }