代码、注释格式化;

This commit is contained in:
Magese 2021-12-31 17:14:07 +08:00
parent df29bdc4df
commit f9bc7a12fa
1 changed files with 75 additions and 75 deletions

View File

@ -27,102 +27,102 @@
*/
package org.wltea.analyzer.core;
import java.util.LinkedList;
import java.util.List;
import org.wltea.analyzer.dic.Dictionary;
import org.wltea.analyzer.dic.Hit;
import java.util.LinkedList;
import java.util.List;
/**
* 中文-日韩文子分词器
* 中文-日韩文子分词器
*/
class CJKSegmenter implements ISegmenter {
//子分词器标签
private static final String SEGMENTER_NAME = "CJK_SEGMENTER";
//待处理的分词hit队列
private List<Hit> tmpHits;
// 子分词器标签
private static final String SEGMENTER_NAME = "CJK_SEGMENTER";
// 待处理的分词hit队列
private final List<Hit> tmpHits;
CJKSegmenter(){
this.tmpHits = new LinkedList<>();
}
CJKSegmenter() {
this.tmpHits = new LinkedList<>();
}
/* (non-Javadoc)
* @see org.wltea.analyzer.core.ISegmenter#analyze(org.wltea.analyzer.core.AnalyzeContext)
*/
public void analyze(AnalyzeContext context) {
if(CharacterUtil.CHAR_USELESS != context.getCurrentCharType()){
/* (non-Javadoc)
* @see org.wltea.analyzer.core.ISegmenter#analyze(org.wltea.analyzer.core.AnalyzeContext)
*/
public void analyze(AnalyzeContext context) {
if (CharacterUtil.CHAR_USELESS != context.getCurrentCharType()) {
//优先处理tmpHits中的hit
if(!this.tmpHits.isEmpty()){
//处理词段队列
Hit[] tmpArray = this.tmpHits.toArray(new Hit[0]);
for(Hit hit : tmpArray){
hit = Dictionary.getSingleton().matchWithHit(context.getSegmentBuff(), context.getCursor() , hit);
if(hit.isMatch()){
//输出当前的词
Lexeme newLexeme = new Lexeme(context.getBufferOffset() , hit.getBegin() , context.getCursor() - hit.getBegin() + 1 , Lexeme.TYPE_CNWORD);
context.addLexeme(newLexeme);
// 优先处理tmpHits中的hit
if (!this.tmpHits.isEmpty()) {
// 处理词段队列
Hit[] tmpArray = this.tmpHits.toArray(new Hit[0]);
for (Hit hit : tmpArray) {
hit = Dictionary.getSingleton().matchWithHit(context.getSegmentBuff(), context.getCursor(), hit);
if (hit.isMatch()) {
// 输出当前的词
Lexeme newLexeme = new Lexeme(context.getBufferOffset(), hit.getBegin(), context.getCursor() - hit.getBegin() + 1, Lexeme.TYPE_CNWORD);
context.addLexeme(newLexeme);
if(!hit.isPrefix()){//不是词前缀hit不需要继续匹配移除
this.tmpHits.remove(hit);
}
if (!hit.isPrefix()) {// 不是词前缀hit不需要继续匹配移除
this.tmpHits.remove(hit);
}
}else if(hit.isUnmatch()){
//hit不是词移除
this.tmpHits.remove(hit);
}
}
}
} else if (hit.isUnmatch()) {
// hit不是词移除
this.tmpHits.remove(hit);
}
}
}
//*********************************
//再对当前指针位置的字符进行单字匹配
Hit singleCharHit = Dictionary.getSingleton().matchInMainDict(context.getSegmentBuff(), context.getCursor(), 1);
if(singleCharHit.isMatch()){//首字成词
//输出当前的词
Lexeme newLexeme = new Lexeme(context.getBufferOffset() , context.getCursor() , 1 , Lexeme.TYPE_CNWORD);
context.addLexeme(newLexeme);
// *********************************
// 再对当前指针位置的字符进行单字匹配
Hit singleCharHit = Dictionary.getSingleton().matchInMainDict(context.getSegmentBuff(), context.getCursor(), 1);
if (singleCharHit.isMatch()) {// 首字成词
// 输出当前的词
Lexeme newLexeme = new Lexeme(context.getBufferOffset(), context.getCursor(), 1, Lexeme.TYPE_CNWORD);
context.addLexeme(newLexeme);
//同时也是词前缀
if(singleCharHit.isPrefix()){
//前缀匹配则放入hit列表
this.tmpHits.add(singleCharHit);
}
}else if(singleCharHit.isPrefix()){//首字为词前缀
//前缀匹配则放入hit列表
this.tmpHits.add(singleCharHit);
}
// 同时也是词前缀
if (singleCharHit.isPrefix()) {
// 前缀匹配则放入hit列表
this.tmpHits.add(singleCharHit);
}
} else if (singleCharHit.isPrefix()) {// 首字为词前缀
// 前缀匹配则放入hit列表
this.tmpHits.add(singleCharHit);
}
}else{
//遇到CHAR_USELESS字符
//清空队列
this.tmpHits.clear();
}
} else {
// 遇到CHAR_USELESS字符
// 清空队列
this.tmpHits.clear();
}
//判断缓冲区是否已经读完
if(context.isBufferConsumed()){
//清空队列
this.tmpHits.clear();
}
// 判断缓冲区是否已经读完
if (context.isBufferConsumed()) {
// 清空队列
this.tmpHits.clear();
}
//判断是否锁定缓冲区
if(this.tmpHits.size() == 0){
context.unlockBuffer(SEGMENTER_NAME);
// 判断是否锁定缓冲区
if (this.tmpHits.size() == 0) {
context.unlockBuffer(SEGMENTER_NAME);
}else{
context.lockBuffer(SEGMENTER_NAME);
}
}
} else {
context.lockBuffer(SEGMENTER_NAME);
}
}
/* (non-Javadoc)
* @see org.wltea.analyzer.core.ISegmenter#reset()
*/
public void reset() {
//清空队列
this.tmpHits.clear();
}
/* (non-Javadoc)
* @see org.wltea.analyzer.core.ISegmenter#reset()
*/
public void reset() {
// 清空队列
this.tmpHits.clear();
}
}