注释格式化;

This commit is contained in:
Magese 2021-12-31 17:38:43 +08:00
parent 5ab517079b
commit 56f23a9027
1 changed file with 40 additions and 40 deletions

View File

@ -34,14 +34,18 @@ import java.util.Arrays;
*/ */
class LetterSegmenter implements ISegmenter { class LetterSegmenter implements ISegmenter {
//子分词器标签 /**
* 子分词器标签
*/
private static final String SEGMENTER_NAME = "LETTER_SEGMENTER"; private static final String SEGMENTER_NAME = "LETTER_SEGMENTER";
//链接符号 /**
* 链接符号
*/
private static final char[] Letter_Connector = new char[]{'#', '&', '+', '-', '.', '@', '_'}; private static final char[] Letter_Connector = new char[]{'#', '&', '+', '-', '.', '@', '_'};
/**
//数字符号 * 数字符号
*/
private static final char[] Num_Connector = new char[]{',', '.'}; private static final char[] Num_Connector = new char[]{',', '.'};
/* /*
* 词元的开始位置 * 词元的开始位置
* 同时作为子分词器状态标识 * 同时作为子分词器状态标识
@ -53,22 +57,18 @@ class LetterSegmenter implements ISegmenter {
* end记录的是在词元中最后一个出现的Letter但非Sign_Connector的字符的位置 * end记录的是在词元中最后一个出现的Letter但非Sign_Connector的字符的位置
*/ */
private int end; private int end;
/* /*
* 字母起始位置 * 字母起始位置
*/ */
private int englishStart; private int englishStart;
/* /*
* 字母结束位置 * 字母结束位置
*/ */
private int englishEnd; private int englishEnd;
/* /*
* 阿拉伯数字起始位置 * 阿拉伯数字起始位置
*/ */
private int arabicStart; private int arabicStart;
/* /*
* 阿拉伯数字结束位置 * 阿拉伯数字结束位置
*/ */
@ -91,18 +91,18 @@ class LetterSegmenter implements ISegmenter {
*/ */
public void analyze(AnalyzeContext context) { public void analyze(AnalyzeContext context) {
boolean bufferLockFlag; boolean bufferLockFlag;
//处理英文字母 // 处理英文字母
bufferLockFlag = this.processEnglishLetter(context); bufferLockFlag = this.processEnglishLetter(context);
//处理阿拉伯字母 // 处理阿拉伯字母
bufferLockFlag = this.processArabicLetter(context) || bufferLockFlag; bufferLockFlag = this.processArabicLetter(context) || bufferLockFlag;
//处理混合字母(这个要放最后处理可以通过QuickSortSet排除重复) // 处理混合字母(这个要放最后处理可以通过QuickSortSet排除重复)
bufferLockFlag = this.processMixLetter(context) || bufferLockFlag; bufferLockFlag = this.processMixLetter(context) || bufferLockFlag;
//判断是否锁定缓冲区 // 判断是否锁定缓冲区
if (bufferLockFlag) { if (bufferLockFlag) {
context.lockBuffer(SEGMENTER_NAME); context.lockBuffer(SEGMENTER_NAME);
} else { } else {
//对缓冲区解锁 // 对缓冲区解锁
context.unlockBuffer(SEGMENTER_NAME); context.unlockBuffer(SEGMENTER_NAME);
} }
} }
@ -128,26 +128,26 @@ class LetterSegmenter implements ISegmenter {
private boolean processMixLetter(AnalyzeContext context) { private boolean processMixLetter(AnalyzeContext context) {
boolean needLock; boolean needLock;
if (this.start == -1) {//当前的分词器尚未开始处理字符 if (this.start == -1) {// 当前的分词器尚未开始处理字符
if (CharacterUtil.CHAR_ARABIC == context.getCurrentCharType() if (CharacterUtil.CHAR_ARABIC == context.getCurrentCharType()
|| CharacterUtil.CHAR_ENGLISH == context.getCurrentCharType()) { || CharacterUtil.CHAR_ENGLISH == context.getCurrentCharType()) {
//记录起始指针的位置,标明分词器进入处理状态 // 记录起始指针的位置,标明分词器进入处理状态
this.start = context.getCursor(); this.start = context.getCursor();
this.end = start; this.end = start;
} }
} else {//当前的分词器正在处理字符 } else {// 当前的分词器正在处理字符
if (CharacterUtil.CHAR_ARABIC == context.getCurrentCharType() if (CharacterUtil.CHAR_ARABIC == context.getCurrentCharType()
|| CharacterUtil.CHAR_ENGLISH == context.getCurrentCharType()) { || CharacterUtil.CHAR_ENGLISH == context.getCurrentCharType()) {
//记录下可能的结束位置 // 记录下可能的结束位置
this.end = context.getCursor(); this.end = context.getCursor();
} else if (CharacterUtil.CHAR_USELESS == context.getCurrentCharType() } else if (CharacterUtil.CHAR_USELESS == context.getCurrentCharType()
&& this.isLetterConnector(context.getCurrentChar())) { && this.isLetterConnector(context.getCurrentChar())) {
//记录下可能的结束位置 // 记录下可能的结束位置
this.end = context.getCursor(); this.end = context.getCursor();
} else { } else {
//遇到非Letter字符输出词元 // 遇到非Letter字符输出词元
Lexeme newLexeme = new Lexeme(context.getBufferOffset(), this.start, this.end - this.start + 1, Lexeme.TYPE_LETTER); Lexeme newLexeme = new Lexeme(context.getBufferOffset(), this.start, this.end - this.start + 1, Lexeme.TYPE_LETTER);
context.addLexeme(newLexeme); context.addLexeme(newLexeme);
this.start = -1; this.start = -1;
@ -155,10 +155,10 @@ class LetterSegmenter implements ISegmenter {
} }
} }
//判断缓冲区是否已经读完 // 判断缓冲区是否已经读完
if (context.isBufferConsumed()) { if (context.isBufferConsumed()) {
if (this.start != -1 && this.end != -1) { if (this.start != -1 && this.end != -1) {
//缓冲以读完输出词元 // 缓冲以读完输出词元
Lexeme newLexeme = new Lexeme(context.getBufferOffset(), this.start, this.end - this.start + 1, Lexeme.TYPE_LETTER); Lexeme newLexeme = new Lexeme(context.getBufferOffset(), this.start, this.end - this.start + 1, Lexeme.TYPE_LETTER);
context.addLexeme(newLexeme); context.addLexeme(newLexeme);
this.start = -1; this.start = -1;
@ -166,7 +166,7 @@ class LetterSegmenter implements ISegmenter {
} }
} }
//判断是否锁定缓冲区 // 判断是否锁定缓冲区
needLock = this.start != -1 || this.end != -1; needLock = this.start != -1 || this.end != -1;
return needLock; return needLock;
} }
@ -179,18 +179,18 @@ class LetterSegmenter implements ISegmenter {
private boolean processEnglishLetter(AnalyzeContext context) { private boolean processEnglishLetter(AnalyzeContext context) {
boolean needLock; boolean needLock;
if (this.englishStart == -1) {//当前的分词器尚未开始处理英文字符 if (this.englishStart == -1) {// 当前的分词器尚未开始处理英文字符
if (CharacterUtil.CHAR_ENGLISH == context.getCurrentCharType()) { if (CharacterUtil.CHAR_ENGLISH == context.getCurrentCharType()) {
//记录起始指针的位置,标明分词器进入处理状态 // 记录起始指针的位置,标明分词器进入处理状态
this.englishStart = context.getCursor(); this.englishStart = context.getCursor();
this.englishEnd = this.englishStart; this.englishEnd = this.englishStart;
} }
} else {//当前的分词器正在处理英文字符 } else {// 当前的分词器正在处理英文字符
if (CharacterUtil.CHAR_ENGLISH == context.getCurrentCharType()) { if (CharacterUtil.CHAR_ENGLISH == context.getCurrentCharType()) {
//记录当前指针位置为结束位置 // 记录当前指针位置为结束位置
this.englishEnd = context.getCursor(); this.englishEnd = context.getCursor();
} else { } else {
//遇到非English字符,输出词元 // 遇到非English字符,输出词元
Lexeme newLexeme = new Lexeme(context.getBufferOffset(), this.englishStart, this.englishEnd - this.englishStart + 1, Lexeme.TYPE_ENGLISH); Lexeme newLexeme = new Lexeme(context.getBufferOffset(), this.englishStart, this.englishEnd - this.englishStart + 1, Lexeme.TYPE_ENGLISH);
context.addLexeme(newLexeme); context.addLexeme(newLexeme);
this.englishStart = -1; this.englishStart = -1;
@ -198,10 +198,10 @@ class LetterSegmenter implements ISegmenter {
} }
} }
//判断缓冲区是否已经读完 // 判断缓冲区是否已经读完
if (context.isBufferConsumed()) { if (context.isBufferConsumed()) {
if (this.englishStart != -1 && this.englishEnd != -1) { if (this.englishStart != -1 && this.englishEnd != -1) {
//缓冲以读完输出词元 // 缓冲以读完输出词元
Lexeme newLexeme = new Lexeme(context.getBufferOffset(), this.englishStart, this.englishEnd - this.englishStart + 1, Lexeme.TYPE_ENGLISH); Lexeme newLexeme = new Lexeme(context.getBufferOffset(), this.englishStart, this.englishEnd - this.englishStart + 1, Lexeme.TYPE_ENGLISH);
context.addLexeme(newLexeme); context.addLexeme(newLexeme);
this.englishStart = -1; this.englishStart = -1;
@ -209,7 +209,7 @@ class LetterSegmenter implements ISegmenter {
} }
} }
//判断是否锁定缓冲区 // 判断是否锁定缓冲区
needLock = this.englishStart != -1 || this.englishEnd != -1; needLock = this.englishStart != -1 || this.englishEnd != -1;
return needLock; return needLock;
} }
@ -222,21 +222,21 @@ class LetterSegmenter implements ISegmenter {
private boolean processArabicLetter(AnalyzeContext context) { private boolean processArabicLetter(AnalyzeContext context) {
boolean needLock; boolean needLock;
if (this.arabicStart == -1) {//当前的分词器尚未开始处理数字字符 if (this.arabicStart == -1) {// 当前的分词器尚未开始处理数字字符
if (CharacterUtil.CHAR_ARABIC == context.getCurrentCharType()) { if (CharacterUtil.CHAR_ARABIC == context.getCurrentCharType()) {
//记录起始指针的位置,标明分词器进入处理状态 // 记录起始指针的位置,标明分词器进入处理状态
this.arabicStart = context.getCursor(); this.arabicStart = context.getCursor();
this.arabicEnd = this.arabicStart; this.arabicEnd = this.arabicStart;
} }
} else {//当前的分词器正在处理数字字符 } else {// 当前的分词器正在处理数字字符
if (CharacterUtil.CHAR_ARABIC == context.getCurrentCharType()) { if (CharacterUtil.CHAR_ARABIC == context.getCurrentCharType()) {
//记录当前指针位置为结束位置 // 记录当前指针位置为结束位置
this.arabicEnd = context.getCursor(); this.arabicEnd = context.getCursor();
}/* else if (CharacterUtil.CHAR_USELESS == context.getCurrentCharType() }/* else if (CharacterUtil.CHAR_USELESS == context.getCurrentCharType()
&& this.isNumConnector(context.getCurrentChar())) { && this.isNumConnector(context.getCurrentChar())) {
//不输出数字但不标记结束 // 不输出数字但不标记结束
}*/ else { }*/ else {
////遇到非Arabic字符,输出词元 // //遇到非Arabic字符,输出词元
Lexeme newLexeme = new Lexeme(context.getBufferOffset(), this.arabicStart, this.arabicEnd - this.arabicStart + 1, Lexeme.TYPE_ARABIC); Lexeme newLexeme = new Lexeme(context.getBufferOffset(), this.arabicStart, this.arabicEnd - this.arabicStart + 1, Lexeme.TYPE_ARABIC);
context.addLexeme(newLexeme); context.addLexeme(newLexeme);
this.arabicStart = -1; this.arabicStart = -1;
@ -244,10 +244,10 @@ class LetterSegmenter implements ISegmenter {
} }
} }
//判断缓冲区是否已经读完 // 判断缓冲区是否已经读完
if (context.isBufferConsumed()) { if (context.isBufferConsumed()) {
if (this.arabicStart != -1 && this.arabicEnd != -1) { if (this.arabicStart != -1 && this.arabicEnd != -1) {
//生成已切分的词元 // 生成已切分的词元
Lexeme newLexeme = new Lexeme(context.getBufferOffset(), this.arabicStart, this.arabicEnd - this.arabicStart + 1, Lexeme.TYPE_ARABIC); Lexeme newLexeme = new Lexeme(context.getBufferOffset(), this.arabicStart, this.arabicEnd - this.arabicStart + 1, Lexeme.TYPE_ARABIC);
context.addLexeme(newLexeme); context.addLexeme(newLexeme);
this.arabicStart = -1; this.arabicStart = -1;
@ -255,7 +255,7 @@ class LetterSegmenter implements ISegmenter {
} }
} }
//判断是否锁定缓冲区 // 判断是否锁定缓冲区
needLock = this.arabicStart != -1 || this.arabicEnd != -1; needLock = this.arabicStart != -1 || this.arabicEnd != -1;
return needLock; return needLock;
} }