日志格式优化；

2021-12-31 17:10:19 +08:00 · 2021-12-31 17:10:19 +08:00 · 3ec8076730
parent 7149c54de7
commit 3ec8076730
1 changed files with 38 additions and 38 deletions
--- a/src/main/java/org/wltea/analyzer/core/AnalyzeContext.java
+++ b/src/main/java/org/wltea/analyzer/core/AnalyzeContext.java
@@ -39,39 +39,39 @@ import java.util.*;
 */
 class AnalyzeContext {

-    //默认缓冲区大小
+    // 默认缓冲区大小
    private static final int BUFF_SIZE = 4096;
-    //缓冲区耗尽的临界值
+    // 缓冲区耗尽的临界值
    private static final int BUFF_EXHAUST_CRITICAL = 100;


-    //字符窜读取缓冲
+    // 字符窜读取缓冲
    private char[] segmentBuff;
-    //字符类型数组
+    // 字符类型数组
    private int[] charTypes;


-    //记录Reader内已分析的字串总长度
-    //在分多段分析词元时，该变量累计当前的segmentBuff相对于reader起始位置的位移
+    // 记录Reader内已分析的字串总长度
+    // 在分多段分析词元时，该变量累计当前的segmentBuff相对于reader起始位置的位移
    private int buffOffset;
-    //当前缓冲区位置指针
+    // 当前缓冲区位置指针
    private int cursor;
-    //最近一次读入的,可处理的字串长度
+    // 最近一次读入的,可处理的字串长度
    private int available;


-    //子分词器锁
-    //该集合非空，说明有子分词器在占用segmentBuff
+    // 子分词器锁
+    // 该集合非空，说明有子分词器在占用segmentBuff
    private final Set<String> buffLocker;

-    //原始分词结果集合，未经歧义处理
+    // 原始分词结果集合，未经歧义处理
    private QuickSortSet orgLexemes;
-    //LexemePath位置索引表
+    // LexemePath位置索引表
    private final Map<Integer, LexemePath> pathMap;
-    //最终分词结果集
+    // 最终分词结果集
    private final LinkedList<Lexeme> results;

-    //分词器配置项
+    // 分词器配置项
    private final Configuration cfg;

    AnalyzeContext(Configuration cfg) {
@@ -113,21 +113,21 @@ class AnalyzeContext {
    int fillBuffer(Reader reader) throws IOException {
        int readCount = 0;
        if (this.buffOffset == 0) {
-            //首次读取reader
+            // 首次读取reader
            readCount = reader.read(segmentBuff);
        } else {
            int offset = this.available - this.cursor;
            if (offset > 0) {
-                //最近一次读取的>最近一次处理的，将未处理的字串拷贝到segmentBuff头部
+                // 最近一次读取的>最近一次处理的，将未处理的字串拷贝到segmentBuff头部
                System.arraycopy(this.segmentBuff, this.cursor, this.segmentBuff, 0, offset);
                readCount = offset;
            }
-            //继续读取reader ，以onceReadIn - onceAnalyzed为起始位置，继续填充segmentBuff剩余的部分
+            // 继续读取reader ，以onceReadIn - onceAnalyzed为起始位置，继续填充segmentBuff剩余的部分
            readCount += reader.read(this.segmentBuff, offset, BUFF_SIZE - offset);
        }
-        //记录最后一次从Reader中读入的可用字符长度
+        // 记录最后一次从Reader中读入的可用字符长度
        this.available = readCount;
-        //重置当前指针
+        // 重置当前指针
        this.cursor = 0;
        return readCount;
    }
@@ -251,35 +251,35 @@ class AnalyzeContext {
    void outputToResult() {
        int index = 0;
        while (index <= this.cursor) {
-            //跳过非CJK字符
+            // 跳过非CJK字符
            if (CharacterUtil.CHAR_USELESS == this.charTypes[index]) {
                index++;
                continue;
            }
-            //从pathMap找出对应index位置的LexemePath
+            // 从pathMap找出对应index位置的LexemePath
            LexemePath path = this.pathMap.get(index);
            if (path != null) {
-                //输出LexemePath中的lexeme到results集合
+                // 输出LexemePath中的lexeme到results集合
                Lexeme l = path.pollFirst();
                while (l != null) {
                    this.results.add(l);
-                    //将index移至lexeme后
+                    // 将index移至lexeme后
                    index = l.getBegin() + l.getLength();
                    l = path.pollFirst();
                    if (l != null) {
-                        //输出path内部，词元间遗漏的单字
+                        // 输出path内部，词元间遗漏的单字
                        for (; index < l.getBegin(); index++) {
                            this.outputSingleCJK(index);
                        }
                    }
                }
-            } else {//pathMap中找不到index对应的LexemePath
-                //单字输出
+            } else {// pathMap中找不到index对应的LexemePath
+                // 单字输出
                this.outputSingleCJK(index);
                index++;
            }
        }
-        //清空当前的Map
+        // 清空当前的Map
        this.pathMap.clear();
    }

@@ -304,16 +304,16 @@ class AnalyzeContext {
     * 同时处理合并
     */
    Lexeme getNextLexeme() {
-        //从结果集取出，并移除第一个Lexme
+        // 从结果集取出，并移除第一个Lexme
        Lexeme result = this.results.pollFirst();
        while (result != null) {
-            //数量词合并
+            // 数量词合并
            this.compound(result);
            if (Dictionary.getSingleton().isStopWord(this.segmentBuff, result.getBegin(), result.getLength())) {
-                //是停止词继续取列表的下一个
+                // 是停止词继续取列表的下一个
                result = this.results.pollFirst();
            } else {
-                //不是停止词, 生成lexeme的词元文本,输出
+                // 不是停止词, 生成lexeme的词元文本,输出
                result.setLexemeText(String.valueOf(segmentBuff, result.getBegin(), result.getLength()));
                break;
            }
@@ -343,7 +343,7 @@ class AnalyzeContext {
        if (!this.cfg.useSmart()) {
            return;
        }
-        //数量词合并处理
+        // 数量词合并处理
        if (!this.results.isEmpty()) {

            if (Lexeme.TYPE_ARABIC == result.getLexemeType()) {
@@ -351,29 +351,29 @@ class AnalyzeContext {
                boolean appendOk = false;
                if (nextLexeme != null) {
                    if (Lexeme.TYPE_CNUM == nextLexeme.getLexemeType()) {
-                        //合并英文数词+中文数词
+                        // 合并英文数词+中文数词
                        appendOk = result.append(nextLexeme, Lexeme.TYPE_CNUM);
                    } else if (Lexeme.TYPE_COUNT == nextLexeme.getLexemeType()) {
-                        //合并英文数词+中文量词
+                        // 合并英文数词+中文量词
                        appendOk = result.append(nextLexeme, Lexeme.TYPE_CQUAN);
                    }
                }
                if (appendOk) {
-                    //弹出
+                    // 弹出
                    this.results.pollFirst();
                }
            }

-            //可能存在第二轮合并
+            // 可能存在第二轮合并
            if (Lexeme.TYPE_CNUM == result.getLexemeType() && !this.results.isEmpty()) {
                Lexeme nextLexeme = this.results.peekFirst();
                boolean appendOk = false;
                if (Lexeme.TYPE_COUNT == nextLexeme.getLexemeType()) {
-                    //合并中文数词+中文量词
+                    // 合并中文数词+中文量词
                    appendOk = result.append(nextLexeme, Lexeme.TYPE_CQUAN);
                }
                if (appendOk) {
-                    //弹出
+                    // 弹出
                    this.results.pollFirst();
                }
            }