代码、注释格式化；

2021-12-31 17:14:07 +08:00 · 2021-12-31 17:14:07 +08:00 · f9bc7a12fa
parent df29bdc4df
commit f9bc7a12fa
1 changed files with 75 additions and 75 deletions
--- a/src/main/java/org/wltea/analyzer/core/CJKSegmenter.java
+++ b/src/main/java/org/wltea/analyzer/core/CJKSegmenter.java
@@ -27,102 +27,102 @@
 */
 package org.wltea.analyzer.core;

-import java.util.LinkedList;
-import java.util.List;
-
 import org.wltea.analyzer.dic.Dictionary;
 import org.wltea.analyzer.dic.Hit;

+import java.util.LinkedList;
+import java.util.List;
+

 /**
- *  中文-日韩文子分词器
+ * Sub-segmenter for Chinese, Japanese and Korean (CJK) text
 */
 class CJKSegmenter implements ISegmenter {

-	//子分词器标签
-	private static final String SEGMENTER_NAME = "CJK_SEGMENTER";
-	//待处理的分词hit队列
-	private List<Hit> tmpHits;
+    // Label of this sub-segmenter; passed to lockBuffer/unlockBuffer below
+    private static final String SEGMENTER_NAME = "CJK_SEGMENTER";
+    // Queue of dictionary hits that are still waiting to be processed
+    private final List<Hit> tmpHits;


-	CJKSegmenter(){
-		this.tmpHits = new LinkedList<>();
-	}
+    CJKSegmenter() {
+        this.tmpHits = new LinkedList<>();
+    }

-	/* (non-Javadoc)
-	 * @see org.wltea.analyzer.core.ISegmenter#analyze(org.wltea.analyzer.core.AnalyzeContext)
-	 */
-	public void analyze(AnalyzeContext context) {
-		if(CharacterUtil.CHAR_USELESS != context.getCurrentCharType()){
+    /* (non-Javadoc)
+     * @see org.wltea.analyzer.core.ISegmenter#analyze(org.wltea.analyzer.core.AnalyzeContext)
+     */
+    public void analyze(AnalyzeContext context) {
+        if (CharacterUtil.CHAR_USELESS != context.getCurrentCharType()) {

-			//优先处理tmpHits中的hit
-			if(!this.tmpHits.isEmpty()){
-				//处理词段队列
-				Hit[] tmpArray = this.tmpHits.toArray(new Hit[0]);
-				for(Hit hit : tmpArray){
-					hit = Dictionary.getSingleton().matchWithHit(context.getSegmentBuff(), context.getCursor() , hit);
-					if(hit.isMatch()){
-						//输出当前的词
-						Lexeme newLexeme = new Lexeme(context.getBufferOffset() , hit.getBegin() , context.getCursor() - hit.getBegin() + 1 , Lexeme.TYPE_CNWORD);
-						context.addLexeme(newLexeme);
+            // First process the hits that are already queued in tmpHits
+            if (!this.tmpHits.isEmpty()) {
+                // Walk an array snapshot of the queue, because hits are removed from tmpHits inside the loop
+                Hit[] tmpArray = this.tmpHits.toArray(new Hit[0]);
+                for (Hit hit : tmpArray) {
+                    hit = Dictionary.getSingleton().matchWithHit(context.getSegmentBuff(), context.getCursor(), hit);
+                    if (hit.isMatch()) {
+                        // Emit the current word
+                        Lexeme newLexeme = new Lexeme(context.getBufferOffset(), hit.getBegin(), context.getCursor() - hit.getBegin() + 1, Lexeme.TYPE_CNWORD);
+                        context.addLexeme(newLexeme);

-						if(!hit.isPrefix()){//不是词前缀，hit不需要继续匹配，移除
-							this.tmpHits.remove(hit);
-						}
+                        if (!hit.isPrefix()) {// Not a word prefix, so the hit needs no further matching; remove it
+                            this.tmpHits.remove(hit);
+                        }

-					}else if(hit.isUnmatch()){
-						//hit不是词，移除
-						this.tmpHits.remove(hit);
-					}
-				}
-			}
+                    } else if (hit.isUnmatch()) {
+                        // The hit is not a word; remove it
+                        this.tmpHits.remove(hit);
+                    }
+                }
+            }

-			//*********************************
-			//再对当前指针位置的字符进行单字匹配
-			Hit singleCharHit = Dictionary.getSingleton().matchInMainDict(context.getSegmentBuff(), context.getCursor(), 1);
-			if(singleCharHit.isMatch()){//首字成词
-				//输出当前的词
-				Lexeme newLexeme = new Lexeme(context.getBufferOffset() , context.getCursor() , 1 , Lexeme.TYPE_CNWORD);
-				context.addLexeme(newLexeme);
+            // *********************************
+            // Then run a single-character match on the character at the current cursor position
+            Hit singleCharHit = Dictionary.getSingleton().matchInMainDict(context.getSegmentBuff(), context.getCursor(), 1);
+            if (singleCharHit.isMatch()) {// The first character is a word on its own
+                // Emit the current word
+                Lexeme newLexeme = new Lexeme(context.getBufferOffset(), context.getCursor(), 1, Lexeme.TYPE_CNWORD);
+                context.addLexeme(newLexeme);

-				//同时也是词前缀
-				if(singleCharHit.isPrefix()){
-					//前缀匹配则放入hit列表
-					this.tmpHits.add(singleCharHit);
-				}
-			}else if(singleCharHit.isPrefix()){//首字为词前缀
-				//前缀匹配则放入hit列表
-				this.tmpHits.add(singleCharHit);
-			}
+                // It is also a word prefix
+                if (singleCharHit.isPrefix()) {
+                    // Prefix match: add it to the hit list
+                    this.tmpHits.add(singleCharHit);
+                }
+            } else if (singleCharHit.isPrefix()) {// The first character is a word prefix
+                // Prefix match: add it to the hit list
+                this.tmpHits.add(singleCharHit);
+            }


-		}else{
-			//遇到CHAR_USELESS字符
-			//清空队列
-			this.tmpHits.clear();
-		}
+        } else {
+            // Encountered a CHAR_USELESS character
+            // Clear the queue
+            this.tmpHits.clear();
+        }

-		//判断缓冲区是否已经读完
-		if(context.isBufferConsumed()){
-			//清空队列
-			this.tmpHits.clear();
-		}
+        // Check whether the buffer has been fully consumed
+        if (context.isBufferConsumed()) {
+            // Clear the queue
+            this.tmpHits.clear();
+        }

-		//判断是否锁定缓冲区
-		if(this.tmpHits.size() == 0){
-			context.unlockBuffer(SEGMENTER_NAME);
+        // Decide whether to lock the buffer: unlock when no hits are pending, lock otherwise
+        if (this.tmpHits.size() == 0) {
+            context.unlockBuffer(SEGMENTER_NAME);

-		}else{
-			context.lockBuffer(SEGMENTER_NAME);
-		}
-	}
+        } else {
+            context.lockBuffer(SEGMENTER_NAME);
+        }
+    }

-	/* (non-Javadoc)
-	 * @see org.wltea.analyzer.core.ISegmenter#reset()
-	 */
-	public void reset() {
-		//清空队列
-		this.tmpHits.clear();
-	}
+    /* (non-Javadoc)
+     * @see org.wltea.analyzer.core.ISegmenter#reset()
+     */
+    public void reset() {
+        // Clear the queue
+        this.tmpHits.clear();
+    }

 }