From 6ef47987522a8cd5c994c724e27efdd0e9a9ea51 Mon Sep 17 00:00:00 2001
From: Magese <magese@live.cn>
Date: Fri, 31 Dec 2021 17:43:40 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E5=8F=8A=E6=B3=A8=E9=87=8A?=
 =?UTF-8?q?=E6=A0=BC=E5=BC=8F=E5=8C=96=EF=BC=9B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../java/org/wltea/analyzer/core/Lexeme.java  | 402 ++++++++++--------
 1 file changed, 219 insertions(+), 183 deletions(-)
diff --git a/src/main/java/org/wltea/analyzer/core/Lexeme.java b/src/main/java/org/wltea/analyzer/core/Lexeme.java
index 242f450..c120f12 100644
--- a/src/main/java/org/wltea/analyzer/core/Lexeme.java
+++ b/src/main/java/org/wltea/analyzer/core/Lexeme.java
@@ -31,242 +31,278 @@ package org.wltea.analyzer.core;
  * IK词元对象
  */
 @SuppressWarnings("unused")
-public class Lexeme implements Comparable<Lexeme>{
-	//英文
-	static final int TYPE_ENGLISH = 1;
-	//数字
-	static final int TYPE_ARABIC = 2;
-	//英文数字混合
-	static final int TYPE_LETTER = 3;
-	//中文词元
-	static final int TYPE_CNWORD = 4;
-	//中文单字
-	static final int TYPE_CNCHAR = 64;
-	//日韩文字
-	static final int TYPE_OTHER_CJK = 8;
-	//中文数词
-	static final int TYPE_CNUM = 16;
-	//中文量词
-	static final int TYPE_COUNT = 32;
-	//中文数量词
-	static final int TYPE_CQUAN = 48;
-
-	//词元的起始位移
-	private int offset;
-    //词元的相对起始位置
+public class Lexeme implements Comparable<Lexeme> {
+    /**
+     * English letters
+     */
+    static final int TYPE_ENGLISH = 1;
+    /**
+     * Arabic numerals
+     */
+    static final int TYPE_ARABIC = 2;
+    /**
+     * Mixed English letters and digits
+     */
+    static final int TYPE_LETTER = 3;
+    /**
+     * Chinese word
+     */
+    static final int TYPE_CNWORD = 4;
+    /**
+     * Single Chinese character
+     */
+    static final int TYPE_CNCHAR = 64;
+    /**
+     * Japanese / Korean characters
+     */
+    static final int TYPE_OTHER_CJK = 8;
+    /**
+     * Chinese numeral
+     */
+    static final int TYPE_CNUM = 16;
+    /**
+     * Chinese measure word
+     */
+    static final int TYPE_COUNT = 32;
+    /**
+     * Chinese numeral-plus-measure-word compound
+     */
+    static final int TYPE_CQUAN = 48;
+    /**
+     * Starting offset of the lexeme
+     */
+    private int offset;
+    /**
+     * Relative start position of the lexeme
+     */
     private int begin;
-    //词元的长度
+    /**
+     * Length of the lexeme
+     */
     private int length;
-    //词元文本
+    /**
+     * Text of the lexeme
+     */
     private String lexemeText;
-    //词元类型
+    /**
+     * Type of the lexeme
+     */
     private int lexemeType;
 
 
-	public Lexeme(int offset , int begin , int length , int lexemeType){
-		this.offset = offset;
-		this.begin = begin;
-		if(length < 0){
-			throw new IllegalArgumentException("length < 0");
-		}
-		this.length = length;
-		this.lexemeType = lexemeType;
-	}
+    public Lexeme(int offset, int begin, int length, int lexemeType) {
+        this.offset = offset;
+        this.begin = begin;
+        if (length < 0) {
+            throw new IllegalArgumentException("length < 0");
+        }
+        this.length = length;
+        this.lexemeType = lexemeType;
+    }
 
     /*
      * 判断词元相等算法
      * 起始位置偏移、起始位置、终止位置相同
      * @see java.lang.Object#equals(Object o)
      */
-	public boolean equals(Object o){
-		if(o == null){
-			return false;
-		}
+    public boolean equals(Object o) {
+        if (o == null) {
+            return false;
+        }
 
-		if(this == o){
-			return true;
-		}
+        if (this == o) {
+            return true;
+        }
 
-		if(o instanceof Lexeme){
-			Lexeme other = (Lexeme)o;
-			return this.offset == other.getOffset()
-					&& this.begin == other.getBegin()
-					&& this.length == other.getLength();
-		}else{
-			return false;
-		}
-	}
+        if (o instanceof Lexeme) {
+            Lexeme other = (Lexeme) o;
+            return this.offset == other.getOffset()
+                    && this.begin == other.getBegin()
+                    && this.length == other.getLength();
+        } else {
+            return false;
+        }
+    }
 
     /*
      * 词元哈希编码算法
      * @see java.lang.Object#hashCode()
      */
-    public int hashCode(){
-    	int absBegin = getBeginPosition();
-    	int absEnd = getEndPosition();
-    	return  (absBegin * 37) + (absEnd * 31) + ((absBegin * absEnd) % getLength()) * 11;
+    public int hashCode() {
+        int absBegin = getBeginPosition(), absEnd = getEndPosition(), len = getLength();
+        int mixed = (len == 0) ? 0 : (absBegin * absEnd) % len; // a zero-length lexeme is legal; "% 0" would throw ArithmeticException
+        return (absBegin * 37) + (absEnd * 31) + mixed * 11;
     }
 
     /*
      * 词元在排序集合中的比较算法
      * @see java.lang.Comparable#compareTo(java.lang.Object)
      */
-	public int compareTo(Lexeme other) {
-		//起始位置优先
-        if(this.begin < other.getBegin()){
+    public int compareTo(Lexeme other) {
+        // The earlier start position sorts first
+        if (this.begin < other.getBegin()) {
             return -1;
-        }else if(this.begin == other.getBegin()){
-        	//词元长度优先
-			//this.length < other.getLength()
-			return Integer.compare(other.getLength(), this.length);
+        } else if (this.begin == other.getBegin()) {
+            // Same start position: the longer lexeme sorts first
+            // this.length < other.getLength()
+            return Integer.compare(other.getLength(), this.length);
 
-        }else{//this.begin > other.getBegin()
-        	return 1;
+        } else {
+            return 1;
         }
-	}
+    }
 
-	private int getOffset() {
-		return offset;
-	}
+    private int getOffset() {
+        return offset;
+    }
 
-	public void setOffset(int offset) {
-		this.offset = offset;
-	}
+    public void setOffset(int offset) {
+        this.offset = offset;
+    }
 
-	int getBegin() {
-		return begin;
-	}
-	/**
-	 * 获取词元在文本中的起始位置
-	 * @return int
-	 */
-	public int getBeginPosition(){
-		return offset + begin;
-	}
+    int getBegin() {
+        return begin;
+    }
 
-	public void setBegin(int begin) {
-		this.begin = begin;
-	}
+    /**
+     * Returns the start position of the lexeme in the text.
+     *
+     * @return the sum of {@code offset} and {@code begin}
+     */
+    public int getBeginPosition() {
+        return offset + begin;
+    }
 
-	/**
-	 * 获取词元在文本中的结束位置
-	 * @return int
-	 */
-	public int getEndPosition(){
-		return offset + begin + length;
-	}
+    public void setBegin(int begin) {
+        this.begin = begin;
+    }
 
-	/**
-	 * 获取词元的字符长度
-	 * @return int
-	 */
-	public int getLength(){
-		return this.length;
-	}
+    /**
+     * Returns the end position of the lexeme in the text.
+     *
+     * @return the sum of {@code offset}, {@code begin} and {@code length}
+     */
+    public int getEndPosition() {
+        return offset + begin + length;
+    }
 
-	public void setLength(int length) {
-		if(this.length < 0){
-			throw new IllegalArgumentException("length < 0");
-		}
-		this.length = length;
-	}
+    /**
+     * Returns the character length of the lexeme.
+     *
+     * @return the current value of the {@code length} field
+     */
+    public int getLength() {
+        return this.length;
+    }
 
-	/**
-	 * 获取词元的文本内容
-	 * @return String
-	 */
-	public String getLexemeText() {
-		if(lexemeText == null){
-			return "";
-		}
-		return lexemeText;
-	}
+    public void setLength(int length) {
+        if (length < 0) { // validate the incoming argument; testing the current field let negative values through
+            throw new IllegalArgumentException("length < 0");
+        }
+        this.length = length;
+    }
 
-	void setLexemeText(String lexemeText) {
-		if(lexemeText == null){
-			this.lexemeText = "";
-			this.length = 0;
-		}else{
-			this.lexemeText = lexemeText;
-			this.length = lexemeText.length();
-		}
-	}
+    /**
+     * Returns the text content of the lexeme.
+     *
+     * @return the lexeme text, or an empty string if the text is {@code null}
+     */
+    public String getLexemeText() {
+        if (lexemeText == null) {
+            return "";
+        }
+        return lexemeText;
+    }
 
-	/**
-	 * 获取词元类型
-	 * @return int
-	 */
-	int getLexemeType() {
-		return lexemeType;
-	}
+    void setLexemeText(String lexemeText) {
+        if (lexemeText == null) {
+            this.lexemeText = "";
+            this.length = 0;
+        } else {
+            this.lexemeText = lexemeText;
+            this.length = lexemeText.length();
+        }
+    }
 
-	/**
-	 * 获取词元类型标示字符串
-	 * @return String
-	 */
-	public String getLexemeTypeString(){
-		switch(lexemeType) {
+    /**
+     * Returns the type of the lexeme.
+     *
+     * @return the current value of the {@code lexemeType} field
+     */
+    int getLexemeType() {
+        return lexemeType;
+    }
 
-		case TYPE_ENGLISH :
-			return "ENGLISH";
+    /**
+     * Returns the string label of the lexeme type
+     *
+     * @return String
+     */
+    public String getLexemeTypeString() {
+        switch (lexemeType) {
 
-		case TYPE_ARABIC :
-			return "ARABIC";
+            case TYPE_ENGLISH:
+                return "ENGLISH";
 
-		case TYPE_LETTER :
-			return "LETTER";
+            case TYPE_ARABIC:
+                return "ARABIC";
 
-		case TYPE_CNWORD :
-			return "CN_WORD";
+            case TYPE_LETTER:
+                return "LETTER";
 
-		case TYPE_CNCHAR :
-			return "CN_CHAR";
+            case TYPE_CNWORD:
+                return "CN_WORD";
 
-		case TYPE_OTHER_CJK :
-			return "OTHER_CJK";
+            case TYPE_CNCHAR:
+                return "CN_CHAR";
 
-		case TYPE_COUNT :
-			return "COUNT";
+            case TYPE_OTHER_CJK:
+                return "OTHER_CJK";
 
-		case TYPE_CNUM :
-			return "TYPE_CNUM";
+            case TYPE_COUNT:
+                return "COUNT";
 
-		case TYPE_CQUAN:
-			return "TYPE_CQUAN";
+            case TYPE_CNUM:
+                return "TYPE_CNUM";
 
-		default :
-			return "UNKONW";
-		}
-	}
+            case TYPE_CQUAN:
+                return "TYPE_CQUAN";
+
+            default:
+                return "UNKNOWN";
+        }
+    }
 
 
-	public void setLexemeType(int lexemeType) {
-		this.lexemeType = lexemeType;
-	}
+    public void setLexemeType(int lexemeType) {
+        this.lexemeType = lexemeType;
+    }
 
-	/**
-	 * 合并两个相邻的词元
-	 * @return boolean 词元是否成功合并
-	 */
-	boolean append(Lexeme l, int lexemeType){
-		if(l != null && this.getEndPosition() == l.getBeginPosition()){
-			this.length += l.getLength();
-			this.lexemeType = lexemeType;
-			return true;
-		}else {
-			return false;
-		}
-	}
+    /**
+     * Merges the lexeme {@code l} into this one when {@code l} begins exactly where this one ends,
+     * growing this lexeme's length by {@code l}'s length and setting its type to {@code lexemeType}.
+     * @return {@code true} if merged; {@code false} if {@code l} is null or not adjacent (nothing is changed)
+     */
+    boolean append(Lexeme l, int lexemeType) {
+        if (l != null && this.getEndPosition() == l.getBeginPosition()) {
+            this.length += l.getLength();
+            this.lexemeType = lexemeType;
+            return true;
+        } else {
+            return false;
+        }
+    }
 
-
-	/**
-	 *
-	 */
-	public String toString(){
-		return this.getBeginPosition() + "-" + this.getEndPosition() +
-				" : " + this.lexemeText + " : \t" +
-				this.getLexemeTypeString();
-	}
+    /**
+     * Formats the lexeme as {@code begin-end : text : type}, with a tab before the type label.
+     * The raw {@code lexemeText} field is concatenated, so unset text is rendered as "null".
+     * @return the begin/end positions, the text and the type label joined into one string
+     */
+    public String toString() {
+        return this.getBeginPosition() + "-" + this.getEndPosition() +
+                " : " + this.lexemeText + " : \t" +
+                this.getLexemeTypeString();
+    }
 
 
 }