代码及注释格式化;

This commit is contained in:
Magese 2021-12-31 17:43:40 +08:00
parent 56f23a9027
commit 6ef4798752

View File

@ -31,42 +31,69 @@ package org.wltea.analyzer.core;
* IK词元对象
*/
@SuppressWarnings("unused")
public class Lexeme implements Comparable<Lexeme>{
//英文
public class Lexeme implements Comparable<Lexeme> {
/**
 * Lexeme type: English.
 */
static final int TYPE_ENGLISH = 1;
/**
 * Lexeme type: digits.
 */
static final int TYPE_ARABIC = 2;
/**
 * Lexeme type: mixed English letters and digits.
 */
static final int TYPE_LETTER = 3;
/**
 * Lexeme type: Chinese word.
 */
static final int TYPE_CNWORD = 4;
/**
 * Lexeme type: single Chinese character.
 */
static final int TYPE_CNCHAR = 64;
/**
 * Lexeme type: Japanese / Korean characters.
 */
static final int TYPE_OTHER_CJK = 8;
/**
 * Lexeme type: Chinese numeral.
 */
static final int TYPE_CNUM = 16;
/**
 * Lexeme type: Chinese measure word.
 */
static final int TYPE_COUNT = 32;
/**
 * Lexeme type: Chinese numeral combined with a measure word.
 * NOTE(review): 48 equals TYPE_CNUM + TYPE_COUNT (16 + 32); presumably intentional - confirm.
 */
static final int TYPE_CQUAN = 48;
/**
 * Start offset of the lexeme; added to {@code begin} to obtain the position in the text.
 */
private int offset;
/**
 * Start position of the lexeme relative to {@code offset}.
 */
private int begin;
/**
 * Length of the lexeme.
 */
private int length;
/**
 * Text of the lexeme.
 */
private String lexemeText;
/**
 * Type of the lexeme; the switch in getLexemeTypeString() maps the TYPE_* constants to names.
 */
private int lexemeType;
public Lexeme(int offset , int begin , int length , int lexemeType){
public Lexeme(int offset, int begin, int length, int lexemeType) {
this.offset = offset;
this.begin = begin;
if(length < 0){
if (length < 0) {
throw new IllegalArgumentException("length < 0");
}
this.length = length;
@ -78,21 +105,21 @@ public class Lexeme implements Comparable<Lexeme>{
* 起始位置偏移起始位置终止位置相同
* @see java.lang.Object#equals(Object o)
*/
public boolean equals(Object o){
if(o == null){
public boolean equals(Object o) {
if (o == null) {
return false;
}
if(this == o){
if (this == o) {
return true;
}
if(o instanceof Lexeme){
Lexeme other = (Lexeme)o;
if (o instanceof Lexeme) {
Lexeme other = (Lexeme) o;
return this.offset == other.getOffset()
&& this.begin == other.getBegin()
&& this.length == other.getLength();
}else{
} else {
return false;
}
}
@ -101,7 +128,7 @@ public class Lexeme implements Comparable<Lexeme>{
* 词元哈希编码算法
* @see java.lang.Object#hashCode()
*/
public int hashCode(){
public int hashCode() {
int absBegin = getBeginPosition();
int absEnd = getEndPosition();
return (absBegin * 37) + (absEnd * 31) + ((absBegin * absEnd) % getLength()) * 11;
@ -112,15 +139,15 @@ public class Lexeme implements Comparable<Lexeme>{
* @see java.lang.Comparable#compareTo(java.lang.Object)
*/
public int compareTo(Lexeme other) {
//起始位置优先
if(this.begin < other.getBegin()){
// 起始位置优先
if (this.begin < other.getBegin()) {
return -1;
}else if(this.begin == other.getBegin()){
//词元长度优先
//this.length < other.getLength()
} else if (this.begin == other.getBegin()) {
// 词元长度优先
// this.length < other.getLength()
return Integer.compare(other.getLength(), this.length);
}else{//this.begin > other.getBegin()
} else {
return 1;
}
}
@ -136,11 +163,13 @@ public class Lexeme implements Comparable<Lexeme>{
int getBegin() {
    // Start position of the lexeme relative to its offset.
    return this.begin;
}
/**
* 获取词元在文本中的起始位置
*
* @return int
*/
public int getBeginPosition(){
public int getBeginPosition() {
return offset + begin;
}
@ -150,22 +179,24 @@ public class Lexeme implements Comparable<Lexeme>{
/**
* 获取词元在文本中的结束位置
*
* @return int
*/
public int getEndPosition(){
public int getEndPosition() {
return offset + begin + length;
}
/**
* 获取词元的字符长度
*
* @return int
*/
public int getLength(){
public int getLength() {
return this.length;
}
public void setLength(int length) {
if(this.length < 0){
if (this.length < 0) {
throw new IllegalArgumentException("length < 0");
}
this.length = length;
@ -173,20 +204,21 @@ public class Lexeme implements Comparable<Lexeme>{
/**
* 获取词元的文本内容
*
* @return String
*/
public String getLexemeText() {
if(lexemeText == null){
if (lexemeText == null) {
return "";
}
return lexemeText;
}
void setLexemeText(String lexemeText) {
if(lexemeText == null){
if (lexemeText == null) {
this.lexemeText = "";
this.length = 0;
}else{
} else {
this.lexemeText = lexemeText;
this.length = lexemeText.length();
}
@ -194,6 +226,7 @@ public class Lexeme implements Comparable<Lexeme>{
/**
* 获取词元类型
*
* @return int
*/
int getLexemeType() {
@ -202,40 +235,41 @@ public class Lexeme implements Comparable<Lexeme>{
/**
* 获取词元类型标示字符串
*
* @return String
*/
public String getLexemeTypeString(){
switch(lexemeType) {
public String getLexemeTypeString() {
switch (lexemeType) {
case TYPE_ENGLISH :
case TYPE_ENGLISH:
return "ENGLISH";
case TYPE_ARABIC :
case TYPE_ARABIC:
return "ARABIC";
case TYPE_LETTER :
case TYPE_LETTER:
return "LETTER";
case TYPE_CNWORD :
case TYPE_CNWORD:
return "CN_WORD";
case TYPE_CNCHAR :
case TYPE_CNCHAR:
return "CN_CHAR";
case TYPE_OTHER_CJK :
case TYPE_OTHER_CJK:
return "OTHER_CJK";
case TYPE_COUNT :
case TYPE_COUNT:
return "COUNT";
case TYPE_CNUM :
case TYPE_CNUM:
return "TYPE_CNUM";
case TYPE_CQUAN:
return "TYPE_CQUAN";
default :
return "UNKONW";
default:
return "UNKNOWN";
}
}
@ -246,23 +280,25 @@ public class Lexeme implements Comparable<Lexeme>{
/**
* 合并两个相邻的词元
*
* @return boolean 词元是否成功合并
*/
boolean append(Lexeme l, int lexemeType){
if(l != null && this.getEndPosition() == l.getBeginPosition()){
boolean append(Lexeme l, int lexemeType) {
if (l != null && this.getEndPosition() == l.getBeginPosition()) {
this.length += l.getLength();
this.lexemeType = lexemeType;
return true;
}else {
} else {
return false;
}
}
/**
* ToString 方法
*
* @return 字符串输出
*/
public String toString(){
public String toString() {
return this.getBeginPosition() + "-" + this.getEndPosition() +
" : " + this.lexemeText + " : \t" +
this.getLexemeTypeString();