代码及注释格式化;

This commit is contained in:
Magese 2021-12-31 17:43:40 +08:00
parent 56f23a9027
commit 6ef4798752
1 changed files with 219 additions and 183 deletions

View File

@ -31,242 +31,278 @@ package org.wltea.analyzer.core;
* IK词元对象 * IK词元对象
*/ */
@SuppressWarnings("unused")
public class Lexeme implements Comparable<Lexeme> {
    /**
     * English letters.
     */
    static final int TYPE_ENGLISH = 1;
    /**
     * Digits.
     */
    static final int TYPE_ARABIC = 2;
    /**
     * Mixed English letters and digits.
     */
    static final int TYPE_LETTER = 3;
    /**
     * Chinese word.
     */
    static final int TYPE_CNWORD = 4;
    /**
     * Single Chinese character.
     */
    static final int TYPE_CNCHAR = 64;
    /**
     * Japanese / Korean characters.
     */
    static final int TYPE_OTHER_CJK = 8;
    /**
     * Chinese numeral.
     */
    static final int TYPE_CNUM = 16;
    /**
     * Chinese measure word.
     */
    static final int TYPE_COUNT = 32;
    /**
     * Chinese numeral + measure word compound.
     */
    static final int TYPE_CQUAN = 48;

    /**
     * Start offset of the lexeme.
     */
    private int offset;
    /**
     * Start position of the lexeme relative to {@link #offset}.
     */
    private int begin;
    /**
     * Length of the lexeme.
     */
    private int length;
    /**
     * Text of the lexeme; may be {@code null} until it is set.
     */
    private String lexemeText;
    /**
     * Type of the lexeme (one of the {@code TYPE_*} constants).
     */
    private int lexemeType;

    /**
     * Creates a lexeme.
     *
     * @param offset     start offset
     * @param begin      start position relative to {@code offset}
     * @param length     lexeme length, must not be negative
     * @param lexemeType lexeme type
     * @throws IllegalArgumentException if {@code length} is negative
     */
    public Lexeme(int offset, int begin, int length, int lexemeType) {
        this.offset = offset;
        this.begin = begin;
        if (length < 0) {
            throw new IllegalArgumentException("length < 0");
        }
        this.length = length;
        this.lexemeType = lexemeType;
    }

    /**
     * Two lexemes are equal when their offset, relative begin and length are
     * all the same. Text and type are not compared.
     *
     * @see java.lang.Object#equals(Object o)
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        // instanceof is false for null, so this also covers o == null.
        if (!(o instanceof Lexeme)) {
            return false;
        }
        Lexeme other = (Lexeme) o;
        return this.offset == other.offset
                && this.begin == other.begin
                && this.length == other.length;
    }

    /**
     * Hash code derived from the absolute begin/end positions and the length.
     * Lexemes that are {@link #equals(Object) equal} produce the same value.
     *
     * @see java.lang.Object#hashCode()
     */
    @Override
    public int hashCode() {
        int absBegin = getBeginPosition();
        int absEnd = getEndPosition();
        // A zero-length lexeme is legal (constructor, setLength(0),
        // setLexemeText(null)); guard the modulo so it cannot divide by zero.
        // For length > 0 the result is the same as the unguarded formula.
        int mix = (length == 0) ? 0 : (absBegin * absEnd) % length;
        return (absBegin * 37) + (absEnd * 31) + mix * 11;
    }

    /**
     * Ordering used in sorted collections: smaller relative begin first; for
     * the same begin, the longer lexeme first. The offset is not consulted, so
     * a result of {@code 0} does not by itself imply {@link #equals(Object)}.
     *
     * @see java.lang.Comparable#compareTo(java.lang.Object)
     */
    @Override
    public int compareTo(Lexeme other) {
        // Start position takes priority.
        if (this.begin < other.getBegin()) {
            return -1;
        } else if (this.begin == other.getBegin()) {
            // Then length, descending: the longer lexeme sorts first.
            return Integer.compare(other.getLength(), this.length);
        } else {
            return 1;
        }
    }

    private int getOffset() {
        return offset;
    }

    public void setOffset(int offset) {
        this.offset = offset;
    }

    int getBegin() {
        return begin;
    }

    /**
     * Gets the start position of the lexeme in the text.
     *
     * @return {@code offset + begin}
     */
    public int getBeginPosition() {
        return offset + begin;
    }

    public void setBegin(int begin) {
        this.begin = begin;
    }

    /**
     * Gets the end position of the lexeme in the text.
     *
     * @return {@code offset + begin + length}
     */
    public int getEndPosition() {
        return offset + begin + length;
    }

    /**
     * Gets the length of the lexeme.
     *
     * @return the length
     */
    public int getLength() {
        return this.length;
    }

    /**
     * Sets the length of the lexeme.
     *
     * @param length new length, must not be negative
     * @throws IllegalArgumentException if {@code length} is negative; the
     *                                  current length is left unchanged
     */
    public void setLength(int length) {
        // Validate the argument, not the current field value.
        if (length < 0) {
            throw new IllegalArgumentException("length < 0");
        }
        this.length = length;
    }

    /**
     * Gets the text of the lexeme.
     *
     * @return the text, or an empty string when no text has been set
     */
    public String getLexemeText() {
        if (lexemeText == null) {
            return "";
        }
        return lexemeText;
    }

    /**
     * Sets the text of the lexeme and updates the length to match it.
     * A {@code null} argument is stored as an empty string with length 0.
     *
     * @param lexemeText the new text, may be {@code null}
     */
    void setLexemeText(String lexemeText) {
        if (lexemeText == null) {
            this.lexemeText = "";
            this.length = 0;
        } else {
            this.lexemeText = lexemeText;
            this.length = lexemeText.length();
        }
    }

    /**
     * Gets the lexeme type.
     *
     * @return the type value
     */
    int getLexemeType() {
        return lexemeType;
    }

    /**
     * Gets the label string for the lexeme type.
     *
     * @return the label of the matching {@code TYPE_*} constant, or
     *         {@code "UNKNOWN"} when the type matches none of them
     */
    public String getLexemeTypeString() {
        switch (lexemeType) {
            case TYPE_ENGLISH:
                return "ENGLISH";

            case TYPE_ARABIC:
                return "ARABIC";

            case TYPE_LETTER:
                return "LETTER";

            case TYPE_CNWORD:
                return "CN_WORD";

            case TYPE_CNCHAR:
                return "CN_CHAR";

            case TYPE_OTHER_CJK:
                return "OTHER_CJK";

            case TYPE_COUNT:
                return "COUNT";

            case TYPE_CNUM:
                return "TYPE_CNUM";

            case TYPE_CQUAN:
                return "TYPE_CQUAN";

            default:
                return "UNKNOWN";
        }
    }

    public void setLexemeType(int lexemeType) {
        this.lexemeType = lexemeType;
    }

    /**
     * Merges an adjacent lexeme into this one. The merge happens only when
     * {@code l} starts exactly where this lexeme ends; this lexeme then grows
     * by {@code l}'s length and takes the given type. The stored text is not
     * touched.
     *
     * @param l          the lexeme to append, may be {@code null}
     * @param lexemeType the type to assign to this lexeme on success
     * @return {@code true} if the lexemes were merged, {@code false} otherwise
     */
    boolean append(Lexeme l, int lexemeType) {
        if (l != null && this.getEndPosition() == l.getBeginPosition()) {
            this.length += l.getLength();
            this.lexemeType = lexemeType;
            return true;
        } else {
            return false;
        }
    }

    /**
     * Formats the lexeme as {@code begin-end : text : <TAB>type}. The raw text
     * field is printed, so an unset text appears as {@code null}.
     *
     * @return the string form of this lexeme
     */
    @Override
    public String toString() {
        return this.getBeginPosition() + "-" + this.getEndPosition() +
                " : " + this.lexemeText + " : \t" +
                this.getLexemeTypeString();
    }

}