优化排序算法逻辑;

This commit is contained in:
Magese 2021-12-31 17:48:21 +08:00
parent 6ef4798752
commit dd7822b6be
1 changed files with 46 additions and 44 deletions

View File

@ -34,11 +34,17 @@ package org.wltea.analyzer.core;
@SuppressWarnings("unused") @SuppressWarnings("unused")
class LexemePath extends QuickSortSet implements Comparable<LexemePath> { class LexemePath extends QuickSortSet implements Comparable<LexemePath> {
//起始位置 /**
* 起始位置
*/
private int pathBegin; private int pathBegin;
//结束 /**
* 结束
*/
private int pathEnd; private int pathEnd;
//词元链的有效字符长度 /**
* 词元链的有效字符长度
*/
private int payloadLength; private int payloadLength;
LexemePath() { LexemePath() {
@ -100,7 +106,6 @@ class LexemePath extends QuickSortSet implements Comparable<LexemePath> {
/** /**
* 移除尾部的Lexeme * 移除尾部的Lexeme
*
*/ */
void removeTail() { void removeTail() {
Lexeme tail = this.pollLast(); Lexeme tail = this.pollLast();
@ -117,7 +122,6 @@ class LexemePath extends QuickSortSet implements Comparable<LexemePath> {
/** /**
* 检测词元位置交叉有歧义的切分 * 检测词元位置交叉有歧义的切分
*
*/ */
boolean checkCross(Lexeme lexeme) { boolean checkCross(Lexeme lexeme) {
return (lexeme.getBegin() >= this.pathBegin && lexeme.getBegin() < this.pathEnd) return (lexeme.getBegin() >= this.pathBegin && lexeme.getBegin() < this.pathEnd)
@ -141,7 +145,6 @@ class LexemePath extends QuickSortSet implements Comparable<LexemePath> {
/** /**
* 获取LexemePath的路径长度 * 获取LexemePath的路径长度
*
*/ */
private int getPathLength() { private int getPathLength() {
return this.pathEnd - this.pathBegin; return this.pathEnd - this.pathBegin;
@ -150,7 +153,6 @@ class LexemePath extends QuickSortSet implements Comparable<LexemePath> {
/** /**
* X权重词元长度积 * X权重词元长度积
*
*/ */
private int getXWeight() { private int getXWeight() {
int product = 1; int product = 1;
@ -191,48 +193,48 @@ class LexemePath extends QuickSortSet implements Comparable<LexemePath> {
} }
public int compareTo(LexemePath o) { public int compareTo(LexemePath o) {
//比较有效文本长度 // 比较有效文本长度
if (this.payloadLength > o.payloadLength) { if (this.payloadLength > o.payloadLength) {
return -1; return -1;
} else if (this.payloadLength < o.payloadLength) { } else if (this.payloadLength < o.payloadLength) {
return 1; return 1;
} else {
//比较词元个数越少越好
if (this.size() < o.size()) {
return -1;
} else if (this.size() > o.size()) {
return 1;
} else {
//路径跨度越大越好
if (this.getPathLength() > o.getPathLength()) {
return -1;
} else if (this.getPathLength() < o.getPathLength()) {
return 1;
} else {
//根据统计学结论逆向切分概率高于正向切分因此位置越靠后的优先
if (this.pathEnd > o.pathEnd) {
return -1;
} else if (pathEnd < o.pathEnd) {
return 1;
} else {
//词长越平均越好
if (this.getXWeight() > o.getXWeight()) {
return -1;
} else if (this.getXWeight() < o.getXWeight()) {
return 1;
} else {
//词元位置权重比较
if (this.getPWeight() > o.getPWeight()) {
return -1;
} else if (this.getPWeight() < o.getPWeight()) {
return 1;
}
}
}
}
}
} }
// 比较词元个数越少越好
if (this.size() < o.size()) {
return -1;
} else if (this.size() > o.size()) {
return 1;
}
// 路径跨度越大越好
if (this.getPathLength() > o.getPathLength()) {
return -1;
} else if (this.getPathLength() < o.getPathLength()) {
return 1;
}
// 根据统计学结论逆向切分概率高于正向切分因此位置越靠后的优先
if (this.pathEnd > o.pathEnd) {
return -1;
} else if (pathEnd < o.pathEnd) {
return 1;
}
// 词长越平均越好
if (this.getXWeight() > o.getXWeight()) {
return -1;
} else if (this.getXWeight() < o.getXWeight()) {
return 1;
}
// 词元位置权重比较
if (this.getPWeight() > o.getPWeight()) {
return -1;
} else if (this.getPWeight() < o.getPWeight()) {
return 1;
}
return 0; return 0;
} }