优化排序算法逻辑;

This commit is contained in:
Magese 2021-12-31 17:48:21 +08:00
parent 6ef4798752
commit dd7822b6be
1 changed files with 46 additions and 44 deletions

View File

@ -34,11 +34,17 @@ package org.wltea.analyzer.core;
@SuppressWarnings("unused")
class LexemePath extends QuickSortSet implements Comparable<LexemePath> {
//起始位置
/**
* 起始位置
*/
private int pathBegin;
//结束
/**
* 结束
*/
private int pathEnd;
//词元链的有效字符长度
/**
* 词元链的有效字符长度
*/
private int payloadLength;
LexemePath() {
@ -100,7 +106,6 @@ class LexemePath extends QuickSortSet implements Comparable<LexemePath> {
/**
* 移除尾部的Lexeme
*
*/
void removeTail() {
Lexeme tail = this.pollLast();
@ -117,7 +122,6 @@ class LexemePath extends QuickSortSet implements Comparable<LexemePath> {
/**
* 检测词元位置交叉有歧义的切分
*
*/
boolean checkCross(Lexeme lexeme) {
return (lexeme.getBegin() >= this.pathBegin && lexeme.getBegin() < this.pathEnd)
@ -141,7 +145,6 @@ class LexemePath extends QuickSortSet implements Comparable<LexemePath> {
/**
* 获取LexemePath的路径长度
*
*/
private int getPathLength() {
return this.pathEnd - this.pathBegin;
@ -150,7 +153,6 @@ class LexemePath extends QuickSortSet implements Comparable<LexemePath> {
/**
* X权重词元长度积
*
*/
private int getXWeight() {
int product = 1;
@ -191,48 +193,48 @@ class LexemePath extends QuickSortSet implements Comparable<LexemePath> {
}
public int compareTo(LexemePath o) {
//比较有效文本长度
// 比较有效文本长度
if (this.payloadLength > o.payloadLength) {
return -1;
} else if (this.payloadLength < o.payloadLength) {
return 1;
} else {
//比较词元个数越少越好
if (this.size() < o.size()) {
return -1;
} else if (this.size() > o.size()) {
return 1;
} else {
//路径跨度越大越好
if (this.getPathLength() > o.getPathLength()) {
return -1;
} else if (this.getPathLength() < o.getPathLength()) {
return 1;
} else {
//根据统计学结论逆向切分概率高于正向切分因此位置越靠后的优先
if (this.pathEnd > o.pathEnd) {
return -1;
} else if (pathEnd < o.pathEnd) {
return 1;
} else {
//词长越平均越好
if (this.getXWeight() > o.getXWeight()) {
return -1;
} else if (this.getXWeight() < o.getXWeight()) {
return 1;
} else {
//词元位置权重比较
if (this.getPWeight() > o.getPWeight()) {
return -1;
} else if (this.getPWeight() < o.getPWeight()) {
return 1;
}
}
}
}
}
}
// 比较词元个数越少越好
if (this.size() < o.size()) {
return -1;
} else if (this.size() > o.size()) {
return 1;
}
// 路径跨度越大越好
if (this.getPathLength() > o.getPathLength()) {
return -1;
} else if (this.getPathLength() < o.getPathLength()) {
return 1;
}
// 根据统计学结论逆向切分概率高于正向切分因此位置越靠后的优先
if (this.pathEnd > o.pathEnd) {
return -1;
} else if (pathEnd < o.pathEnd) {
return 1;
}
// 词长越平均越好
if (this.getXWeight() > o.getXWeight()) {
return -1;
} else if (this.getXWeight() < o.getXWeight()) {
return 1;
}
// 词元位置权重比较
if (this.getPWeight() > o.getPWeight()) {
return -1;
} else if (this.getPWeight() < o.getPWeight()) {
return 1;
}
return 0;
}