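Resolve QueryParser exception: SWMCQueryBuilder now calls setAutoGeneratePhraseQueries(false) instead of true; also tidies IKQueryExpressionParser (final fields, curretElement renamed to currentElement, Element made a static nested class).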
This commit is contained in:
parent
fb4defedb7
commit
f0059e2c78
|
@ -46,11 +46,11 @@ import java.util.Stack;
|
||||||
public class IKQueryExpressionParser {
|
public class IKQueryExpressionParser {
|
||||||
|
|
||||||
|
|
||||||
private List<Element> elements = new ArrayList<>();
|
private final List<Element> elements = new ArrayList<>();
|
||||||
|
|
||||||
private Stack<Query> querys = new Stack<>();
|
private final Stack<Query> querys = new Stack<>();
|
||||||
|
|
||||||
private Stack<Element> operates = new Stack<>();
|
private final Stack<Element> operates = new Stack<>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 解析查询表达式,生成Lucene Query对象
|
* 解析查询表达式,生成Lucene Query对象
|
||||||
|
@ -61,9 +61,9 @@ public class IKQueryExpressionParser {
|
||||||
Query lucenceQuery = null;
|
Query lucenceQuery = null;
|
||||||
if (expression != null && !"".equals(expression.trim())) {
|
if (expression != null && !"".equals(expression.trim())) {
|
||||||
try {
|
try {
|
||||||
//文法解析
|
// 文法解析
|
||||||
this.splitElements(expression);
|
this.splitElements(expression);
|
||||||
//语法解析
|
// 语法解析
|
||||||
this.parseSyntax();
|
this.parseSyntax();
|
||||||
if (this.querys.size() == 1) {
|
if (this.querys.size() == 1) {
|
||||||
lucenceQuery = this.querys.pop();
|
lucenceQuery = this.querys.pop();
|
||||||
|
@ -87,263 +87,263 @@ public class IKQueryExpressionParser {
|
||||||
if (expression == null) {
|
if (expression == null) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
Element curretElement = null;
|
Element currentElement = null;
|
||||||
|
|
||||||
char[] expChars = expression.toCharArray();
|
char[] expChars = expression.toCharArray();
|
||||||
for (char expChar : expChars) {
|
for (char expChar : expChars) {
|
||||||
switch (expChar) {
|
switch (expChar) {
|
||||||
case '&':
|
case '&':
|
||||||
if (curretElement == null) {
|
if (currentElement == null) {
|
||||||
curretElement = new Element();
|
currentElement = new Element();
|
||||||
curretElement.type = '&';
|
currentElement.type = '&';
|
||||||
curretElement.append(expChar);
|
currentElement.append(expChar);
|
||||||
} else if (curretElement.type == '&') {
|
} else if (currentElement.type == '&') {
|
||||||
curretElement.append(expChar);
|
currentElement.append(expChar);
|
||||||
this.elements.add(curretElement);
|
this.elements.add(currentElement);
|
||||||
curretElement = null;
|
currentElement = null;
|
||||||
} else if (curretElement.type == '\'') {
|
} else if (currentElement.type == '\'') {
|
||||||
curretElement.append(expChar);
|
currentElement.append(expChar);
|
||||||
} else {
|
} else {
|
||||||
this.elements.add(curretElement);
|
this.elements.add(currentElement);
|
||||||
curretElement = new Element();
|
currentElement = new Element();
|
||||||
curretElement.type = '&';
|
currentElement.type = '&';
|
||||||
curretElement.append(expChar);
|
currentElement.append(expChar);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case '|':
|
case '|':
|
||||||
if (curretElement == null) {
|
if (currentElement == null) {
|
||||||
curretElement = new Element();
|
currentElement = new Element();
|
||||||
curretElement.type = '|';
|
currentElement.type = '|';
|
||||||
curretElement.append(expChar);
|
currentElement.append(expChar);
|
||||||
} else if (curretElement.type == '|') {
|
} else if (currentElement.type == '|') {
|
||||||
curretElement.append(expChar);
|
currentElement.append(expChar);
|
||||||
this.elements.add(curretElement);
|
this.elements.add(currentElement);
|
||||||
curretElement = null;
|
currentElement = null;
|
||||||
} else if (curretElement.type == '\'') {
|
} else if (currentElement.type == '\'') {
|
||||||
curretElement.append(expChar);
|
currentElement.append(expChar);
|
||||||
} else {
|
} else {
|
||||||
this.elements.add(curretElement);
|
this.elements.add(currentElement);
|
||||||
curretElement = new Element();
|
currentElement = new Element();
|
||||||
curretElement.type = '|';
|
currentElement.type = '|';
|
||||||
curretElement.append(expChar);
|
currentElement.append(expChar);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case '-':
|
case '-':
|
||||||
if (curretElement != null) {
|
if (currentElement != null) {
|
||||||
if (curretElement.type == '\'') {
|
if (currentElement.type == '\'') {
|
||||||
curretElement.append(expChar);
|
currentElement.append(expChar);
|
||||||
continue;
|
continue;
|
||||||
} else {
|
} else {
|
||||||
this.elements.add(curretElement);
|
this.elements.add(currentElement);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
curretElement = new Element();
|
currentElement = new Element();
|
||||||
curretElement.type = '-';
|
currentElement.type = '-';
|
||||||
curretElement.append(expChar);
|
currentElement.append(expChar);
|
||||||
this.elements.add(curretElement);
|
this.elements.add(currentElement);
|
||||||
curretElement = null;
|
currentElement = null;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case '(':
|
case '(':
|
||||||
if (curretElement != null) {
|
if (currentElement != null) {
|
||||||
if (curretElement.type == '\'') {
|
if (currentElement.type == '\'') {
|
||||||
curretElement.append(expChar);
|
currentElement.append(expChar);
|
||||||
continue;
|
continue;
|
||||||
} else {
|
} else {
|
||||||
this.elements.add(curretElement);
|
this.elements.add(currentElement);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
curretElement = new Element();
|
currentElement = new Element();
|
||||||
curretElement.type = '(';
|
currentElement.type = '(';
|
||||||
curretElement.append(expChar);
|
currentElement.append(expChar);
|
||||||
this.elements.add(curretElement);
|
this.elements.add(currentElement);
|
||||||
curretElement = null;
|
currentElement = null;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case ')':
|
case ')':
|
||||||
if (curretElement != null) {
|
if (currentElement != null) {
|
||||||
if (curretElement.type == '\'') {
|
if (currentElement.type == '\'') {
|
||||||
curretElement.append(expChar);
|
currentElement.append(expChar);
|
||||||
continue;
|
continue;
|
||||||
} else {
|
} else {
|
||||||
this.elements.add(curretElement);
|
this.elements.add(currentElement);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
curretElement = new Element();
|
currentElement = new Element();
|
||||||
curretElement.type = ')';
|
currentElement.type = ')';
|
||||||
curretElement.append(expChar);
|
currentElement.append(expChar);
|
||||||
this.elements.add(curretElement);
|
this.elements.add(currentElement);
|
||||||
curretElement = null;
|
currentElement = null;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case ':':
|
case ':':
|
||||||
if (curretElement != null) {
|
if (currentElement != null) {
|
||||||
if (curretElement.type == '\'') {
|
if (currentElement.type == '\'') {
|
||||||
curretElement.append(expChar);
|
currentElement.append(expChar);
|
||||||
continue;
|
continue;
|
||||||
} else {
|
} else {
|
||||||
this.elements.add(curretElement);
|
this.elements.add(currentElement);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
curretElement = new Element();
|
currentElement = new Element();
|
||||||
curretElement.type = ':';
|
currentElement.type = ':';
|
||||||
curretElement.append(expChar);
|
currentElement.append(expChar);
|
||||||
this.elements.add(curretElement);
|
this.elements.add(currentElement);
|
||||||
curretElement = null;
|
currentElement = null;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case '=':
|
case '=':
|
||||||
if (curretElement != null) {
|
if (currentElement != null) {
|
||||||
if (curretElement.type == '\'') {
|
if (currentElement.type == '\'') {
|
||||||
curretElement.append(expChar);
|
currentElement.append(expChar);
|
||||||
continue;
|
continue;
|
||||||
} else {
|
} else {
|
||||||
this.elements.add(curretElement);
|
this.elements.add(currentElement);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
curretElement = new Element();
|
currentElement = new Element();
|
||||||
curretElement.type = '=';
|
currentElement.type = '=';
|
||||||
curretElement.append(expChar);
|
currentElement.append(expChar);
|
||||||
this.elements.add(curretElement);
|
this.elements.add(currentElement);
|
||||||
curretElement = null;
|
currentElement = null;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case ' ':
|
case ' ':
|
||||||
if (curretElement != null) {
|
if (currentElement != null) {
|
||||||
if (curretElement.type == '\'') {
|
if (currentElement.type == '\'') {
|
||||||
curretElement.append(expChar);
|
currentElement.append(expChar);
|
||||||
} else {
|
} else {
|
||||||
this.elements.add(curretElement);
|
this.elements.add(currentElement);
|
||||||
curretElement = null;
|
currentElement = null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case '\'':
|
case '\'':
|
||||||
if (curretElement == null) {
|
if (currentElement == null) {
|
||||||
curretElement = new Element();
|
currentElement = new Element();
|
||||||
curretElement.type = '\'';
|
currentElement.type = '\'';
|
||||||
|
|
||||||
} else if (curretElement.type == '\'') {
|
} else if (currentElement.type == '\'') {
|
||||||
this.elements.add(curretElement);
|
this.elements.add(currentElement);
|
||||||
curretElement = null;
|
currentElement = null;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
this.elements.add(curretElement);
|
this.elements.add(currentElement);
|
||||||
curretElement = new Element();
|
currentElement = new Element();
|
||||||
curretElement.type = '\'';
|
currentElement.type = '\'';
|
||||||
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case '[':
|
case '[':
|
||||||
if (curretElement != null) {
|
if (currentElement != null) {
|
||||||
if (curretElement.type == '\'') {
|
if (currentElement.type == '\'') {
|
||||||
curretElement.append(expChar);
|
currentElement.append(expChar);
|
||||||
continue;
|
continue;
|
||||||
} else {
|
} else {
|
||||||
this.elements.add(curretElement);
|
this.elements.add(currentElement);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
curretElement = new Element();
|
currentElement = new Element();
|
||||||
curretElement.type = '[';
|
currentElement.type = '[';
|
||||||
curretElement.append(expChar);
|
currentElement.append(expChar);
|
||||||
this.elements.add(curretElement);
|
this.elements.add(currentElement);
|
||||||
curretElement = null;
|
currentElement = null;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case ']':
|
case ']':
|
||||||
if (curretElement != null) {
|
if (currentElement != null) {
|
||||||
if (curretElement.type == '\'') {
|
if (currentElement.type == '\'') {
|
||||||
curretElement.append(expChar);
|
currentElement.append(expChar);
|
||||||
continue;
|
continue;
|
||||||
} else {
|
} else {
|
||||||
this.elements.add(curretElement);
|
this.elements.add(currentElement);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
curretElement = new Element();
|
currentElement = new Element();
|
||||||
curretElement.type = ']';
|
currentElement.type = ']';
|
||||||
curretElement.append(expChar);
|
currentElement.append(expChar);
|
||||||
this.elements.add(curretElement);
|
this.elements.add(currentElement);
|
||||||
curretElement = null;
|
currentElement = null;
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case '{':
|
case '{':
|
||||||
if (curretElement != null) {
|
if (currentElement != null) {
|
||||||
if (curretElement.type == '\'') {
|
if (currentElement.type == '\'') {
|
||||||
curretElement.append(expChar);
|
currentElement.append(expChar);
|
||||||
continue;
|
continue;
|
||||||
} else {
|
} else {
|
||||||
this.elements.add(curretElement);
|
this.elements.add(currentElement);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
curretElement = new Element();
|
currentElement = new Element();
|
||||||
curretElement.type = '{';
|
currentElement.type = '{';
|
||||||
curretElement.append(expChar);
|
currentElement.append(expChar);
|
||||||
this.elements.add(curretElement);
|
this.elements.add(currentElement);
|
||||||
curretElement = null;
|
currentElement = null;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case '}':
|
case '}':
|
||||||
if (curretElement != null) {
|
if (currentElement != null) {
|
||||||
if (curretElement.type == '\'') {
|
if (currentElement.type == '\'') {
|
||||||
curretElement.append(expChar);
|
currentElement.append(expChar);
|
||||||
continue;
|
continue;
|
||||||
} else {
|
} else {
|
||||||
this.elements.add(curretElement);
|
this.elements.add(currentElement);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
curretElement = new Element();
|
currentElement = new Element();
|
||||||
curretElement.type = '}';
|
currentElement.type = '}';
|
||||||
curretElement.append(expChar);
|
currentElement.append(expChar);
|
||||||
this.elements.add(curretElement);
|
this.elements.add(currentElement);
|
||||||
curretElement = null;
|
currentElement = null;
|
||||||
|
|
||||||
break;
|
break;
|
||||||
case ',':
|
case ',':
|
||||||
if (curretElement != null) {
|
if (currentElement != null) {
|
||||||
if (curretElement.type == '\'') {
|
if (currentElement.type == '\'') {
|
||||||
curretElement.append(expChar);
|
currentElement.append(expChar);
|
||||||
continue;
|
continue;
|
||||||
} else {
|
} else {
|
||||||
this.elements.add(curretElement);
|
this.elements.add(currentElement);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
curretElement = new Element();
|
currentElement = new Element();
|
||||||
curretElement.type = ',';
|
currentElement.type = ',';
|
||||||
curretElement.append(expChar);
|
currentElement.append(expChar);
|
||||||
this.elements.add(curretElement);
|
this.elements.add(currentElement);
|
||||||
curretElement = null;
|
currentElement = null;
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
if (curretElement == null) {
|
if (currentElement == null) {
|
||||||
curretElement = new Element();
|
currentElement = new Element();
|
||||||
curretElement.type = 'F';
|
currentElement.type = 'F';
|
||||||
curretElement.append(expChar);
|
currentElement.append(expChar);
|
||||||
|
|
||||||
} else if (curretElement.type == 'F') {
|
} else if (currentElement.type == 'F') {
|
||||||
curretElement.append(expChar);
|
currentElement.append(expChar);
|
||||||
|
|
||||||
} else if (curretElement.type == '\'') {
|
} else if (currentElement.type == '\'') {
|
||||||
curretElement.append(expChar);
|
currentElement.append(expChar);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
this.elements.add(curretElement);
|
this.elements.add(currentElement);
|
||||||
curretElement = new Element();
|
currentElement = new Element();
|
||||||
curretElement.type = 'F';
|
currentElement.type = 'F';
|
||||||
curretElement.append(expChar);
|
currentElement.append(expChar);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (curretElement != null) {
|
if (currentElement != null) {
|
||||||
this.elements.add(curretElement);
|
this.elements.add(currentElement);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -359,7 +359,7 @@ public class IKQueryExpressionParser {
|
||||||
throw new IllegalStateException("表达式异常: = 或 : 号丢失");
|
throw new IllegalStateException("表达式异常: = 或 : 号丢失");
|
||||||
}
|
}
|
||||||
Element e3 = this.elements.get(i + 2);
|
Element e3 = this.elements.get(i + 2);
|
||||||
//处理 = 和 : 运算
|
// 处理 = 和 : 运算
|
||||||
if ('\'' == e3.type) {
|
if ('\'' == e3.type) {
|
||||||
i += 2;
|
i += 2;
|
||||||
if ('=' == e2.type) {
|
if ('=' == e2.type) {
|
||||||
|
@ -367,14 +367,14 @@ public class IKQueryExpressionParser {
|
||||||
this.querys.push(tQuery);
|
this.querys.push(tQuery);
|
||||||
} else {
|
} else {
|
||||||
String keyword = e3.toString();
|
String keyword = e3.toString();
|
||||||
//SWMCQuery Here
|
// SWMCQuery Here
|
||||||
Query _SWMCQuery = SWMCQueryBuilder.create(e.toString(), keyword);
|
Query _SWMCQuery = SWMCQueryBuilder.create(e.toString(), keyword);
|
||||||
this.querys.push(_SWMCQuery);
|
this.querys.push(_SWMCQuery);
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if ('[' == e3.type || '{' == e3.type) {
|
} else if ('[' == e3.type || '{' == e3.type) {
|
||||||
i += 2;
|
i += 2;
|
||||||
//处理 [] 和 {}
|
// 处理 [] 和 {}
|
||||||
LinkedList<Element> eQueue = new LinkedList<>();
|
LinkedList<Element> eQueue = new LinkedList<>();
|
||||||
eQueue.add(e3);
|
eQueue.add(e3);
|
||||||
for (i++; i < this.elements.size(); i++) {
|
for (i++; i < this.elements.size(); i++) {
|
||||||
|
@ -384,7 +384,7 @@ public class IKQueryExpressionParser {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
//翻译RangeQuery
|
// 翻译RangeQuery
|
||||||
Query rangeQuery = this.toTermRangeQuery(e, eQueue);
|
Query rangeQuery = this.toTermRangeQuery(e, eQueue);
|
||||||
this.querys.push(rangeQuery);
|
this.querys.push(rangeQuery);
|
||||||
} else {
|
} else {
|
||||||
|
@ -475,10 +475,10 @@ public class IKQueryExpressionParser {
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
//q1 instanceof TermQuery
|
// q1 instanceof TermQuery
|
||||||
//q1 instanceof TermRangeQuery
|
// q1 instanceof TermRangeQuery
|
||||||
//q1 instanceof PhraseQuery
|
// q1 instanceof PhraseQuery
|
||||||
//others
|
// others
|
||||||
resultQuery.add(q1, Occur.MUST);
|
resultQuery.add(q1, Occur.MUST);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -496,10 +496,10 @@ public class IKQueryExpressionParser {
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
//q1 instanceof TermQuery
|
// q1 instanceof TermQuery
|
||||||
//q1 instanceof TermRangeQuery
|
// q1 instanceof TermRangeQuery
|
||||||
//q1 instanceof PhraseQuery
|
// q1 instanceof PhraseQuery
|
||||||
//others
|
// others
|
||||||
resultQuery.add(q2, Occur.MUST);
|
resultQuery.add(q2, Occur.MUST);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -518,10 +518,10 @@ public class IKQueryExpressionParser {
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
//q1 instanceof TermQuery
|
// q1 instanceof TermQuery
|
||||||
//q1 instanceof TermRangeQuery
|
// q1 instanceof TermRangeQuery
|
||||||
//q1 instanceof PhraseQuery
|
// q1 instanceof PhraseQuery
|
||||||
//others
|
// others
|
||||||
resultQuery.add(q1, Occur.SHOULD);
|
resultQuery.add(q1, Occur.SHOULD);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -538,10 +538,10 @@ public class IKQueryExpressionParser {
|
||||||
resultQuery.add(q2, Occur.SHOULD);
|
resultQuery.add(q2, Occur.SHOULD);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
//q2 instanceof TermQuery
|
// q2 instanceof TermQuery
|
||||||
//q2 instanceof TermRangeQuery
|
// q2 instanceof TermRangeQuery
|
||||||
//q2 instanceof PhraseQuery
|
// q2 instanceof PhraseQuery
|
||||||
//others
|
// others
|
||||||
resultQuery.add(q2, Occur.SHOULD);
|
resultQuery.add(q2, Occur.SHOULD);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -563,10 +563,10 @@ public class IKQueryExpressionParser {
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
//q1 instanceof TermQuery
|
// q1 instanceof TermQuery
|
||||||
//q1 instanceof TermRangeQuery
|
// q1 instanceof TermRangeQuery
|
||||||
//q1 instanceof PhraseQuery
|
// q1 instanceof PhraseQuery
|
||||||
//others
|
// others
|
||||||
resultQuery.add(q1, Occur.MUST);
|
resultQuery.add(q1, Occur.MUST);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -584,7 +584,7 @@ public class IKQueryExpressionParser {
|
||||||
boolean includeLast;
|
boolean includeLast;
|
||||||
String firstValue;
|
String firstValue;
|
||||||
String lastValue = null;
|
String lastValue = null;
|
||||||
//检查第一个元素是否是[或者{
|
// 检查第一个元素是否是[或者{
|
||||||
Element first = elements.getFirst();
|
Element first = elements.getFirst();
|
||||||
if ('[' == first.type) {
|
if ('[' == first.type) {
|
||||||
includeFirst = true;
|
includeFirst = true;
|
||||||
|
@ -593,7 +593,7 @@ public class IKQueryExpressionParser {
|
||||||
} else {
|
} else {
|
||||||
throw new IllegalStateException("表达式异常");
|
throw new IllegalStateException("表达式异常");
|
||||||
}
|
}
|
||||||
//检查最后一个元素是否是]或者}
|
// 检查最后一个元素是否是]或者}
|
||||||
Element last = elements.getLast();
|
Element last = elements.getLast();
|
||||||
if (']' == last.type) {
|
if (']' == last.type) {
|
||||||
includeLast = true;
|
includeLast = true;
|
||||||
|
@ -605,7 +605,7 @@ public class IKQueryExpressionParser {
|
||||||
if (elements.size() < 4 || elements.size() > 5) {
|
if (elements.size() < 4 || elements.size() > 5) {
|
||||||
throw new IllegalStateException("表达式异常, RangeQuery 错误");
|
throw new IllegalStateException("表达式异常, RangeQuery 错误");
|
||||||
}
|
}
|
||||||
//读出中间部分
|
// 读出中间部分
|
||||||
Element e2 = elements.get(1);
|
Element e2 = elements.get(1);
|
||||||
if ('\'' == e2.type) {
|
if ('\'' == e2.type) {
|
||||||
firstValue = e2.toString();
|
firstValue = e2.toString();
|
||||||
|
@ -673,7 +673,7 @@ public class IKQueryExpressionParser {
|
||||||
* @author linliangyi
|
* @author linliangyi
|
||||||
* May 20, 2010
|
* May 20, 2010
|
||||||
*/
|
*/
|
||||||
private class Element {
|
private static class Element {
|
||||||
char type = 0;
|
char type = 0;
|
||||||
StringBuffer eleTextBuff;
|
StringBuffer eleTextBuff;
|
||||||
|
|
||||||
|
@ -692,11 +692,9 @@ public class IKQueryExpressionParser {
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
IKQueryExpressionParser parser = new IKQueryExpressionParser();
|
IKQueryExpressionParser parser = new IKQueryExpressionParser();
|
||||||
//String ikQueryExp = "newsTitle:'的两款《魔兽世界》插件Bigfoot和月光宝盒'";
|
|
||||||
String ikQueryExp = "(id='ABcdRf' && date:{'20010101','20110101'} && keyword:'魔兽中国') || (content:'KSHT-KSH-A001-18' || ulr='www.ik.com') - name:'林良益'";
|
String ikQueryExp = "(id='ABcdRf' && date:{'20010101','20110101'} && keyword:'魔兽中国') || (content:'KSHT-KSH-A001-18' || ulr='www.ik.com') - name:'林良益'";
|
||||||
Query result = parser.parseExp(ikQueryExp);
|
Query result = parser.parseExp(ikQueryExp);
|
||||||
System.out.println(result);
|
System.out.println(result);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -45,6 +45,7 @@ import java.util.List;
|
||||||
*
|
*
|
||||||
* @author linliangyi
|
* @author linliangyi
|
||||||
*/
|
*/
|
||||||
|
@SuppressWarnings("unused")
|
||||||
class SWMCQueryBuilder {
|
class SWMCQueryBuilder {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -56,9 +57,9 @@ class SWMCQueryBuilder {
|
||||||
if (fieldName == null || keywords == null) {
|
if (fieldName == null || keywords == null) {
|
||||||
throw new IllegalArgumentException("参数 fieldName 、 keywords 不能为null.");
|
throw new IllegalArgumentException("参数 fieldName 、 keywords 不能为null.");
|
||||||
}
|
}
|
||||||
//1.对keywords进行分词处理
|
// 1.对keywords进行分词处理
|
||||||
List<Lexeme> lexemes = doAnalyze(keywords);
|
List<Lexeme> lexemes = doAnalyze(keywords);
|
||||||
//2.根据分词结果,生成SWMCQuery
|
// 2.根据分词结果,生成SWMCQuery
|
||||||
return getSWMCQuery(fieldName, lexemes);
|
return getSWMCQuery(fieldName, lexemes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -84,20 +85,20 @@ class SWMCQueryBuilder {
|
||||||
* 根据分词结果生成SWMC搜索
|
* 根据分词结果生成SWMC搜索
|
||||||
*/
|
*/
|
||||||
private static Query getSWMCQuery(String fieldName, List<Lexeme> lexemes) {
|
private static Query getSWMCQuery(String fieldName, List<Lexeme> lexemes) {
|
||||||
//构造SWMC的查询表达式
|
// 构造SWMC的查询表达式
|
||||||
StringBuilder keywordBuffer = new StringBuilder();
|
StringBuilder keywordBuffer = new StringBuilder();
|
||||||
//精简的SWMC的查询表达式
|
// 精简的SWMC的查询表达式
|
||||||
StringBuilder keywordBuffer_Short = new StringBuilder();
|
StringBuilder keywordBuffer_Short = new StringBuilder();
|
||||||
//记录最后词元长度
|
// 记录最后词元长度
|
||||||
int lastLexemeLength = 0;
|
int lastLexemeLength = 0;
|
||||||
//记录最后词元结束位置
|
// 记录最后词元结束位置
|
||||||
int lastLexemeEnd = -1;
|
int lastLexemeEnd = -1;
|
||||||
|
|
||||||
int shortCount = 0;
|
int shortCount = 0;
|
||||||
int totalCount = 0;
|
int totalCount = 0;
|
||||||
for (Lexeme l : lexemes) {
|
for (Lexeme l : lexemes) {
|
||||||
totalCount += l.getLength();
|
totalCount += l.getLength();
|
||||||
//精简表达式
|
// 精简表达式
|
||||||
if (l.getLength() > 1) {
|
if (l.getLength() > 1) {
|
||||||
keywordBuffer_Short.append(' ').append(l.getLexemeText());
|
keywordBuffer_Short.append(' ').append(l.getLexemeText());
|
||||||
shortCount += l.getLength();
|
shortCount += l.getLength();
|
||||||
|
@ -106,7 +107,7 @@ class SWMCQueryBuilder {
|
||||||
if (lastLexemeLength == 0) {
|
if (lastLexemeLength == 0) {
|
||||||
keywordBuffer.append(l.getLexemeText());
|
keywordBuffer.append(l.getLexemeText());
|
||||||
} else if (lastLexemeLength == 1 && l.getLength() == 1
|
} else if (lastLexemeLength == 1 && l.getLength() == 1
|
||||||
&& lastLexemeEnd == l.getBeginPosition()) {//单字位置相邻,长度为一,合并)
|
&& lastLexemeEnd == l.getBeginPosition()) {// 单字位置相邻,长度为一,合并)
|
||||||
keywordBuffer.append(l.getLexemeText());
|
keywordBuffer.append(l.getLexemeText());
|
||||||
} else {
|
} else {
|
||||||
keywordBuffer.append(' ').append(l.getLexemeText());
|
keywordBuffer.append(' ').append(l.getLexemeText());
|
||||||
|
@ -116,10 +117,10 @@ class SWMCQueryBuilder {
|
||||||
lastLexemeEnd = l.getEndPosition();
|
lastLexemeEnd = l.getEndPosition();
|
||||||
}
|
}
|
||||||
|
|
||||||
//借助lucene queryparser 生成SWMC Query
|
// 借助lucene queryparser 生成SWMC Query
|
||||||
QueryParser qp = new QueryParser(fieldName, new StandardAnalyzer());
|
QueryParser qp = new QueryParser(fieldName, new StandardAnalyzer());
|
||||||
|
qp.setAutoGeneratePhraseQueries(false);
|
||||||
qp.setDefaultOperator(QueryParser.AND_OPERATOR);
|
qp.setDefaultOperator(QueryParser.AND_OPERATOR);
|
||||||
qp.setAutoGeneratePhraseQueries(true);
|
|
||||||
|
|
||||||
if ((shortCount * 1.0f / totalCount) > 0.5f) {
|
if ((shortCount * 1.0f / totalCount) > 0.5f) {
|
||||||
try {
|
try {
|
||||||
|
|
Loading…
Reference in New Issue