LUCENE-5344 Flexible StandardQueryParser behaves differently than ClassicQueryParser

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1558305 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adriano Crestani Campos 2014-01-15 06:36:34 +00:00
parent 428e70c223
commit 568dd9b2ed
2 changed files with 248 additions and 179 deletions

View File

@ -18,7 +18,9 @@ package org.apache.lucene.queryparser.flexible.standard.processors;
*/ */
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
@ -29,9 +31,12 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.queryparser.flexible.core.QueryNodeException; import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
import org.apache.lucene.queryparser.flexible.core.config.QueryConfigHandler; import org.apache.lucene.queryparser.flexible.core.config.QueryConfigHandler;
import org.apache.lucene.queryparser.flexible.core.nodes.BooleanQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.FieldQueryNode; import org.apache.lucene.queryparser.flexible.core.nodes.FieldQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.FuzzyQueryNode; import org.apache.lucene.queryparser.flexible.core.nodes.FuzzyQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.GroupQueryNode; import org.apache.lucene.queryparser.flexible.core.nodes.GroupQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.ModifierQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.ModifierQueryNode.Modifier;
import org.apache.lucene.queryparser.flexible.core.nodes.NoTokenFoundQueryNode; import org.apache.lucene.queryparser.flexible.core.nodes.NoTokenFoundQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.QuotedFieldQueryNode; import org.apache.lucene.queryparser.flexible.core.nodes.QuotedFieldQueryNode;
@ -40,6 +45,7 @@ import org.apache.lucene.queryparser.flexible.core.nodes.TextableQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.TokenizedPhraseQueryNode; import org.apache.lucene.queryparser.flexible.core.nodes.TokenizedPhraseQueryNode;
import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessorImpl; import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessorImpl;
import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler.ConfigurationKeys; import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler.ConfigurationKeys;
import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler.Operator;
import org.apache.lucene.queryparser.flexible.standard.nodes.MultiPhraseQueryNode; import org.apache.lucene.queryparser.flexible.standard.nodes.MultiPhraseQueryNode;
import org.apache.lucene.queryparser.flexible.standard.nodes.RegexpQueryNode; import org.apache.lucene.queryparser.flexible.standard.nodes.RegexpQueryNode;
import org.apache.lucene.queryparser.flexible.standard.nodes.StandardBooleanQueryNode; import org.apache.lucene.queryparser.flexible.standard.nodes.StandardBooleanQueryNode;
@ -73,6 +79,8 @@ public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl {
private boolean positionIncrementsEnabled; private boolean positionIncrementsEnabled;
private Operator defaultOperator;
public AnalyzerQueryNodeProcessor() { public AnalyzerQueryNodeProcessor() {
// empty constructor // empty constructor
} }
@ -85,6 +93,8 @@ public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl {
this.analyzer = analyzer; this.analyzer = analyzer;
this.positionIncrementsEnabled = false; this.positionIncrementsEnabled = false;
Boolean positionIncrementsEnabled = getQueryConfigHandler().get(ConfigurationKeys.ENABLE_POSITION_INCREMENTS); Boolean positionIncrementsEnabled = getQueryConfigHandler().get(ConfigurationKeys.ENABLE_POSITION_INCREMENTS);
Operator defaultOperator = getQueryConfigHandler().get(ConfigurationKeys.DEFAULT_OPERATOR);
this.defaultOperator = defaultOperator != null ? defaultOperator : Operator.OR;
if (positionIncrementsEnabled != null) { if (positionIncrementsEnabled != null) {
this.positionIncrementsEnabled = positionIncrementsEnabled; this.positionIncrementsEnabled = positionIncrementsEnabled;
@ -93,7 +103,6 @@ public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl {
if (this.analyzer != null) { if (this.analyzer != null) {
return super.process(queryTree); return super.process(queryTree);
} }
} }
return queryTree; return queryTree;
@ -119,100 +128,213 @@ public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl {
int positionCount = 0; int positionCount = 0;
boolean severalTokensAtSamePosition = false; boolean severalTokensAtSamePosition = false;
try (TokenStream source = this.analyzer.tokenStream(field, text)) { try {
source.reset(); try (TokenStream source = this.analyzer.tokenStream(field, text)) {
buffer = new CachingTokenFilter(source); source.reset();
buffer = new CachingTokenFilter(source);
if (buffer.hasAttribute(PositionIncrementAttribute.class)) { if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class); posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
}
try {
while (buffer.incrementToken()) {
numTokens++;
int positionIncrement = (posIncrAtt != null) ? posIncrAtt
.getPositionIncrement() : 1;
if (positionIncrement != 0) {
positionCount += positionIncrement;
} else {
severalTokensAtSamePosition = true;
}
}
} catch (IOException e) {
// ignore
}
} catch (IOException e) {
throw new RuntimeException(e);
} }
try { // rewind the buffer stream
buffer.reset();
while (buffer.incrementToken()) { if (!buffer.hasAttribute(CharTermAttribute.class)) {
numTokens++; return new NoTokenFoundQueryNode();
int positionIncrement = (posIncrAtt != null) ? posIncrAtt }
.getPositionIncrement() : 1;
if (positionIncrement != 0) {
positionCount += positionIncrement;
CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class);
if (numTokens == 0) {
return new NoTokenFoundQueryNode();
} else if (numTokens == 1) {
String term = null;
try {
boolean hasNext;
hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.toString();
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
fieldNode.setText(term);
return fieldNode;
} else if (severalTokensAtSamePosition || !(node instanceof QuotedFieldQueryNode)) {
if (positionCount == 1 || !(node instanceof QuotedFieldQueryNode)) {
// no phrase query:
if (positionCount == 1) {
// simple case: only one position, with synonyms
LinkedList<QueryNode> children = new LinkedList<QueryNode>();
for (int i = 0; i < numTokens; i++) {
String term = null;
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.toString();
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
children.add(new FieldQueryNode(field, term, -1, -1));
}
return new GroupQueryNode(
new StandardBooleanQueryNode(children, positionCount==1));
} else { } else {
severalTokensAtSamePosition = true; // multiple positions
QueryNode q = new StandardBooleanQueryNode(Collections.<QueryNode>emptyList(),false);
QueryNode currentQuery = null;
for (int i = 0; i < numTokens; i++) {
String term = null;
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.toString();
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
if (posIncrAtt != null && posIncrAtt.getPositionIncrement() == 0) {
if (!(currentQuery instanceof BooleanQueryNode)) {
QueryNode t = currentQuery;
currentQuery = new StandardBooleanQueryNode(Collections.<QueryNode>emptyList(), true);
((BooleanQueryNode)currentQuery).add(t);
}
((BooleanQueryNode)currentQuery).add(new FieldQueryNode(field, term, -1, -1));
} else {
if (currentQuery != null) {
if (this.defaultOperator == Operator.OR) {
q.add(currentQuery);
} else {
q.add(new ModifierQueryNode(currentQuery, Modifier.MOD_REQ));
}
}
currentQuery = new FieldQueryNode(field, term, -1, -1);
}
}
if (this.defaultOperator == Operator.OR) {
q.add(currentQuery);
} else {
q.add(new ModifierQueryNode(currentQuery, Modifier.MOD_REQ));
}
if (q instanceof BooleanQueryNode) {
q = new GroupQueryNode(q);
}
return q;
} }
} else {
// phrase query:
MultiPhraseQueryNode mpq = new MultiPhraseQueryNode();
List<FieldQueryNode> multiTerms = new ArrayList<FieldQueryNode>();
int position = -1;
int i = 0;
int termGroupCount = 0;
for (; i < numTokens; i++) {
String term = null;
int positionIncrement = 1;
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.toString();
if (posIncrAtt != null) {
positionIncrement = posIncrAtt.getPositionIncrement();
}
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
if (positionIncrement > 0 && multiTerms.size() > 0) {
for (FieldQueryNode termNode : multiTerms) {
if (this.positionIncrementsEnabled) {
termNode.setPositionIncrement(position);
} else {
termNode.setPositionIncrement(termGroupCount);
}
mpq.add(termNode);
}
// Only increment once for each "group" of
// terms that were in the same position:
termGroupCount++;
multiTerms.clear();
}
position += positionIncrement;
multiTerms.add(new FieldQueryNode(field, term, -1, -1));
}
for (FieldQueryNode termNode : multiTerms) {
if (this.positionIncrementsEnabled) {
termNode.setPositionIncrement(position);
} else {
termNode.setPositionIncrement(termGroupCount);
}
mpq.add(termNode);
}
return mpq;
} }
} catch (IOException e) { } else {
// ignore
}
} catch (IOException e) {
throw new RuntimeException(e);
}
// rewind the buffer stream TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode();
buffer.reset();
if (!buffer.hasAttribute(CharTermAttribute.class)) { int position = -1;
return new NoTokenFoundQueryNode();
}
CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class);
if (numTokens == 0) {
return new NoTokenFoundQueryNode();
} else if (numTokens == 1) {
String term = null;
try {
boolean hasNext;
hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.toString();
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
fieldNode.setText(term);
return fieldNode;
} else if (severalTokensAtSamePosition || !(node instanceof QuotedFieldQueryNode)) {
if (positionCount == 1 || !(node instanceof QuotedFieldQueryNode)) {
// no phrase query:
LinkedList<QueryNode> children = new LinkedList<QueryNode>();
for (int i = 0; i < numTokens; i++) { for (int i = 0; i < numTokens; i++) {
String term = null;
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.toString();
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
children.add(new FieldQueryNode(field, term, -1, -1));
}
return new GroupQueryNode(
new StandardBooleanQueryNode(children, positionCount==1));
} else {
// phrase query:
MultiPhraseQueryNode mpq = new MultiPhraseQueryNode();
List<FieldQueryNode> multiTerms = new ArrayList<FieldQueryNode>();
int position = -1;
int i = 0;
int termGroupCount = 0;
for (; i < numTokens; i++) {
String term = null; String term = null;
int positionIncrement = 1; int positionIncrement = 1;
try { try {
boolean hasNext = buffer.incrementToken(); boolean hasNext = buffer.incrementToken();
assert hasNext == true; assert hasNext == true;
term = termAtt.toString(); term = termAtt.toString();
if (posIncrAtt != null) { if (posIncrAtt != null) {
positionIncrement = posIncrAtt.getPositionIncrement(); positionIncrement = posIncrAtt.getPositionIncrement();
} }
@ -221,95 +343,35 @@ public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl {
// safe to ignore, because we know the number of tokens // safe to ignore, because we know the number of tokens
} }
if (positionIncrement > 0 && multiTerms.size() > 0) { FieldQueryNode newFieldNode = new FieldQueryNode(field, term, -1, -1);
for (FieldQueryNode termNode : multiTerms) {
if (this.positionIncrementsEnabled) {
termNode.setPositionIncrement(position);
} else {
termNode.setPositionIncrement(termGroupCount);
}
mpq.add(termNode);
}
// Only increment once for each "group" of
// terms that were in the same position:
termGroupCount++;
multiTerms.clear();
}
position += positionIncrement;
multiTerms.add(new FieldQueryNode(field, term, -1, -1));
}
for (FieldQueryNode termNode : multiTerms) {
if (this.positionIncrementsEnabled) { if (this.positionIncrementsEnabled) {
termNode.setPositionIncrement(position); position += positionIncrement;
newFieldNode.setPositionIncrement(position);
} else { } else {
termNode.setPositionIncrement(termGroupCount); newFieldNode.setPositionIncrement(i);
} }
mpq.add(termNode); pq.add(newFieldNode);
} }
return mpq; return pq;
} }
} finally {
} else { if (buffer != null) {
TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode();
int position = -1;
for (int i = 0; i < numTokens; i++) {
String term = null;
int positionIncrement = 1;
try { try {
boolean hasNext = buffer.incrementToken(); buffer.close();
assert hasNext == true;
term = termAtt.toString();
if (posIncrAtt != null) {
positionIncrement = posIncrAtt.getPositionIncrement();
}
} catch (IOException e) { } catch (IOException e) {
// safe to ignore, because we know the number of tokens // safe to ignore
} }
FieldQueryNode newFieldNode = new FieldQueryNode(field, term, -1, -1);
if (this.positionIncrementsEnabled) {
position += positionIncrement;
newFieldNode.setPositionIncrement(position);
} else {
newFieldNode.setPositionIncrement(i);
}
pq.add(newFieldNode);
} }
return pq;
} }
} }
return node; return node;
} }
@Override @Override

View File

@ -558,6 +558,13 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
assertQueryEquals("((stop))", qpAnalyzer, ""); assertQueryEquals("((stop))", qpAnalyzer, "");
assertTrue(getQuery("term term term", qpAnalyzer) instanceof BooleanQuery); assertTrue(getQuery("term term term", qpAnalyzer) instanceof BooleanQuery);
assertTrue(getQuery("term +stop", qpAnalyzer) instanceof TermQuery); assertTrue(getQuery("term +stop", qpAnalyzer) instanceof TermQuery);
CommonQueryParserConfiguration cqpc = getParserConfig(qpAnalyzer);
setDefaultOperatorAND(cqpc);
assertQueryEquals(cqpc, "field", "term phrase term",
"+term +(+phrase1 +phrase2) +term");
assertQueryEquals(cqpc, "field", "phrase",
"+phrase1 +phrase2");
} }
public void testRange() throws Exception { public void testRange() throws Exception {