From 568dd9b2ed4e3edaee0b0862345848d3dae66c18 Mon Sep 17 00:00:00 2001 From: Adriano Crestani Campos Date: Wed, 15 Jan 2014 06:36:34 +0000 Subject: [PATCH] LUCENE-5344 Flexible StandardQueryParser behaves differently than ClassicQueryParser git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1558305 13f79535-47bb-0310-9956-ffa450edef68 --- .../AnalyzerQueryNodeProcessor.java | 420 ++++++++++-------- .../queryparser/util/QueryParserTestBase.java | 7 + 2 files changed, 248 insertions(+), 179 deletions(-) diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/AnalyzerQueryNodeProcessor.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/AnalyzerQueryNodeProcessor.java index 71d67091878..c0b3e721bb1 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/AnalyzerQueryNodeProcessor.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/AnalyzerQueryNodeProcessor.java @@ -18,7 +18,9 @@ package org.apache.lucene.queryparser.flexible.standard.processors; */ import java.io.IOException; + import java.util.ArrayList; +import java.util.Collections; import java.util.LinkedList; import java.util.List; @@ -29,9 +31,12 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.queryparser.flexible.core.QueryNodeException; import org.apache.lucene.queryparser.flexible.core.config.QueryConfigHandler; +import org.apache.lucene.queryparser.flexible.core.nodes.BooleanQueryNode; import org.apache.lucene.queryparser.flexible.core.nodes.FieldQueryNode; import org.apache.lucene.queryparser.flexible.core.nodes.FuzzyQueryNode; import org.apache.lucene.queryparser.flexible.core.nodes.GroupQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.ModifierQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.ModifierQueryNode.Modifier; import org.apache.lucene.queryparser.flexible.core.nodes.NoTokenFoundQueryNode; import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; import org.apache.lucene.queryparser.flexible.core.nodes.QuotedFieldQueryNode; @@ -40,6 +45,7 @@ import org.apache.lucene.queryparser.flexible.core.nodes.TextableQueryNode; import org.apache.lucene.queryparser.flexible.core.nodes.TokenizedPhraseQueryNode; import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessorImpl; import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler.ConfigurationKeys; +import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler.Operator; import org.apache.lucene.queryparser.flexible.standard.nodes.MultiPhraseQueryNode; import org.apache.lucene.queryparser.flexible.standard.nodes.RegexpQueryNode; import org.apache.lucene.queryparser.flexible.standard.nodes.StandardBooleanQueryNode; @@ -72,6 +78,8 @@ public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl { private Analyzer analyzer; private boolean positionIncrementsEnabled; + + private Operator defaultOperator; public AnalyzerQueryNodeProcessor() { // empty constructor @@ -85,7 +93,9 @@ public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl { this.analyzer = analyzer; this.positionIncrementsEnabled = false; Boolean positionIncrementsEnabled = getQueryConfigHandler().get(ConfigurationKeys.ENABLE_POSITION_INCREMENTS); - + Operator defaultOperator = getQueryConfigHandler().get(ConfigurationKeys.DEFAULT_OPERATOR); + this.defaultOperator = defaultOperator != null ? defaultOperator : Operator.OR; + if (positionIncrementsEnabled != null) { this.positionIncrementsEnabled = positionIncrementsEnabled; } @@ -93,7 +103,6 @@ public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl { if (this.analyzer != null) { return super.process(queryTree); } - } return queryTree; @@ -119,197 +128,250 @@ public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl { int positionCount = 0; boolean severalTokensAtSamePosition = false; - try (TokenStream source = this.analyzer.tokenStream(field, text)) { - source.reset(); - buffer = new CachingTokenFilter(source); - - if (buffer.hasAttribute(PositionIncrementAttribute.class)) { - posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class); - } - - try { - - while (buffer.incrementToken()) { - numTokens++; - int positionIncrement = (posIncrAtt != null) ? posIncrAtt - .getPositionIncrement() : 1; - if (positionIncrement != 0) { - positionCount += positionIncrement; - - } else { - severalTokensAtSamePosition = true; - } - + try { + try (TokenStream source = this.analyzer.tokenStream(field, text)) { + source.reset(); + buffer = new CachingTokenFilter(source); + + if (buffer.hasAttribute(PositionIncrementAttribute.class)) { + posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class); } - - } catch (IOException e) { - // ignore - } - } catch (IOException e) { - throw new RuntimeException(e); - } - - // rewind the buffer stream - buffer.reset(); - - if (!buffer.hasAttribute(CharTermAttribute.class)) { - return new NoTokenFoundQueryNode(); - } - - CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class); - - if (numTokens == 0) { - return new NoTokenFoundQueryNode(); - - } else if (numTokens == 1) { - String term = null; - try { - boolean hasNext; - hasNext = buffer.incrementToken(); - assert hasNext == true; - term = termAtt.toString(); - - } catch (IOException e) { - // safe to ignore, because we know the number of tokens - } - - fieldNode.setText(term); - - return fieldNode; - - } else if (severalTokensAtSamePosition || !(node instanceof QuotedFieldQueryNode)) { - if (positionCount == 1 || !(node instanceof QuotedFieldQueryNode)) { - // no phrase query: - LinkedList children = new LinkedList(); - - for (int i = 0; i < numTokens; i++) { - String term = null; - try { - boolean hasNext = buffer.incrementToken(); - assert hasNext == true; - term = termAtt.toString(); - - } catch (IOException e) { - // safe to ignore, because we know the number of tokens - } - - children.add(new FieldQueryNode(field, term, -1, -1)); - - } - return new GroupQueryNode( - new StandardBooleanQueryNode(children, positionCount==1)); - } else { - // phrase query: - MultiPhraseQueryNode mpq = new MultiPhraseQueryNode(); - - List multiTerms = new ArrayList(); - int position = -1; - int i = 0; - int termGroupCount = 0; - for (; i < numTokens; i++) { - String term = null; - int positionIncrement = 1; - try { - boolean hasNext = buffer.incrementToken(); - assert hasNext == true; - term = termAtt.toString(); - if (posIncrAtt != null) { - positionIncrement = posIncrAtt.getPositionIncrement(); - } - - } catch (IOException e) { - // safe to ignore, because we know the number of tokens - } - - if (positionIncrement > 0 && multiTerms.size() > 0) { - - for (FieldQueryNode termNode : multiTerms) { - - if (this.positionIncrementsEnabled) { - termNode.setPositionIncrement(position); - } else { - termNode.setPositionIncrement(termGroupCount); - } - - mpq.add(termNode); - - } - - // Only increment once for each "group" of - // terms that were in the same position: - termGroupCount++; - - multiTerms.clear(); - - } - - position += positionIncrement; - multiTerms.add(new FieldQueryNode(field, term, -1, -1)); - - } - - for (FieldQueryNode termNode : multiTerms) { - - if (this.positionIncrementsEnabled) { - termNode.setPositionIncrement(position); - - } else { - termNode.setPositionIncrement(termGroupCount); - } - - mpq.add(termNode); - - } - - return mpq; - - } - - } else { - - TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode(); - - int position = -1; - - for (int i = 0; i < numTokens; i++) { - String term = null; - int positionIncrement = 1; - + try { - boolean hasNext = buffer.incrementToken(); + + while (buffer.incrementToken()) { + numTokens++; + int positionIncrement = (posIncrAtt != null) ? posIncrAtt + .getPositionIncrement() : 1; + if (positionIncrement != 0) { + positionCount += positionIncrement; + + } else { + severalTokensAtSamePosition = true; + } + + } + + } catch (IOException e) { + // ignore + } + } catch (IOException e) { + throw new RuntimeException(e); + } + + // rewind the buffer stream + buffer.reset(); + + if (!buffer.hasAttribute(CharTermAttribute.class)) { + return new NoTokenFoundQueryNode(); + } + + CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class); + + if (numTokens == 0) { + return new NoTokenFoundQueryNode(); + + } else if (numTokens == 1) { + String term = null; + try { + boolean hasNext; + hasNext = buffer.incrementToken(); assert hasNext == true; term = termAtt.toString(); - - if (posIncrAtt != null) { - positionIncrement = posIncrAtt.getPositionIncrement(); - } - + } catch (IOException e) { // safe to ignore, because we know the number of tokens } - - FieldQueryNode newFieldNode = new FieldQueryNode(field, term, -1, -1); - - if (this.positionIncrementsEnabled) { - position += positionIncrement; - newFieldNode.setPositionIncrement(position); - + + fieldNode.setText(term); + + return fieldNode; + + } else if (severalTokensAtSamePosition || !(node instanceof QuotedFieldQueryNode)) { + if (positionCount == 1 || !(node instanceof QuotedFieldQueryNode)) { + // no phrase query: + + if (positionCount == 1) { + // simple case: only one position, with synonyms + LinkedList children = new LinkedList(); + + for (int i = 0; i < numTokens; i++) { + String term = null; + try { + boolean hasNext = buffer.incrementToken(); + assert hasNext == true; + term = termAtt.toString(); + + } catch (IOException e) { + // safe to ignore, because we know the number of tokens + } + + children.add(new FieldQueryNode(field, term, -1, -1)); + + } + return new GroupQueryNode( + new StandardBooleanQueryNode(children, positionCount==1)); + } else { + // multiple positions + QueryNode q = new StandardBooleanQueryNode(Collections.emptyList(),false); + QueryNode currentQuery = null; + for (int i = 0; i < numTokens; i++) { + String term = null; + try { + boolean hasNext = buffer.incrementToken(); + assert hasNext == true; + term = termAtt.toString(); + } catch (IOException e) { + // safe to ignore, because we know the number of tokens + } + if (posIncrAtt != null && posIncrAtt.getPositionIncrement() == 0) { + if (!(currentQuery instanceof BooleanQueryNode)) { + QueryNode t = currentQuery; + currentQuery = new StandardBooleanQueryNode(Collections.emptyList(), true); + ((BooleanQueryNode)currentQuery).add(t); + } + ((BooleanQueryNode)currentQuery).add(new FieldQueryNode(field, term, -1, -1)); + } else { + if (currentQuery != null) { + if (this.defaultOperator == Operator.OR) { + q.add(currentQuery); + } else { + q.add(new ModifierQueryNode(currentQuery, Modifier.MOD_REQ)); + } + } + currentQuery = new FieldQueryNode(field, term, -1, -1); + } + } + if (this.defaultOperator == Operator.OR) { + q.add(currentQuery); + } else { + q.add(new ModifierQueryNode(currentQuery, Modifier.MOD_REQ)); + } + + if (q instanceof BooleanQueryNode) { + q = new GroupQueryNode(q); + } + return q; + } } else { - newFieldNode.setPositionIncrement(i); + // phrase query: + MultiPhraseQueryNode mpq = new MultiPhraseQueryNode(); + + List multiTerms = new ArrayList(); + int position = -1; + int i = 0; + int termGroupCount = 0; + for (; i < numTokens; i++) { + String term = null; + int positionIncrement = 1; + try { + boolean hasNext = buffer.incrementToken(); + assert hasNext == true; + term = termAtt.toString(); + if (posIncrAtt != null) { + positionIncrement = posIncrAtt.getPositionIncrement(); + } + + } catch (IOException e) { + // safe to ignore, because we know the number of tokens + } + + if (positionIncrement > 0 && multiTerms.size() > 0) { + + for (FieldQueryNode termNode : multiTerms) { + + if (this.positionIncrementsEnabled) { + termNode.setPositionIncrement(position); + } else { + termNode.setPositionIncrement(termGroupCount); + } + + mpq.add(termNode); + + } + + // Only increment once for each "group" of + // terms that were in the same position: + termGroupCount++; + + multiTerms.clear(); + + } + + position += positionIncrement; + multiTerms.add(new FieldQueryNode(field, term, -1, -1)); + + } + + for (FieldQueryNode termNode : multiTerms) { + + if (this.positionIncrementsEnabled) { + termNode.setPositionIncrement(position); + + } else { + termNode.setPositionIncrement(termGroupCount); + } + + mpq.add(termNode); + + } + + return mpq; + + } + + } else { + + TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode(); + + int position = -1; + + for (int i = 0; i < numTokens; i++) { + String term = null; + int positionIncrement = 1; + + try { + boolean hasNext = buffer.incrementToken(); + assert hasNext == true; + term = termAtt.toString(); + + if (posIncrAtt != null) { + positionIncrement = posIncrAtt.getPositionIncrement(); + } + + } catch (IOException e) { + // safe to ignore, because we know the number of tokens + } + + FieldQueryNode newFieldNode = new FieldQueryNode(field, term, -1, -1); + + if (this.positionIncrementsEnabled) { + position += positionIncrement; + newFieldNode.setPositionIncrement(position); + + } else { + newFieldNode.setPositionIncrement(i); + } + + pq.add(newFieldNode); + + } + + return pq; + + } + } finally { + if (buffer != null) { + try { + buffer.close(); + } catch (IOException e) { + // safe to ignore } - - pq.add(newFieldNode); - } - - return pq; - } - } return node; - } @Override diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java index 7d138dbf57b..0c39939388f 100644 --- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java +++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java @@ -558,6 +558,13 @@ public abstract class QueryParserTestBase extends LuceneTestCase { assertQueryEquals("((stop))", qpAnalyzer, ""); assertTrue(getQuery("term term term", qpAnalyzer) instanceof BooleanQuery); assertTrue(getQuery("term +stop", qpAnalyzer) instanceof TermQuery); + + CommonQueryParserConfiguration cqpc = getParserConfig(qpAnalyzer); + setDefaultOperatorAND(cqpc); + assertQueryEquals(cqpc, "field", "term phrase term", + "+term +(+phrase1 +phrase2) +term"); + assertQueryEquals(cqpc, "field", "phrase", + "+phrase1 +phrase2"); } public void testRange() throws Exception {