mirror of https://github.com/apache/lucene.git
LUCENE-7698: fix CommonGramsQueryFilter to not produce a disconnected token graph
This commit is contained in:
parent
14b3622608
commit
b9c9cddff7
|
@ -142,6 +142,10 @@ Bug Fixes
|
|||
rewritten child query in their equals and hashCode implementations.
|
||||
(Adrien Grand)
|
||||
|
||||
* LUCENE-7698: CommonGramsQueryFilter was producing a disconnected
|
||||
token graph, messing up phrase queries when it was used during query
|
||||
parsing (Ere Maijala via Mike McCandless)
|
||||
|
||||
Improvements
|
||||
|
||||
* LUCENE-7055: Added Weight#scorerSupplier, which allows to estimate the cost
|
||||
|
|
|
@ -20,6 +20,7 @@ import java.io.IOException;
|
|||
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||
|
||||
import static org.apache.lucene.analysis.commongrams.CommonGramsFilter.GRAM_TYPE;
|
||||
|
@ -46,6 +47,7 @@ public final class CommonGramsQueryFilter extends TokenFilter {
|
|||
|
||||
private final TypeAttribute typeAttribute = addAttribute(TypeAttribute.class);
|
||||
private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class);
|
||||
private final PositionLengthAttribute posLengthAttribute = addAttribute(PositionLengthAttribute.class);
|
||||
|
||||
private State previous;
|
||||
private String previousType;
|
||||
|
@ -91,6 +93,8 @@ public final class CommonGramsQueryFilter extends TokenFilter {
|
|||
|
||||
if (isGramType()) {
|
||||
posIncAttribute.setPositionIncrement(1);
|
||||
// We must set this back to 1 (from e.g. 2 or higher) otherwise the token graph is disconnected:
|
||||
posLengthAttribute.setPositionLength(1);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -109,6 +113,8 @@ public final class CommonGramsQueryFilter extends TokenFilter {
|
|||
|
||||
if (isGramType()) {
|
||||
posIncAttribute.setPositionIncrement(1);
|
||||
// We must set this back to 1 (from e.g. 2 or higher) otherwise the token graph is disconnected:
|
||||
posLengthAttribute.setPositionLength(1);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -92,4 +92,14 @@ public class TestCommonGramsQueryFilterFactory extends BaseTokenStreamFactoryTes
|
|||
});
|
||||
assertTrue(expected.getMessage().contains("Unknown parameters"));
|
||||
}
|
||||
|
||||
public void testCompleteGraph() throws Exception {
|
||||
CommonGramsQueryFilterFactory factory = (CommonGramsQueryFilterFactory) tokenFilterFactory("CommonGramsQuery");
|
||||
CharArraySet words = factory.getCommonWords();
|
||||
assertTrue("words is null and it shouldn't be", words != null);
|
||||
assertTrue(words.contains("the"));
|
||||
Tokenizer tokenizer = whitespaceMockTokenizer("testing the factory works");
|
||||
TokenStream stream = factory.create(tokenizer);
|
||||
assertGraphStrings(stream, "testing_the the_factory factory works");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -57,6 +57,7 @@ import org.apache.lucene.util.graph.GraphTokenStreamFiniteStrings;
|
|||
public class QueryBuilder {
|
||||
protected Analyzer analyzer;
|
||||
protected boolean enablePositionIncrements = true;
|
||||
protected boolean enableGraphQueries = true;
|
||||
protected boolean autoGenerateMultiTermSynonymsPhraseQuery = false;
|
||||
|
||||
/** Creates a new QueryBuilder using the given analyzer. */
|
||||
|
@ -240,6 +241,20 @@ public class QueryBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
/** Enable or disable graph TokenStream processing (enabled by default).
|
||||
*
|
||||
* @lucene.experimental */
|
||||
public void setEnableGraphQueries(boolean v) {
|
||||
enableGraphQueries = v;
|
||||
}
|
||||
|
||||
/** Returns true if graph TokenStream processing is enabled (default).
|
||||
*
|
||||
* @lucene.experimental */
|
||||
public boolean getEnableGraphQueries() {
|
||||
return enableGraphQueries;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a query from a token stream.
|
||||
*
|
||||
|
@ -282,7 +297,7 @@ public class QueryBuilder {
|
|||
}
|
||||
|
||||
int positionLength = posLenAtt.getPositionLength();
|
||||
if (!isGraph && positionLength > 1) {
|
||||
if (enableGraphQueries && positionLength > 1) {
|
||||
isGraph = true;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -533,6 +533,24 @@ public class TestQueryParser extends QueryParserTestBase {
|
|||
assertEquals(guineaPigPhrase, smart.parse("\"guinea pig\""));
|
||||
}
|
||||
|
||||
public void testEnableGraphQueries() throws Exception {
|
||||
QueryParser dumb = new QueryParser("field", new Analyzer1());
|
||||
dumb.setSplitOnWhitespace(false);
|
||||
dumb.setEnableGraphQueries(false);
|
||||
|
||||
TermQuery guinea = new TermQuery(new Term("field", "guinea"));
|
||||
TermQuery pig = new TermQuery(new Term("field", "pig"));
|
||||
TermQuery cavy = new TermQuery(new Term("field", "cavy"));
|
||||
|
||||
// A multi-word synonym source will just form a boolean query when graph queries are disabled:
|
||||
Query inner = new SynonymQuery(new Term[] {new Term("field", "cavy"), new Term("field", "guinea")});
|
||||
BooleanQuery.Builder b = new BooleanQuery.Builder();
|
||||
b.add(inner, BooleanClause.Occur.SHOULD);
|
||||
b.add(pig, BooleanClause.Occur.SHOULD);
|
||||
BooleanQuery query = b.build();
|
||||
assertEquals(query, dumb.parse("guinea pig"));
|
||||
}
|
||||
|
||||
// TODO: Move to QueryParserTestBase once standard flexible parser gets this capability
|
||||
public void testOperatorsAndMultiWordSynonyms() throws Exception {
|
||||
Analyzer a = new MockSynonymAnalyzer();
|
||||
|
|
Loading…
Reference in New Issue