mirror of https://github.com/apache/lucene.git
SOLR-7466: reverse-aware leading wildcards in complexphrase query parser
This commit is contained in:
parent
f87efac521
commit
d3f83bb948
|
@ -119,6 +119,8 @@ Upgrade Notes
|
||||||
risk in overlapping commits. Nonetheless users should continue to avoid excessive committing. Users are
|
risk in overlapping commits. Nonetheless users should continue to avoid excessive committing. Users are
|
||||||
advised to remove any pre-existing maxWarmingSearchers entries from their solrconfig.xml files.
|
advised to remove any pre-existing maxWarmingSearchers entries from their solrconfig.xml files.
|
||||||
|
|
||||||
|
* SOLR-7466: complexphrase query parser now supports leading wildcards; beware that such queries can be expensive.
|
||||||
|
Users are encouraged to use ReversedWildcardFilterFactory in index-time analysis.
|
||||||
|
|
||||||
New Features
|
New Features
|
||||||
----------------------
|
----------------------
|
||||||
|
@ -220,6 +222,9 @@ New Features
|
||||||
|
|
||||||
* SOLR-8530: Add HavingStream to Streaming API and StreamingExpressions (Joel Bernstein)
|
* SOLR-8530: Add HavingStream to Streaming API and StreamingExpressions (Joel Bernstein)
|
||||||
|
|
||||||
|
* SOLR-7466: Enable leading wildcard in complexphrase query parser, optimize it with ReversedWildcardFilterFactory
|
||||||
|
when it's provided (Mikhail Khludnev)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
----------------------
|
----------------------
|
||||||
* SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have
|
* SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have
|
||||||
|
|
|
@ -63,6 +63,8 @@ import org.apache.solr.search.SyntaxError;
|
||||||
*/
|
*/
|
||||||
public abstract class SolrQueryParserBase extends QueryBuilder {
|
public abstract class SolrQueryParserBase extends QueryBuilder {
|
||||||
|
|
||||||
|
/**
 * Single-character string one code unit above
 * {@link ReverseStringFilter#START_OF_HEADING_MARKER}. Used as an inclusive lower bound for
 * open-ended range queries so that reversed tokens, which start with the marker itself, are
 * excluded from the range.
 */
protected static final String REVERSE_WILDCARD_LOWER_BOUND =
    String.valueOf((char) (ReverseStringFilter.START_OF_HEADING_MARKER + 1));
|
||||||
|
|
||||||
public static final int TERMS_QUERY_THRESHOLD = 16; // @lucene.internal Set to a low value temporarily for better test coverage
|
public static final int TERMS_QUERY_THRESHOLD = 16; // @lucene.internal Set to a low value temporarily for better test coverage
|
||||||
|
|
||||||
static final int CONJ_NONE = 0;
|
static final int CONJ_NONE = 0;
|
||||||
|
@ -889,28 +891,24 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
||||||
return newFieldQuery(getAnalyzer(), field, queryText, quoted);
|
return newFieldQuery(getAnalyzer(), field, queryText, quoted);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected boolean isRangeShouldBeProtectedFromReverse(String field, String part1){
|
||||||
|
checkNullField(field);
|
||||||
|
SchemaField sf = schema.getField(field);
|
||||||
|
|
||||||
|
return part1 == null && getReversedWildcardFilterFactory(sf.getType())!=null;
|
||||||
|
}
|
||||||
|
|
||||||
// called from parser
|
// called from parser
|
||||||
protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws SyntaxError {
|
protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws SyntaxError {
|
||||||
checkNullField(field);
|
boolean reverse = isRangeShouldBeProtectedFromReverse(field, part1);
|
||||||
SchemaField sf = schema.getField(field);
|
return getRangeQueryImpl(field, reverse ? REVERSE_WILDCARD_LOWER_BOUND : part1, part2, startInclusive || reverse, endInclusive);
|
||||||
|
|
||||||
if (part1 == null) {
|
|
||||||
ReversedWildcardFilterFactory factory = getReversedWildcardFilterFactory(sf.getType());
|
|
||||||
if (factory != null) {
|
|
||||||
// There will be reversed tokens starting with u0001 that we want to exclude, so
|
|
||||||
// lets start at u0002 inclusive instead.
|
|
||||||
char[] buf = new char[1];
|
|
||||||
buf[0] = ReverseStringFilter.START_OF_HEADING_MARKER + 1;
|
|
||||||
part1 = new String(buf);
|
|
||||||
startInclusive = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return sf.getType().getRangeQuery(parser, sf, part1, part2, startInclusive, endInclusive);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected Query getRangeQueryImpl(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws SyntaxError {
|
||||||
|
checkNullField(field);
|
||||||
|
SchemaField sf = schema.getField(field);
|
||||||
|
return sf.getType().getRangeQuery(parser, sf, part1, part2, startInclusive, endInclusive);
|
||||||
|
}
|
||||||
// called from parser
|
// called from parser
|
||||||
protected Query getPrefixQuery(String field, String termStr) throws SyntaxError {
|
protected Query getPrefixQuery(String field, String termStr) throws SyntaxError {
|
||||||
checkNullField(field);
|
checkNullField(field);
|
||||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.solr.search;
|
||||||
|
|
||||||
import org.apache.lucene.queryparser.classic.ParseException;
|
import org.apache.lucene.queryparser.classic.ParseException;
|
||||||
import org.apache.lucene.queryparser.complexPhrase.ComplexPhraseQueryParser;
|
import org.apache.lucene.queryparser.complexPhrase.ComplexPhraseQueryParser;
|
||||||
|
import org.apache.lucene.search.MultiTermQuery;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.solr.common.params.CommonParams;
|
import org.apache.solr.common.params.CommonParams;
|
||||||
import org.apache.solr.common.params.SolrParams;
|
import org.apache.solr.common.params.SolrParams;
|
||||||
|
@ -60,6 +61,32 @@ public class ComplexPhraseQParserPlugin extends QParserPlugin {
|
||||||
*/
|
*/
|
||||||
class ComplexPhraseQParser extends QParser {
|
class ComplexPhraseQParser extends QParser {
|
||||||
|
|
||||||
|
final class SolrQueryParserDelegate extends SolrQueryParser {
|
||||||
|
private SolrQueryParserDelegate(QParser parser, String defaultField) {
|
||||||
|
super(parser, defaultField);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected org.apache.lucene.search.Query getWildcardQuery(String field, String termStr) throws SyntaxError {
|
||||||
|
return super.getWildcardQuery(field, termStr);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected org.apache.lucene.search.Query getRangeQuery(String field, String part1, String part2,
|
||||||
|
boolean startInclusive, boolean endInclusive) throws SyntaxError {
|
||||||
|
return super.getRangeQuery(field, part1, part2, startInclusive, endInclusive);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected boolean isRangeShouldBeProtectedFromReverse(String field, String part1) {
|
||||||
|
return super.isRangeShouldBeProtectedFromReverse(field, part1);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getLowerBoundForReverse() {
|
||||||
|
return REVERSE_WILDCARD_LOWER_BOUND;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ComplexPhraseQueryParser lparser;
|
ComplexPhraseQueryParser lparser;
|
||||||
|
|
||||||
boolean inOrder = true;
|
boolean inOrder = true;
|
||||||
|
@ -87,10 +114,45 @@ public class ComplexPhraseQParserPlugin extends QParserPlugin {
|
||||||
defaultField = getReq().getSchema().getDefaultSearchFieldName();
|
defaultField = getReq().getSchema().getDefaultSearchFieldName();
|
||||||
}
|
}
|
||||||
|
|
||||||
lparser = new ComplexPhraseQueryParser(defaultField, getReq().getSchema().getQueryAnalyzer());
|
SolrQueryParserDelegate reverseAwareParser = new SolrQueryParserDelegate(this, defaultField);
|
||||||
|
|
||||||
if (localParams != null)
|
lparser = new ComplexPhraseQueryParser(defaultField, getReq().getSchema().getQueryAnalyzer())
|
||||||
|
{
|
||||||
|
protected Query newWildcardQuery(org.apache.lucene.index.Term t) {
|
||||||
|
try {
|
||||||
|
org.apache.lucene.search.Query wildcardQuery = reverseAwareParser.getWildcardQuery(t.field(), t.text());
|
||||||
|
setRewriteMethod(wildcardQuery);
|
||||||
|
return wildcardQuery;
|
||||||
|
} catch (SyntaxError e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Query setRewriteMethod(org.apache.lucene.search.Query query) {
|
||||||
|
if (query instanceof MultiTermQuery) {
|
||||||
|
((MultiTermQuery) query).setRewriteMethod(
|
||||||
|
org.apache.lucene.search.MultiTermQuery.SCORING_BOOLEAN_REWRITE);
|
||||||
|
}
|
||||||
|
return query;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Query newRangeQuery(String field, String part1, String part2, boolean startInclusive,
|
||||||
|
boolean endInclusive) {
|
||||||
|
boolean reverse = reverseAwareParser.isRangeShouldBeProtectedFromReverse(field, part1);
|
||||||
|
return super.newRangeQuery(field,
|
||||||
|
reverse ? reverseAwareParser.getLowerBoundForReverse() : part1,
|
||||||
|
part2,
|
||||||
|
startInclusive || reverse,
|
||||||
|
endInclusive);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
;
|
||||||
|
|
||||||
|
lparser.setAllowLeadingWildcard(true);
|
||||||
|
|
||||||
|
if (localParams != null) {
|
||||||
inOrder = localParams.getBool("inOrder", inOrder);
|
inOrder = localParams.getBool("inOrder", inOrder);
|
||||||
|
}
|
||||||
|
|
||||||
lparser.setInOrder(inOrder);
|
lparser.setInOrder(inOrder);
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,113 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.solr.search;
|
||||||
|
|
||||||
|
import org.apache.solr.SolrTestCaseJ4;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
public class TestComplexPhraseLeadingWildcard extends SolrTestCaseJ4 {
|
||||||
|
|
||||||
|
private static final String noReverseText = "three";
|
||||||
|
private static final String withOriginal = "one";
|
||||||
|
private static final String withoutOriginal = "two";
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void beforeClass() throws Exception {
|
||||||
|
initCore("solrconfig.xml","schema-reversed.xml");
|
||||||
|
assertU(doc123(1, "one ever"));
|
||||||
|
assertU(doc123(2, "once forever"));
|
||||||
|
|
||||||
|
assertU(doc123(7, "once slope forever"));
|
||||||
|
assertU(doc123(8, "once again slope forever"));
|
||||||
|
assertU(doc123(9, "forever once"));
|
||||||
|
assertU(commit());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testReverseWithOriginal() throws Exception {
|
||||||
|
checkField(withOriginal);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// prefix query won't match without original tokens
|
||||||
|
@Test
|
||||||
|
public void testReverseWithoutOriginal() throws Exception {
|
||||||
|
assertQ( "prefix query doesn't work without original term",
|
||||||
|
req("q","{!complexphrase inOrder=true}\"on* for*\"",
|
||||||
|
"df",withoutOriginal),
|
||||||
|
expect());
|
||||||
|
|
||||||
|
assertQ("postfix query works fine even without original",
|
||||||
|
req("q","{!complexphrase inOrder=true}\"*nce *ver\"",
|
||||||
|
"df",withoutOriginal),
|
||||||
|
expect("2"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWithoutReverse() throws Exception {
|
||||||
|
checkField(noReverseText);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void checkField(String field) {
|
||||||
|
assertQ(
|
||||||
|
req("q","{!complexphrase inOrder=true}\"on* *ver\"",
|
||||||
|
"df",field,
|
||||||
|
"indent","on",
|
||||||
|
"debugQuery", "true"),
|
||||||
|
expect("1","2"));
|
||||||
|
|
||||||
|
assertQ(
|
||||||
|
req("q","{!complexphrase inOrder=true}\"ON* *VER\"",
|
||||||
|
"df",field),
|
||||||
|
expect("1","2"));
|
||||||
|
|
||||||
|
assertQ(
|
||||||
|
req("q","{!complexphrase inOrder=true}\"ON* *ver\"",
|
||||||
|
"df",field),
|
||||||
|
expect("1","2"));
|
||||||
|
|
||||||
|
assertQ(
|
||||||
|
req("q","{!complexphrase inOrder=true}\"on* *ver\"~1",
|
||||||
|
"df",field),
|
||||||
|
expect("1","2","7"));
|
||||||
|
|
||||||
|
assertQ("range works if reverse doesn't mess",
|
||||||
|
req("q","{!complexphrase inOrder=true}\"on* [* TO a]\"",
|
||||||
|
"df",field),
|
||||||
|
expect());
|
||||||
|
|
||||||
|
assertQ("range works if reverse doesn't mess",
|
||||||
|
req("q","{!complexphrase inOrder=true}\"[on TO onZ] for*\"",
|
||||||
|
"df",field),
|
||||||
|
expect("2"));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String doc123(int id, String text){
|
||||||
|
return adoc("id",""+id, withOriginal, text, withoutOriginal, text, noReverseText, text);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String [] expect(String ...ids) {
|
||||||
|
String[] xpathes = new String[ids.length+1];
|
||||||
|
xpathes[0]= "//result[@numFound=" +ids.length+ "]";
|
||||||
|
int i=1;
|
||||||
|
for(String id : ids) {
|
||||||
|
xpathes[i++] = "//doc/int[@name='id' and text()='"+id+"']";
|
||||||
|
}
|
||||||
|
return xpathes;
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue