mirror of https://github.com/apache/lucene.git
SOLR-7466: reverse-aware leading wildcards in complexphrase query parser
This commit is contained in:
parent
f87efac521
commit
d3f83bb948
|
@ -119,6 +119,8 @@ Upgrade Notes
|
|||
risk in overlapping commits. Nonetheless users should continue to avoid excessive committing. Users are
|
||||
advised to remove any pre-existing maxWarmingSearchers entries from their solrconfig.xml files.
|
||||
|
||||
* SOLR-7466: complexphrase query parser now supports leading wildcards. Beware that such queries can be expensive.
|
||||
Users are encouraged to use ReversedWildcardFilterFactory in index-time analysis.
|
||||
|
||||
New Features
|
||||
----------------------
|
||||
|
@ -220,6 +222,9 @@ New Features
|
|||
|
||||
* SOLR-8530: Add HavingStream to Streaming API and StreamingExpressions (Joel Bernstein)
|
||||
|
||||
* SOLR-7466: Enable leading wildcard in complexphrase query parser, optimize it with ReversedWildcardFilterFactory
|
||||
when it's provided (Mikhail Khludnev)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
* SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have
|
||||
|
|
|
@ -63,6 +63,8 @@ import org.apache.solr.search.SyntaxError;
|
|||
*/
|
||||
public abstract class SolrQueryParserBase extends QueryBuilder {
|
||||
|
||||
/**
 * One-character string whose character is {@code ReverseStringFilter.START_OF_HEADING_MARKER + 1}.
 * It is substituted for an open lower range bound on reversed-wildcard fields so that the range starts
 * just past the marker character.
 */
protected static final String REVERSE_WILDCARD_LOWER_BOUND = new String(new char[]{ReverseStringFilter.START_OF_HEADING_MARKER + 1});
|
||||
|
||||
public static final int TERMS_QUERY_THRESHOLD = 16; // @lucene.internal Set to a low value temporarily for better test coverage
|
||||
|
||||
static final int CONJ_NONE = 0;
|
||||
|
@ -889,28 +891,24 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
|||
return newFieldQuery(getAnalyzer(), field, queryText, quoted);
|
||||
}
|
||||
|
||||
protected boolean isRangeShouldBeProtectedFromReverse(String field, String part1){
|
||||
checkNullField(field);
|
||||
SchemaField sf = schema.getField(field);
|
||||
|
||||
return part1 == null && getReversedWildcardFilterFactory(sf.getType())!=null;
|
||||
}
|
||||
|
||||
// called from parser
|
||||
protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws SyntaxError {
|
||||
checkNullField(field);
|
||||
SchemaField sf = schema.getField(field);
|
||||
|
||||
if (part1 == null) {
|
||||
ReversedWildcardFilterFactory factory = getReversedWildcardFilterFactory(sf.getType());
|
||||
if (factory != null) {
|
||||
// There will be reversed tokens starting with u0001 that we want to exclude, so
|
||||
// lets start at u0002 inclusive instead.
|
||||
char[] buf = new char[1];
|
||||
buf[0] = ReverseStringFilter.START_OF_HEADING_MARKER + 1;
|
||||
part1 = new String(buf);
|
||||
startInclusive = true;
|
||||
}
|
||||
}
|
||||
|
||||
return sf.getType().getRangeQuery(parser, sf, part1, part2, startInclusive, endInclusive);
|
||||
boolean reverse = isRangeShouldBeProtectedFromReverse(field, part1);
|
||||
return getRangeQueryImpl(field, reverse ? REVERSE_WILDCARD_LOWER_BOUND : part1, part2, startInclusive || reverse, endInclusive);
|
||||
}
|
||||
|
||||
protected Query getRangeQueryImpl(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws SyntaxError {
|
||||
checkNullField(field);
|
||||
SchemaField sf = schema.getField(field);
|
||||
return sf.getType().getRangeQuery(parser, sf, part1, part2, startInclusive, endInclusive);
|
||||
}
|
||||
// called from parser
|
||||
protected Query getPrefixQuery(String field, String termStr) throws SyntaxError {
|
||||
checkNullField(field);
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.solr.search;
|
|||
|
||||
import org.apache.lucene.queryparser.classic.ParseException;
|
||||
import org.apache.lucene.queryparser.complexPhrase.ComplexPhraseQueryParser;
|
||||
import org.apache.lucene.search.MultiTermQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
|
@ -60,6 +61,32 @@ public class ComplexPhraseQParserPlugin extends QParserPlugin {
|
|||
*/
|
||||
class ComplexPhraseQParser extends QParser {
|
||||
|
||||
final class SolrQueryParserDelegate extends SolrQueryParser {
|
||||
private SolrQueryParserDelegate(QParser parser, String defaultField) {
|
||||
super(parser, defaultField);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected org.apache.lucene.search.Query getWildcardQuery(String field, String termStr) throws SyntaxError {
|
||||
return super.getWildcardQuery(field, termStr);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected org.apache.lucene.search.Query getRangeQuery(String field, String part1, String part2,
|
||||
boolean startInclusive, boolean endInclusive) throws SyntaxError {
|
||||
return super.getRangeQuery(field, part1, part2, startInclusive, endInclusive);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isRangeShouldBeProtectedFromReverse(String field, String part1) {
|
||||
return super.isRangeShouldBeProtectedFromReverse(field, part1);
|
||||
}
|
||||
|
||||
public String getLowerBoundForReverse() {
|
||||
return REVERSE_WILDCARD_LOWER_BOUND;
|
||||
}
|
||||
}
|
||||
|
||||
ComplexPhraseQueryParser lparser;
|
||||
|
||||
boolean inOrder = true;
|
||||
|
@ -87,11 +114,46 @@ public class ComplexPhraseQParserPlugin extends QParserPlugin {
|
|||
defaultField = getReq().getSchema().getDefaultSearchFieldName();
|
||||
}
|
||||
|
||||
lparser = new ComplexPhraseQueryParser(defaultField, getReq().getSchema().getQueryAnalyzer());
|
||||
SolrQueryParserDelegate reverseAwareParser = new SolrQueryParserDelegate(this, defaultField);
|
||||
|
||||
lparser = new ComplexPhraseQueryParser(defaultField, getReq().getSchema().getQueryAnalyzer())
|
||||
{
|
||||
protected Query newWildcardQuery(org.apache.lucene.index.Term t) {
|
||||
try {
|
||||
org.apache.lucene.search.Query wildcardQuery = reverseAwareParser.getWildcardQuery(t.field(), t.text());
|
||||
setRewriteMethod(wildcardQuery);
|
||||
return wildcardQuery;
|
||||
} catch (SyntaxError e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
if (localParams != null)
|
||||
private Query setRewriteMethod(org.apache.lucene.search.Query query) {
|
||||
if (query instanceof MultiTermQuery) {
|
||||
((MultiTermQuery) query).setRewriteMethod(
|
||||
org.apache.lucene.search.MultiTermQuery.SCORING_BOOLEAN_REWRITE);
|
||||
}
|
||||
return query;
|
||||
}
|
||||
|
||||
protected Query newRangeQuery(String field, String part1, String part2, boolean startInclusive,
|
||||
boolean endInclusive) {
|
||||
boolean reverse = reverseAwareParser.isRangeShouldBeProtectedFromReverse(field, part1);
|
||||
return super.newRangeQuery(field,
|
||||
reverse ? reverseAwareParser.getLowerBoundForReverse() : part1,
|
||||
part2,
|
||||
startInclusive || reverse,
|
||||
endInclusive);
|
||||
}
|
||||
}
|
||||
;
|
||||
|
||||
lparser.setAllowLeadingWildcard(true);
|
||||
|
||||
if (localParams != null) {
|
||||
inOrder = localParams.getBool("inOrder", inOrder);
|
||||
|
||||
}
|
||||
|
||||
lparser.setInOrder(inOrder);
|
||||
|
||||
QueryParser.Operator defaultOperator = QueryParsing.getQueryParserDefaultOperator(getReq().getSchema(), getParam(QueryParsing.OP));
|
||||
|
|
|
@ -0,0 +1,113 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.search;
|
||||
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestComplexPhraseLeadingWildcard extends SolrTestCaseJ4 {
|
||||
|
||||
private static final String noReverseText = "three";
|
||||
private static final String withOriginal = "one";
|
||||
private static final String withoutOriginal = "two";
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
initCore("solrconfig.xml","schema-reversed.xml");
|
||||
assertU(doc123(1, "one ever"));
|
||||
assertU(doc123(2, "once forever"));
|
||||
|
||||
assertU(doc123(7, "once slope forever"));
|
||||
assertU(doc123(8, "once again slope forever"));
|
||||
assertU(doc123(9, "forever once"));
|
||||
assertU(commit());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReverseWithOriginal() throws Exception {
|
||||
checkField(withOriginal);
|
||||
|
||||
}
|
||||
|
||||
// prefix query won't match without original tokens
|
||||
@Test
|
||||
public void testReverseWithoutOriginal() throws Exception {
|
||||
assertQ( "prefix query doesn't work without original term",
|
||||
req("q","{!complexphrase inOrder=true}\"on* for*\"",
|
||||
"df",withoutOriginal),
|
||||
expect());
|
||||
|
||||
assertQ("postfix query works fine even without original",
|
||||
req("q","{!complexphrase inOrder=true}\"*nce *ver\"",
|
||||
"df",withoutOriginal),
|
||||
expect("2"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithoutReverse() throws Exception {
|
||||
checkField(noReverseText);
|
||||
}
|
||||
|
||||
private void checkField(String field) {
|
||||
assertQ(
|
||||
req("q","{!complexphrase inOrder=true}\"on* *ver\"",
|
||||
"df",field,
|
||||
"indent","on",
|
||||
"debugQuery", "true"),
|
||||
expect("1","2"));
|
||||
|
||||
assertQ(
|
||||
req("q","{!complexphrase inOrder=true}\"ON* *VER\"",
|
||||
"df",field),
|
||||
expect("1","2"));
|
||||
|
||||
assertQ(
|
||||
req("q","{!complexphrase inOrder=true}\"ON* *ver\"",
|
||||
"df",field),
|
||||
expect("1","2"));
|
||||
|
||||
assertQ(
|
||||
req("q","{!complexphrase inOrder=true}\"on* *ver\"~1",
|
||||
"df",field),
|
||||
expect("1","2","7"));
|
||||
|
||||
assertQ("range works if reverse doesn't mess",
|
||||
req("q","{!complexphrase inOrder=true}\"on* [* TO a]\"",
|
||||
"df",field),
|
||||
expect());
|
||||
|
||||
assertQ("range works if reverse doesn't mess",
|
||||
req("q","{!complexphrase inOrder=true}\"[on TO onZ] for*\"",
|
||||
"df",field),
|
||||
expect("2"));
|
||||
}
|
||||
|
||||
private static String doc123(int id, String text){
|
||||
return adoc("id",""+id, withOriginal, text, withoutOriginal, text, noReverseText, text);
|
||||
}
|
||||
|
||||
private static String [] expect(String ...ids) {
|
||||
String[] xpathes = new String[ids.length+1];
|
||||
xpathes[0]= "//result[@numFound=" +ids.length+ "]";
|
||||
int i=1;
|
||||
for(String id : ids) {
|
||||
xpathes[i++] = "//doc/int[@name='id' and text()='"+id+"']";
|
||||
}
|
||||
return xpathes;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue