SOLR-7466: reverse-aware leading wildcards in complexphrase query parser

This commit is contained in:
Mikhail Khludnev 2016-12-30 00:01:20 +03:00
parent f87efac521
commit d3f83bb948
4 changed files with 197 additions and 19 deletions

View File

@ -119,6 +119,8 @@ Upgrade Notes
risk in overlapping commits. Nonetheless users should continue to avoid excessive committing. Users are
advised to remove any pre-existing maxWarmingSearchers entries from their solrconfig.xml files.
* SOLR-7466: complexphrase query parser now supports leading wildcards; beware of their possible heaviness.
Users are encouraged to use ReversedWildcardFilter in index-time analysis.
New Features
----------------------
@ -220,6 +222,9 @@ New Features
* SOLR-8530: Add HavingStream to Streaming API and StreamingExpressions (Joel Bernstein)
* SOLR-7466: Enable leading wildcard in complexphrase query parser, optimize it with ReversedWildcardFilterFactory
when it's provided (Mikhail Khludnev)
Optimizations
----------------------
* SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have

View File

@ -63,6 +63,8 @@ import org.apache.solr.search.SyntaxError;
*/
public abstract class SolrQueryParserBase extends QueryBuilder {
protected static final String REVERSE_WILDCARD_LOWER_BOUND = new String(new char[]{ReverseStringFilter.START_OF_HEADING_MARKER + 1});
public static final int TERMS_QUERY_THRESHOLD = 16; // @lucene.internal Set to a low value temporarily for better test coverage
static final int CONJ_NONE = 0;
@ -889,28 +891,24 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
return newFieldQuery(getAnalyzer(), field, queryText, quoted);
}
// called from parser
protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws SyntaxError {
protected boolean isRangeShouldBeProtectedFromReverse(String field, String part1){
checkNullField(field);
SchemaField sf = schema.getField(field);
if (part1 == null) {
ReversedWildcardFilterFactory factory = getReversedWildcardFilterFactory(sf.getType());
if (factory != null) {
// There will be reversed tokens starting with u0001 that we want to exclude, so
// lets start at u0002 inclusive instead.
char[] buf = new char[1];
buf[0] = ReverseStringFilter.START_OF_HEADING_MARKER + 1;
part1 = new String(buf);
startInclusive = true;
}
return part1 == null && getReversedWildcardFilterFactory(sf.getType())!=null;
}
// called from parser
protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws SyntaxError {
boolean reverse = isRangeShouldBeProtectedFromReverse(field, part1);
return getRangeQueryImpl(field, reverse ? REVERSE_WILDCARD_LOWER_BOUND : part1, part2, startInclusive || reverse, endInclusive);
}
protected Query getRangeQueryImpl(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws SyntaxError {
checkNullField(field);
SchemaField sf = schema.getField(field);
return sf.getType().getRangeQuery(parser, sf, part1, part2, startInclusive, endInclusive);
}
// called from parser
protected Query getPrefixQuery(String field, String termStr) throws SyntaxError {
checkNullField(field);

View File

@ -18,6 +18,7 @@ package org.apache.solr.search;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.complexPhrase.ComplexPhraseQueryParser;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SolrParams;
@ -60,6 +61,32 @@ public class ComplexPhraseQParserPlugin extends QParserPlugin {
*/
class ComplexPhraseQParser extends QParser {
final class SolrQueryParserDelegate extends SolrQueryParser {
private SolrQueryParserDelegate(QParser parser, String defaultField) {
super(parser, defaultField);
}
@Override
protected org.apache.lucene.search.Query getWildcardQuery(String field, String termStr) throws SyntaxError {
return super.getWildcardQuery(field, termStr);
}
@Override
protected org.apache.lucene.search.Query getRangeQuery(String field, String part1, String part2,
boolean startInclusive, boolean endInclusive) throws SyntaxError {
return super.getRangeQuery(field, part1, part2, startInclusive, endInclusive);
}
@Override
protected boolean isRangeShouldBeProtectedFromReverse(String field, String part1) {
return super.isRangeShouldBeProtectedFromReverse(field, part1);
}
public String getLowerBoundForReverse() {
return REVERSE_WILDCARD_LOWER_BOUND;
}
}
ComplexPhraseQueryParser lparser;
boolean inOrder = true;
@ -87,10 +114,45 @@ public class ComplexPhraseQParserPlugin extends QParserPlugin {
defaultField = getReq().getSchema().getDefaultSearchFieldName();
}
lparser = new ComplexPhraseQueryParser(defaultField, getReq().getSchema().getQueryAnalyzer());
SolrQueryParserDelegate reverseAwareParser = new SolrQueryParserDelegate(this, defaultField);
if (localParams != null)
lparser = new ComplexPhraseQueryParser(defaultField, getReq().getSchema().getQueryAnalyzer())
{
protected Query newWildcardQuery(org.apache.lucene.index.Term t) {
try {
org.apache.lucene.search.Query wildcardQuery = reverseAwareParser.getWildcardQuery(t.field(), t.text());
setRewriteMethod(wildcardQuery);
return wildcardQuery;
} catch (SyntaxError e) {
throw new RuntimeException(e);
}
}
private Query setRewriteMethod(org.apache.lucene.search.Query query) {
if (query instanceof MultiTermQuery) {
((MultiTermQuery) query).setRewriteMethod(
org.apache.lucene.search.MultiTermQuery.SCORING_BOOLEAN_REWRITE);
}
return query;
}
protected Query newRangeQuery(String field, String part1, String part2, boolean startInclusive,
boolean endInclusive) {
boolean reverse = reverseAwareParser.isRangeShouldBeProtectedFromReverse(field, part1);
return super.newRangeQuery(field,
reverse ? reverseAwareParser.getLowerBoundForReverse() : part1,
part2,
startInclusive || reverse,
endInclusive);
}
}
;
lparser.setAllowLeadingWildcard(true);
if (localParams != null) {
inOrder = localParams.getBool("inOrder", inOrder);
}
lparser.setInOrder(inOrder);

View File

@ -0,0 +1,113 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import org.apache.solr.SolrTestCaseJ4;
import org.junit.BeforeClass;
import org.junit.Test;
public class TestComplexPhraseLeadingWildcard extends SolrTestCaseJ4 {
private static final String noReverseText = "three";
private static final String withOriginal = "one";
private static final String withoutOriginal = "two";
@BeforeClass
public static void beforeClass() throws Exception {
initCore("solrconfig.xml","schema-reversed.xml");
assertU(doc123(1, "one ever"));
assertU(doc123(2, "once forever"));
assertU(doc123(7, "once slope forever"));
assertU(doc123(8, "once again slope forever"));
assertU(doc123(9, "forever once"));
assertU(commit());
}
@Test
public void testReverseWithOriginal() throws Exception {
checkField(withOriginal);
}
// prefix query won't match without original tokens
@Test
public void testReverseWithoutOriginal() throws Exception {
assertQ( "prefix query doesn't work without original term",
req("q","{!complexphrase inOrder=true}\"on* for*\"",
"df",withoutOriginal),
expect());
assertQ("postfix query works fine even without original",
req("q","{!complexphrase inOrder=true}\"*nce *ver\"",
"df",withoutOriginal),
expect("2"));
}
@Test
public void testWithoutReverse() throws Exception {
checkField(noReverseText);
}
private void checkField(String field) {
assertQ(
req("q","{!complexphrase inOrder=true}\"on* *ver\"",
"df",field,
"indent","on",
"debugQuery", "true"),
expect("1","2"));
assertQ(
req("q","{!complexphrase inOrder=true}\"ON* *VER\"",
"df",field),
expect("1","2"));
assertQ(
req("q","{!complexphrase inOrder=true}\"ON* *ver\"",
"df",field),
expect("1","2"));
assertQ(
req("q","{!complexphrase inOrder=true}\"on* *ver\"~1",
"df",field),
expect("1","2","7"));
assertQ("range works if reverse doesn't mess",
req("q","{!complexphrase inOrder=true}\"on* [* TO a]\"",
"df",field),
expect());
assertQ("range works if reverse doesn't mess",
req("q","{!complexphrase inOrder=true}\"[on TO onZ] for*\"",
"df",field),
expect("2"));
}
private static String doc123(int id, String text){
return adoc("id",""+id, withOriginal, text, withoutOriginal, text, noReverseText, text);
}
private static String [] expect(String ...ids) {
String[] xpathes = new String[ids.length+1];
xpathes[0]= "//result[@numFound=" +ids.length+ "]";
int i=1;
for(String id : ids) {
xpathes[i++] = "//doc/int[@name='id' and text()='"+id+"']";
}
return xpathes;
}
}