From d3f83bb948fd44e66099ef9537363ecef5bdb0f3 Mon Sep 17 00:00:00 2001 From: Mikhail Khludnev Date: Fri, 30 Dec 2016 00:01:20 +0300 Subject: [PATCH] SOLR-7466: reverse-aware leading wildcards in complexphrase query parser --- solr/CHANGES.txt | 5 + .../solr/parser/SolrQueryParserBase.java | 30 +++-- .../search/ComplexPhraseQParserPlugin.java | 68 ++++++++++- .../TestComplexPhraseLeadingWildcard.java | 113 ++++++++++++++++++ 4 files changed, 197 insertions(+), 19 deletions(-) create mode 100644 solr/core/src/test/org/apache/solr/search/TestComplexPhraseLeadingWildcard.java diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index a499cc85fd9..ff1fdc783f4 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -119,6 +119,8 @@ Upgrade Notes risk in overlapping commits. Nonetheless users should continue to avoid excessive committing. Users are advised to remove any pre-existing maxWarmingSearchers entries from their solrconfig.xml files. +* SOLR-7466: complexphrase query parser now supports leading wildcards, beware of its possible heaviness. + Users are encouraged to use ReversedWildcardFilter in index time analysis. 
New Features ---------------------- @@ -220,6 +222,9 @@ New Features * SOLR-8530: Add HavingStream to Streaming API and StreamingExpressions (Joel Bernstein) +* SOLR-7466: Enable leading wildcard in complexphrase query parser, optimize it with ReversedWildcardFilterFactory + when it's provided (Mikhail Khludnev) + Optimizations ---------------------- * SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have diff --git a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java index f54e9e98ae4..84ffcb94568 100644 --- a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java +++ b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java @@ -63,6 +63,8 @@ import org.apache.solr.search.SyntaxError; */ public abstract class SolrQueryParserBase extends QueryBuilder { + protected static final String REVERSE_WILDCARD_LOWER_BOUND = new String(new char[]{ReverseStringFilter.START_OF_HEADING_MARKER + 1}); + public static final int TERMS_QUERY_THRESHOLD = 16; // @lucene.internal Set to a low value temporarily for better test coverage static final int CONJ_NONE = 0; @@ -889,28 +891,24 @@ public abstract class SolrQueryParserBase extends QueryBuilder { return newFieldQuery(getAnalyzer(), field, queryText, quoted); } + protected boolean isRangeShouldBeProtectedFromReverse(String field, String part1){ + checkNullField(field); + SchemaField sf = schema.getField(field); + return part1 == null && getReversedWildcardFilterFactory(sf.getType())!=null; + } // called from parser protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws SyntaxError { - checkNullField(field); - SchemaField sf = schema.getField(field); - - if (part1 == null) { - ReversedWildcardFilterFactory factory = getReversedWildcardFilterFactory(sf.getType()); - if (factory != null) { - // There will be reversed tokens starting 
with u0001 that we want to exclude, so - // lets start at u0002 inclusive instead. - char[] buf = new char[1]; - buf[0] = ReverseStringFilter.START_OF_HEADING_MARKER + 1; - part1 = new String(buf); - startInclusive = true; - } - } - - return sf.getType().getRangeQuery(parser, sf, part1, part2, startInclusive, endInclusive); + boolean reverse = isRangeShouldBeProtectedFromReverse(field, part1); + return getRangeQueryImpl(field, reverse ? REVERSE_WILDCARD_LOWER_BOUND : part1, part2, startInclusive || reverse, endInclusive); } + protected Query getRangeQueryImpl(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws SyntaxError { + checkNullField(field); + SchemaField sf = schema.getField(field); + return sf.getType().getRangeQuery(parser, sf, part1, part2, startInclusive, endInclusive); + } // called from parser protected Query getPrefixQuery(String field, String termStr) throws SyntaxError { checkNullField(field); diff --git a/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java index 2904de9e4c3..22702dcdb85 100644 --- a/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java @@ -18,6 +18,7 @@ package org.apache.solr.search; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.complexPhrase.ComplexPhraseQueryParser; +import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.Query; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.SolrParams; @@ -60,6 +61,32 @@ public class ComplexPhraseQParserPlugin extends QParserPlugin { */ class ComplexPhraseQParser extends QParser { + final class SolrQueryParserDelegate extends SolrQueryParser { + private SolrQueryParserDelegate(QParser parser, String defaultField) { + super(parser, 
defaultField); + } + + @Override + protected org.apache.lucene.search.Query getWildcardQuery(String field, String termStr) throws SyntaxError { + return super.getWildcardQuery(field, termStr); + } + + @Override + protected org.apache.lucene.search.Query getRangeQuery(String field, String part1, String part2, + boolean startInclusive, boolean endInclusive) throws SyntaxError { + return super.getRangeQuery(field, part1, part2, startInclusive, endInclusive); + } + + @Override + protected boolean isRangeShouldBeProtectedFromReverse(String field, String part1) { + return super.isRangeShouldBeProtectedFromReverse(field, part1); + } + + public String getLowerBoundForReverse() { + return REVERSE_WILDCARD_LOWER_BOUND; + } + } + ComplexPhraseQueryParser lparser; boolean inOrder = true; @@ -87,11 +114,46 @@ public class ComplexPhraseQParserPlugin extends QParserPlugin { defaultField = getReq().getSchema().getDefaultSearchFieldName(); } - lparser = new ComplexPhraseQueryParser(defaultField, getReq().getSchema().getQueryAnalyzer()); + SolrQueryParserDelegate reverseAwareParser = new SolrQueryParserDelegate(this, defaultField); + + lparser = new ComplexPhraseQueryParser(defaultField, getReq().getSchema().getQueryAnalyzer()) + { + protected Query newWildcardQuery(org.apache.lucene.index.Term t) { + try { + org.apache.lucene.search.Query wildcardQuery = reverseAwareParser.getWildcardQuery(t.field(), t.text()); + setRewriteMethod(wildcardQuery); + return wildcardQuery; + } catch (SyntaxError e) { + throw new RuntimeException(e); + } + } - if (localParams != null) + private Query setRewriteMethod(org.apache.lucene.search.Query query) { + if (query instanceof MultiTermQuery) { + ((MultiTermQuery) query).setRewriteMethod( + org.apache.lucene.search.MultiTermQuery.SCORING_BOOLEAN_REWRITE); + } + return query; + } + + protected Query newRangeQuery(String field, String part1, String part2, boolean startInclusive, + boolean endInclusive) { + boolean reverse = 
reverseAwareParser.isRangeShouldBeProtectedFromReverse(field, part1); + return super.newRangeQuery(field, + reverse ? reverseAwareParser.getLowerBoundForReverse() : part1, + part2, + startInclusive || reverse, + endInclusive); + } + } + ; + + lparser.setAllowLeadingWildcard(true); + + if (localParams != null) { inOrder = localParams.getBool("inOrder", inOrder); - + } + lparser.setInOrder(inOrder); QueryParser.Operator defaultOperator = QueryParsing.getQueryParserDefaultOperator(getReq().getSchema(), getParam(QueryParsing.OP)); diff --git a/solr/core/src/test/org/apache/solr/search/TestComplexPhraseLeadingWildcard.java b/solr/core/src/test/org/apache/solr/search/TestComplexPhraseLeadingWildcard.java new file mode 100644 index 00000000000..6c48cc34e63 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/search/TestComplexPhraseLeadingWildcard.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.search; + +import org.apache.solr.SolrTestCaseJ4; +import org.junit.BeforeClass; +import org.junit.Test; + +public class TestComplexPhraseLeadingWildcard extends SolrTestCaseJ4 { + + private static final String noReverseText = "three"; + private static final String withOriginal = "one"; + private static final String withoutOriginal = "two"; + + @BeforeClass + public static void beforeClass() throws Exception { + initCore("solrconfig.xml","schema-reversed.xml"); + assertU(doc123(1, "one ever")); + assertU(doc123(2, "once forever")); + + assertU(doc123(7, "once slope forever")); + assertU(doc123(8, "once again slope forever")); + assertU(doc123(9, "forever once")); + assertU(commit()); + } + + @Test + public void testReverseWithOriginal() throws Exception { + checkField(withOriginal); + + } + + // prefix query won't match without original tokens + @Test + public void testReverseWithoutOriginal() throws Exception { + assertQ( "prefix query doesn't work without original term", + req("q","{!complexphrase inOrder=true}\"on* for*\"", + "df",withoutOriginal), + expect()); + + assertQ("postfix query works fine even without original", + req("q","{!complexphrase inOrder=true}\"*nce *ver\"", + "df",withoutOriginal), + expect("2")); + } + + @Test + public void testWithoutReverse() throws Exception { + checkField(noReverseText); + } + + private void checkField(String field) { + assertQ( + req("q","{!complexphrase inOrder=true}\"on* *ver\"", + "df",field, + "indent","on", + "debugQuery", "true"), + expect("1","2")); + + assertQ( + req("q","{!complexphrase inOrder=true}\"ON* *VER\"", + "df",field), + expect("1","2")); + + assertQ( + req("q","{!complexphrase inOrder=true}\"ON* *ver\"", + "df",field), + expect("1","2")); + + assertQ( + req("q","{!complexphrase inOrder=true}\"on* *ver\"~1", + "df",field), + expect("1","2","7")); + + assertQ("range works if reverse doesn't mess", + req("q","{!complexphrase inOrder=true}\"on* [* TO a]\"", + "df",field), + 
expect()); + + assertQ("range works if reverse doesn't mess", + req("q","{!complexphrase inOrder=true}\"[on TO onZ] for*\"", + "df",field), + expect("2")); + } + + private static String doc123(int id, String text){ + return adoc("id",""+id, withOriginal, text, withoutOriginal, text, noReverseText, text); + } + + private static String [] expect(String ...ids) { + String[] xpathes = new String[ids.length+1]; + xpathes[0]= "//result[@numFound=" +ids.length+ "]"; + int i=1; + for(String id : ids) { + xpathes[i++] = "//doc/int[@name='id' and text()='"+id+"']"; + } + return xpathes; + } +}