From 4e3bc6190705fba8674a7e5038afdd593935f28c Mon Sep 17 00:00:00 2001 From: David Wayne Smiley Date: Thu, 7 Aug 2014 18:00:11 +0000 Subject: [PATCH] SOLR-6318: New terms QParser git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1616558 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 3 + .../org/apache/solr/search/QParserPlugin.java | 1 + .../solr/search/TermsQParserPlugin.java | 140 ++++++++++++++++++ .../apache/solr/search/TestQueryTypes.java | 39 +++-- 4 files changed, 168 insertions(+), 15 deletions(-) create mode 100644 solr/core/src/java/org/apache/solr/search/TermsQParserPlugin.java diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index aae3d2e5f2a..871a8301cd0 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -170,6 +170,9 @@ New Features * SOLR-6302: UpdateRequestHandlers are registered implicitly /update , /update/json, /update/csv , /update/json/docs (Noble Paul) +* SOLR-6318: New "terms" QParser for efficiently filtering documents by a list of values. For + many values, it's more appropriate than a boolean query. (David Smiley) + Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/search/QParserPlugin.java b/solr/core/src/java/org/apache/solr/search/QParserPlugin.java index 12bb195f087..7e353c5080f 100644 --- a/solr/core/src/java/org/apache/solr/search/QParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/QParserPlugin.java @@ -48,6 +48,7 @@ public abstract class QParserPlugin implements NamedListInitializedPlugin, SolrI FieldQParserPlugin.NAME, FieldQParserPlugin.class, RawQParserPlugin.NAME, RawQParserPlugin.class, TermQParserPlugin.NAME, TermQParserPlugin.class, + TermsQParserPlugin.NAME, TermsQParserPlugin.class, NestedQParserPlugin.NAME, NestedQParserPlugin.class, FunctionRangeQParserPlugin.NAME, FunctionRangeQParserPlugin.class, SpatialFilterQParserPlugin.NAME, SpatialFilterQParserPlugin.class, diff --git a/solr/core/src/java/org/apache/solr/search/TermsQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/TermsQParserPlugin.java new file mode 100644 index 00000000000..4d29f7d254f --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/TermsQParserPlugin.java @@ -0,0 +1,140 @@ +package org.apache.solr.search; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.Term; +import org.apache.lucene.queries.TermsFilter; +import org.apache.lucene.search.AutomatonQuery; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.DocValuesTermsFilter; +import org.apache.lucene.search.Filter; +import org.apache.lucene.search.MultiTermQueryWrapperFilter; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryWrapperFilter; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.automaton.Automata; +import org.apache.lucene.util.automaton.Automaton; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.schema.FieldType; + +import java.util.Arrays; +import java.util.regex.Pattern; + +/** + * Finds documents whose specified field has any of the specified values. It's like + * {@link TermQParserPlugin} but multi-valued, and supports a variety of internal algorithms. + *
Parameters: + *
f: The field name (mandatory) + *
separator: the separator delimiting the values in the query string. By + * default it's a " " which is special in that it splits on any consecutive whitespace. + *
method: Any of termsFilter (default), booleanQuery, automaton, docValuesTermsFilter. + *

+ * Note that if no values are specified then the query matches no documents. + */ +public class TermsQParserPlugin extends QParserPlugin { + public static final String NAME = "terms"; + + /** The separator to use in the underlying suggester */ + public static final String SEPARATOR = "separator"; + + /** Choose the internal algorithm */ + private static final String METHOD = "method"; + + @Override + public void init(NamedList args) { + } + + private static enum Method { + termsFilter { + @Override + Filter makeFilter(String fname, BytesRef[] bytesRefs) { + return new TermsFilter(fname, bytesRefs); + } + }, + booleanQuery { + @Override + Filter makeFilter(String fname, BytesRef[] byteRefs) { + BooleanQuery bq = new BooleanQuery(true); + for (BytesRef byteRef : byteRefs) { + bq.add(new TermQuery(new Term(fname, byteRef)), BooleanClause.Occur.SHOULD); + } + return new QueryWrapperFilter(bq); + } + }, + automaton { + @Override + Filter makeFilter(String fname, BytesRef[] byteRefs) { + Automaton union = Automata.makeStringUnion(Arrays.asList(byteRefs)); + return new MultiTermQueryWrapperFilter(new AutomatonQuery(new Term(fname), union)) { + }; + } + }, + docValuesTermsFilter {//on 4x this is FieldCacheTermsFilter but we use the 5x name any way + //note: limited to one val per doc + @Override + Filter makeFilter(String fname, BytesRef[] byteRefs) { + return new DocValuesTermsFilter(fname, byteRefs); + } + }; + + abstract Filter makeFilter(String fname, BytesRef[] byteRefs); + } + + @Override + public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { + return new QParser(qstr, localParams, params, req) { + @Override + public Query parse() throws SyntaxError { + String fname = localParams.get(QueryParsing.F); + FieldType ft = req.getSchema().getFieldTypeNoEx(fname); + String separator = localParams.get(SEPARATOR, " "); + String qstr = localParams.get(QueryParsing.V);//never null + Method method = Method.valueOf(localParams.get(METHOD, Method.termsFilter.name())); + //TODO pick the default method based on various heuristics from benchmarks + + //if space then split on all whitespace & trim, otherwise strictly interpret + final boolean sepIsSpace = separator.equals(" "); + if (sepIsSpace) + qstr = qstr.trim(); + if (qstr.length() == 0) + return new BooleanQuery();//Matches nothing. + final String[] splitVals = sepIsSpace ? qstr.split("\\s+") : qstr.split(Pattern.quote(separator), -1); + assert splitVals.length > 0; + + BytesRef[] bytesRefs = new BytesRef[splitVals.length]; + for (int i = 0; i < splitVals.length; i++) { + String stringVal = splitVals[i]; + //logic same as TermQParserPlugin + BytesRef term = new BytesRef(); + if (ft != null) { + ft.readableToIndexed(stringVal, term); + } else { + term.copyChars(stringVal); + } + bytesRefs[i] = term; + } + + return new SolrConstantScoreQuery(method.makeFilter(fname, bytesRefs)); + } + }; + } +} diff --git a/solr/core/src/test/org/apache/solr/search/TestQueryTypes.java b/solr/core/src/test/org/apache/solr/search/TestQueryTypes.java index 8f2cee8c72e..30a5a3c21a6 100644 --- a/solr/core/src/test/org/apache/solr/search/TestQueryTypes.java +++ b/solr/core/src/test/org/apache/solr/search/TestQueryTypes.java @@ -30,21 +30,6 @@ public class TestQueryTypes extends AbstractSolrTestCase { public String getCoreName() { return "basic"; } - - @Override - public void setUp() throws Exception { - // if you override setUp or tearDown, you better call - // the super classes version - super.setUp(); - } - @Override - public void tearDown() throws Exception { - // if you override setUp or tearDown, you better call - // the super classes version - super.tearDown(); - } - - public void testQueryTypes() { assertU(adoc("id","0")); assertU(adoc("id","1", "v_t","Hello Dude")); @@ -98,12 +83,36 @@ public class TestQueryTypes extends AbstractSolrTestCase { ,"//result[@numFound='1']" ); + // term qparser + assertQ(req( "q", "{!term f="+f+"}"+v) + ,"//result[@numFound='1']" + ); + + // terms qparser + //wrap in spaces if space separated + final String separator = f == "v_s" ? "separator='|'" : "";//defaults to space separated + String vMod = separator == "" && random().nextBoolean() ? " " + v + " " : v; + assertQ(req( "q", "{!terms " + separator + " f=" +f+"}"+vMod) + ,"//result[@numFound='1']" + ); + // lucene range assertQ(req( "q", f + ":[\"" + v + "\" TO \"" + v + "\"]" ) ,"//result[@numFound='1']" ); } + // terms qparser, no values matches nothing + assertQ(req( "q", "*:*", "fq", "{!terms f=v_s}") + ,"//result[@numFound='0']" + ); + + String termsMethod = new String[]{"termsFilter", "booleanQuery", "automaton", "docValuesTermsFilter"}[random().nextInt(4)]; + assertQ(req( "q", "{!terms f=v_s method=" + termsMethod + " separator=|}other stuff|wow dude") + ,"//result[@numFound='2']" + ); + + // frange and function query only work on single valued field types Object[] fc_vals = new Object[] { "id",999.0