mirror of
https://github.com/apache/lucene.git
synced 2025-02-22 10:15:27 +00:00
SOLR-6318: New terms QParser
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1616558 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c229b7f1b3
commit
4e3bc61907
@ -170,6 +170,9 @@ New Features
|
||||
* SOLR-6302: UpdateRequestHandlers are registered implicitly /update ,
|
||||
/update/json, /update/csv , /update/json/docs (Noble Paul)
|
||||
|
||||
* SOLR-6318: New "terms" QParser for efficiently filtering documents by a list of values. For
|
||||
many values, it's more appropriate than a boolean query. (David Smiley)
|
||||
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
@ -48,6 +48,7 @@ public abstract class QParserPlugin implements NamedListInitializedPlugin, SolrI
|
||||
FieldQParserPlugin.NAME, FieldQParserPlugin.class,
|
||||
RawQParserPlugin.NAME, RawQParserPlugin.class,
|
||||
TermQParserPlugin.NAME, TermQParserPlugin.class,
|
||||
TermsQParserPlugin.NAME, TermsQParserPlugin.class,
|
||||
NestedQParserPlugin.NAME, NestedQParserPlugin.class,
|
||||
FunctionRangeQParserPlugin.NAME, FunctionRangeQParserPlugin.class,
|
||||
SpatialFilterQParserPlugin.NAME, SpatialFilterQParserPlugin.class,
|
||||
|
@ -0,0 +1,140 @@
|
||||
package org.apache.solr.search;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.queries.TermsFilter;
|
||||
import org.apache.lucene.search.AutomatonQuery;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.DocValuesTermsFilter;
|
||||
import org.apache.lucene.search.Filter;
|
||||
import org.apache.lucene.search.MultiTermQueryWrapperFilter;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.QueryWrapperFilter;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.automaton.Automata;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.schema.FieldType;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* Finds documents whose specified field has any of the specified values. It's like
|
||||
* {@link TermQParserPlugin} but multi-valued, and supports a variety of internal algorithms.
|
||||
* <br>Parameters:
|
||||
* <br><code>f</code>: The field name (mandatory)
|
||||
* <br><code>separator</code>: the separator delimiting the values in the query string. By
|
||||
* default it's a " " which is special in that it splits on any consecutive whitespace.
|
||||
* <br><code>method</code>: Any of termsFilter (default), booleanQuery, automaton, docValuesTermsFilter.
|
||||
* <p>
|
||||
* Note that if no values are specified then the query matches no documents.
|
||||
*/
|
||||
public class TermsQParserPlugin extends QParserPlugin {
|
||||
public static final String NAME = "terms";
|
||||
|
||||
/** The separator to use in the underlying suggester */
|
||||
public static final String SEPARATOR = "separator";
|
||||
|
||||
/** Choose the internal algorithm */
|
||||
private static final String METHOD = "method";
|
||||
|
||||
@Override
|
||||
public void init(NamedList args) {
|
||||
}
|
||||
|
||||
private static enum Method {
|
||||
termsFilter {
|
||||
@Override
|
||||
Filter makeFilter(String fname, BytesRef[] bytesRefs) {
|
||||
return new TermsFilter(fname, bytesRefs);
|
||||
}
|
||||
},
|
||||
booleanQuery {
|
||||
@Override
|
||||
Filter makeFilter(String fname, BytesRef[] byteRefs) {
|
||||
BooleanQuery bq = new BooleanQuery(true);
|
||||
for (BytesRef byteRef : byteRefs) {
|
||||
bq.add(new TermQuery(new Term(fname, byteRef)), BooleanClause.Occur.SHOULD);
|
||||
}
|
||||
return new QueryWrapperFilter(bq);
|
||||
}
|
||||
},
|
||||
automaton {
|
||||
@Override
|
||||
Filter makeFilter(String fname, BytesRef[] byteRefs) {
|
||||
Automaton union = Automata.makeStringUnion(Arrays.asList(byteRefs));
|
||||
return new MultiTermQueryWrapperFilter<AutomatonQuery>(new AutomatonQuery(new Term(fname), union)) {
|
||||
};
|
||||
}
|
||||
},
|
||||
docValuesTermsFilter {//on 4x this is FieldCacheTermsFilter but we use the 5x name any way
|
||||
//note: limited to one val per doc
|
||||
@Override
|
||||
Filter makeFilter(String fname, BytesRef[] byteRefs) {
|
||||
return new DocValuesTermsFilter(fname, byteRefs);
|
||||
}
|
||||
};
|
||||
|
||||
abstract Filter makeFilter(String fname, BytesRef[] byteRefs);
|
||||
}
|
||||
|
||||
@Override
|
||||
public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
|
||||
return new QParser(qstr, localParams, params, req) {
|
||||
@Override
|
||||
public Query parse() throws SyntaxError {
|
||||
String fname = localParams.get(QueryParsing.F);
|
||||
FieldType ft = req.getSchema().getFieldTypeNoEx(fname);
|
||||
String separator = localParams.get(SEPARATOR, " ");
|
||||
String qstr = localParams.get(QueryParsing.V);//never null
|
||||
Method method = Method.valueOf(localParams.get(METHOD, Method.termsFilter.name()));
|
||||
//TODO pick the default method based on various heuristics from benchmarks
|
||||
|
||||
//if space then split on all whitespace & trim, otherwise strictly interpret
|
||||
final boolean sepIsSpace = separator.equals(" ");
|
||||
if (sepIsSpace)
|
||||
qstr = qstr.trim();
|
||||
if (qstr.length() == 0)
|
||||
return new BooleanQuery();//Matches nothing.
|
||||
final String[] splitVals = sepIsSpace ? qstr.split("\\s+") : qstr.split(Pattern.quote(separator), -1);
|
||||
assert splitVals.length > 0;
|
||||
|
||||
BytesRef[] bytesRefs = new BytesRef[splitVals.length];
|
||||
for (int i = 0; i < splitVals.length; i++) {
|
||||
String stringVal = splitVals[i];
|
||||
//logic same as TermQParserPlugin
|
||||
BytesRef term = new BytesRef();
|
||||
if (ft != null) {
|
||||
ft.readableToIndexed(stringVal, term);
|
||||
} else {
|
||||
term.copyChars(stringVal);
|
||||
}
|
||||
bytesRefs[i] = term;
|
||||
}
|
||||
|
||||
return new SolrConstantScoreQuery(method.makeFilter(fname, bytesRefs));
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
@ -30,21 +30,6 @@ public class TestQueryTypes extends AbstractSolrTestCase {
|
||||
|
||||
public String getCoreName() { return "basic"; }
|
||||
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
// if you override setUp or tearDown, you better call
|
||||
// the super classes version
|
||||
super.setUp();
|
||||
}
|
||||
@Override
|
||||
public void tearDown() throws Exception {
|
||||
// if you override setUp or tearDown, you better call
|
||||
// the super classes version
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
|
||||
public void testQueryTypes() {
|
||||
assertU(adoc("id","0"));
|
||||
assertU(adoc("id","1", "v_t","Hello Dude"));
|
||||
@ -98,12 +83,36 @@ public class TestQueryTypes extends AbstractSolrTestCase {
|
||||
,"//result[@numFound='1']"
|
||||
);
|
||||
|
||||
// term qparser
|
||||
assertQ(req( "q", "{!term f="+f+"}"+v)
|
||||
,"//result[@numFound='1']"
|
||||
);
|
||||
|
||||
// terms qparser
|
||||
//wrap in spaces if space separated
|
||||
final String separator = f == "v_s" ? "separator='|'" : "";//defaults to space separated
|
||||
String vMod = separator == "" && random().nextBoolean() ? " " + v + " " : v;
|
||||
assertQ(req( "q", "{!terms " + separator + " f=" +f+"}"+vMod)
|
||||
,"//result[@numFound='1']"
|
||||
);
|
||||
|
||||
// lucene range
|
||||
assertQ(req( "q", f + ":[\"" + v + "\" TO \"" + v + "\"]" )
|
||||
,"//result[@numFound='1']"
|
||||
);
|
||||
}
|
||||
|
||||
// terms qparser, no values matches nothing
|
||||
assertQ(req( "q", "*:*", "fq", "{!terms f=v_s}")
|
||||
,"//result[@numFound='0']"
|
||||
);
|
||||
|
||||
String termsMethod = new String[]{"termsFilter", "booleanQuery", "automaton", "docValuesTermsFilter"}[random().nextInt(4)];
|
||||
assertQ(req( "q", "{!terms f=v_s method=" + termsMethod + " separator=|}other stuff|wow dude")
|
||||
,"//result[@numFound='2']"
|
||||
);
|
||||
|
||||
|
||||
// frange and function query only work on single valued field types
|
||||
Object[] fc_vals = new Object[] {
|
||||
"id",999.0
|
||||
|
Loading…
x
Reference in New Issue
Block a user