mirror of https://github.com/apache/lucene.git
SOLR-9786: FieldType.getSetQuery, QParser.flags/isFilter, solr query parser use getSetQuery when appropriate and other optimizations
This commit is contained in:
parent
615570c347
commit
9f24c4b59a
|
@ -88,6 +88,16 @@ Optimizations
|
|||
* SOLR-9772: Deriving distributed sort values (fieldSortValues) should reuse
|
||||
comparator and only invalidate leafComparator. (John Call via yonik)
|
||||
|
||||
* SOLR-9786: FieldType has a new getSetQuery() method that can take a set of terms
|
||||
and create a more efficient query (such as TermsQuery). The solr query parser has been
|
||||
changed to use this method when appropriate. The parser also knows when it is being
|
||||
used to parse a filter and will create TermsQueries from large lists of normal terms
|
||||
or numbers, resulting in a query that will execute faster. This also acts to avoid
|
||||
BooleanQuery maximum clause limit. Query parsing itself has also been optimized,
|
||||
resulting in less produced garbage and 5-7% better performance.
|
||||
(yonik)
|
||||
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
* SOLR-9701: NPE in export handler when "fl" parameter is omitted.
|
||||
|
|
|
@ -200,10 +200,11 @@ public class QueryComponent extends SearchComponent
|
|||
if (fqs!=null && fqs.length!=0) {
|
||||
List<Query> filters = rb.getFilters();
|
||||
// if filters already exists, make a copy instead of modifying the original
|
||||
filters = filters == null ? new ArrayList<Query>(fqs.length) : new ArrayList<>(filters);
|
||||
filters = filters == null ? new ArrayList<>(fqs.length) : new ArrayList<>(filters);
|
||||
for (String fq : fqs) {
|
||||
if (fq != null && fq.trim().length()!=0) {
|
||||
QParser fqp = QParser.getParser(fq, req);
|
||||
fqp.setIsFilter(true);
|
||||
filters.add(fqp.getQuery());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,11 +4,12 @@ package org.apache.solr.parser;
|
|||
import java.io.StringReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.apache.solr.search.SyntaxError;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.search.SyntaxError;
|
||||
|
||||
|
||||
public class QueryParser extends SolrQueryParserBase implements QueryParserConstants {
|
||||
|
@ -135,7 +136,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
|||
addClause(clauses, conj, mods, q);
|
||||
}
|
||||
if (clauses.size() == 1 && firstQuery != null)
|
||||
{if (true) return firstQuery;}
|
||||
{if (true) return rawToNormal(firstQuery);}
|
||||
else {
|
||||
{if (true) return getBooleanQuery(clauses);}
|
||||
}
|
||||
|
@ -146,6 +147,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
|||
Query q;
|
||||
Token fieldToken=null, boost=null;
|
||||
Token localParams=null;
|
||||
int flags = 0;
|
||||
if (jj_2_1(2)) {
|
||||
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
||||
case TERM:
|
||||
|
@ -195,6 +197,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
|||
break;
|
||||
case FILTER:
|
||||
jj_consume_token(FILTER);
|
||||
flags=startFilter();
|
||||
q = Query(field);
|
||||
jj_consume_token(RPAREN);
|
||||
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
||||
|
@ -206,7 +209,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
|||
jj_la1[7] = jj_gen;
|
||||
;
|
||||
}
|
||||
q=getFilter(q);
|
||||
q=getFilter(q); restoreFlags(flags);
|
||||
break;
|
||||
case LPARAMS:
|
||||
localParams = jj_consume_token(LPARAMS);
|
||||
|
|
|
@ -190,7 +190,7 @@ Query Query(String field) throws SyntaxError :
|
|||
)*
|
||||
{
|
||||
if (clauses.size() == 1 && firstQuery != null)
|
||||
return firstQuery;
|
||||
return rawToNormal(firstQuery);
|
||||
else {
|
||||
return getBooleanQuery(clauses);
|
||||
}
|
||||
|
@ -201,6 +201,7 @@ Query Clause(String field) throws SyntaxError : {
|
|||
Query q;
|
||||
Token fieldToken=null, boost=null;
|
||||
Token localParams=null;
|
||||
int flags = 0;
|
||||
}
|
||||
{
|
||||
|
||||
|
@ -216,7 +217,7 @@ Query Clause(String field) throws SyntaxError : {
|
|||
(
|
||||
q=Term(field)
|
||||
| <LPAREN> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)?
|
||||
| (<FILTER> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)? { q=getFilter(q); } )
|
||||
| (<FILTER> { flags=startFilter(); } q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)? { q=getFilter(q); restoreFlags(flags); } )
|
||||
| (localParams = <LPARAMS> (<CARAT> boost=<NUMBER>)? { q=getLocalParams(field, localParams.image); } )
|
||||
)
|
||||
{ return handleBoost(q, boost); }
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
package org.apache.solr.parser;
|
||||
|
||||
import java.io.StringReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.EnumSet;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
|
@ -61,6 +62,7 @@ import org.apache.solr.search.SyntaxError;
|
|||
*/
|
||||
public abstract class SolrQueryParserBase extends QueryBuilder {
|
||||
|
||||
public static final int TERMS_QUERY_THRESHOLD = 16; // @lucene.internal Set to a low value temporarily for better test coverage
|
||||
|
||||
static final int CONJ_NONE = 0;
|
||||
static final int CONJ_AND = 1;
|
||||
|
@ -89,7 +91,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
|||
int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
|
||||
|
||||
boolean autoGeneratePhraseQueries = false;
|
||||
|
||||
int flags;
|
||||
|
||||
protected IndexSchema schema;
|
||||
protected QParser parser;
|
||||
|
@ -125,6 +127,31 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
// internal: A simple raw fielded query
|
||||
public static class RawQuery extends Query {
|
||||
final SchemaField sfield;
|
||||
final String externalVal;
|
||||
|
||||
public RawQuery(SchemaField sfield, String externalVal) {
|
||||
this.sfield = sfield;
|
||||
this.externalVal = externalVal;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(String field) {
|
||||
return "RAW(" + field + "," + externalVal + ")";
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// So the generated QueryParser(CharStream) won't error out
|
||||
protected SolrQueryParserBase() {
|
||||
|
@ -138,10 +165,22 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
|||
public void init(Version matchVersion, String defaultField, QParser parser) {
|
||||
this.schema = parser.getReq().getSchema();
|
||||
this.parser = parser;
|
||||
this.flags = parser.getFlags();
|
||||
this.defaultField = defaultField;
|
||||
setAnalyzer(schema.getQueryAnalyzer());
|
||||
}
|
||||
|
||||
// Turn on the "filter" bit and return the previous flags for the caller to save
|
||||
int startFilter() {
|
||||
int oldFlags = flags;
|
||||
flags |= QParser.FLAG_FILTER;
|
||||
return oldFlags;
|
||||
}
|
||||
|
||||
void restoreFlags(int flagsToRestore) {
|
||||
flags = flagsToRestore;
|
||||
}
|
||||
|
||||
/** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
|
||||
* @param query the query string to be parsed.
|
||||
*/
|
||||
|
@ -381,7 +420,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
|||
*/
|
||||
protected Query getFieldQuery(String field, String queryText, int slop)
|
||||
throws SyntaxError {
|
||||
Query query = getFieldQuery(field, queryText, true);
|
||||
Query query = getFieldQuery(field, queryText, true, false);
|
||||
|
||||
// only set slop of the phrase query was a result of this parser
|
||||
// and not a sub-parser.
|
||||
|
@ -492,11 +531,77 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
|||
if (clauses.size()==0) {
|
||||
return null; // all clause words were filtered away by the analyzer.
|
||||
}
|
||||
BooleanQuery.Builder query = newBooleanQuery();
|
||||
for(final BooleanClause clause: clauses) {
|
||||
query.add(clause);
|
||||
|
||||
SchemaField sfield = null;
|
||||
List<String> fieldValues = null;
|
||||
|
||||
|
||||
boolean useTermsQuery = (flags & QParser.FLAG_FILTER)!=0 && clauses.size() > TERMS_QUERY_THRESHOLD;
|
||||
int clausesAdded = 0;
|
||||
|
||||
BooleanQuery.Builder booleanBuilder = newBooleanQuery();
|
||||
Map<SchemaField, List<String>> fmap = new HashMap<>();
|
||||
|
||||
for (BooleanClause clause : clauses) {
|
||||
Query subq = clause.getQuery();
|
||||
if (subq instanceof RawQuery) {
|
||||
if (clause.getOccur() != BooleanClause.Occur.SHOULD) {
|
||||
// We only collect optional terms for set queries. Since this isn't optional,
|
||||
// convert the raw query to a normal query and handle as usual.
|
||||
clause = new BooleanClause( rawToNormal(subq), clause.getOccur() );
|
||||
} else {
|
||||
// Optional raw query.
|
||||
RawQuery rawq = (RawQuery) subq;
|
||||
|
||||
// only look up fmap and type info on a field change
|
||||
if (sfield != rawq.sfield) {
|
||||
sfield = rawq.sfield;
|
||||
fieldValues = fmap.get(sfield);
|
||||
// If this field isn't indexed, or if it is indexed and we want to use TermsQuery, then collect this value.
|
||||
// We are currently relying on things like PointField not being marked as indexed in order to bypass
|
||||
// the "useTermQuery" check.
|
||||
if (fieldValues == null && useTermsQuery || !sfield.indexed()) {
|
||||
fieldValues = new ArrayList<>(2);
|
||||
fmap.put(sfield, fieldValues);
|
||||
}
|
||||
return query.build();
|
||||
}
|
||||
|
||||
if (fieldValues != null) {
|
||||
fieldValues.add(rawq.externalVal);
|
||||
continue;
|
||||
}
|
||||
|
||||
clause = new BooleanClause( rawToNormal(subq), clause.getOccur() );
|
||||
}
|
||||
}
|
||||
|
||||
clausesAdded++;
|
||||
booleanBuilder.add(clause);
|
||||
}
|
||||
|
||||
|
||||
for (Map.Entry<SchemaField,List<String>> entry : fmap.entrySet()) {
|
||||
sfield = entry.getKey();
|
||||
fieldValues = entry.getValue();
|
||||
FieldType ft = sfield.getType();
|
||||
|
||||
// TODO: pull more of this logic out to FieldType? We would need to be able to add clauses to our existing booleanBuilder.
|
||||
if (sfield.indexed() && fieldValues.size() < TERMS_QUERY_THRESHOLD || fieldValues.size() == 1) {
|
||||
// use boolean query instead
|
||||
for (String externalVal : fieldValues) {
|
||||
Query subq = ft.getFieldQuery(this.parser, sfield, externalVal);
|
||||
clausesAdded++;
|
||||
booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
|
||||
}
|
||||
} else {
|
||||
Query subq = ft.getSetQuery(this.parser, sfield, fieldValues);
|
||||
if (fieldValues.size() == clauses.size()) return subq; // if this is everything, don't wrap in a boolean query
|
||||
clausesAdded++;
|
||||
booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
|
||||
}
|
||||
}
|
||||
|
||||
return booleanBuilder.build();
|
||||
}
|
||||
|
||||
|
||||
|
@ -526,7 +631,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
|||
q = getFuzzyQuery(qfield, termImage, fms);
|
||||
} else {
|
||||
String termImage=discardEscapeChar(term.image);
|
||||
q = getFieldQuery(qfield, termImage, false);
|
||||
q = getFieldQuery(qfield, termImage, false, true);
|
||||
}
|
||||
return q;
|
||||
}
|
||||
|
@ -540,10 +645,15 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
|||
}
|
||||
catch (Exception ignored) { }
|
||||
}
|
||||
return getFieldQuery(qfield, discardEscapeChar(term.image.substring(1, term.image.length()-1)), s);
|
||||
|
||||
String raw = discardEscapeChar(term.image.substring(1, term.image.length()-1));
|
||||
return getFieldQuery(qfield, raw, s);
|
||||
}
|
||||
|
||||
// called from parser
|
||||
|
||||
|
||||
// Called from parser
|
||||
// Raw queries are transformed to normal queries before wrapping in a BoostQuery
|
||||
Query handleBoost(Query q, Token boost) {
|
||||
// q==null check is to avoid boosting null queries, such as those caused by stop words
|
||||
if (boost == null || boost.image.length()==0 || q == null) {
|
||||
|
@ -556,14 +666,14 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
|||
if (q instanceof ConstantScoreQuery || q instanceof SolrConstantScoreQuery) {
|
||||
// skip
|
||||
} else {
|
||||
newQ = new ConstantScoreQuery(q);
|
||||
newQ = new ConstantScoreQuery( rawToNormal(q) );
|
||||
}
|
||||
return new BoostQuery(newQ, val);
|
||||
}
|
||||
|
||||
float boostVal = Float.parseFloat(boost.image);
|
||||
|
||||
return new BoostQuery(q, boostVal);
|
||||
return new BoostQuery( rawToNormal(q), boostVal);
|
||||
}
|
||||
|
||||
|
||||
|
@ -577,17 +687,21 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
|||
*
|
||||
*/
|
||||
String discardEscapeChar(String input) throws SyntaxError {
|
||||
int start = input.indexOf('\\');
|
||||
if (start < 0) return input;
|
||||
|
||||
// Create char array to hold unescaped char sequence
|
||||
char[] output = new char[input.length()];
|
||||
input.getChars(0, start, output, 0);
|
||||
|
||||
// The length of the output can be less than the input
|
||||
// due to discarded escape chars. This variable holds
|
||||
// the actual length of the output
|
||||
int length = 0;
|
||||
int length = start;
|
||||
|
||||
// We remember whether the last processed character was
|
||||
// an escape character
|
||||
boolean lastCharWasEscapeChar = false;
|
||||
boolean lastCharWasEscapeChar = true;
|
||||
|
||||
// The multiplier the current unicode digit must be multiplied with.
|
||||
// E. g. the first digit must be multiplied with 16^3, the second with 16^2...
|
||||
|
@ -596,7 +710,8 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
|||
// Used to calculate the codepoint of the escaped unicode character
|
||||
int codePoint = 0;
|
||||
|
||||
for (int i = 0; i < input.length(); i++) {
|
||||
// start after the first escape char
|
||||
for (int i = start+1; i < input.length(); i++) {
|
||||
char curChar = input.charAt(i);
|
||||
if (codePointMultiplier > 0) {
|
||||
codePoint += hexToInt(curChar) * codePointMultiplier;
|
||||
|
@ -715,8 +830,32 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
|||
|
||||
private QParser subQParser = null;
|
||||
|
||||
// Create a "normal" query from a RawQuery (or just return the current query if it's not raw)
|
||||
Query rawToNormal(Query q) {
|
||||
if (!(q instanceof RawQuery)) return q;
|
||||
RawQuery rq = (RawQuery)q;
|
||||
return rq.sfield.getType().getFieldQuery(parser, rq.sfield, rq.externalVal);
|
||||
}
|
||||
|
||||
protected Query getFieldQuery(String field, String queryText, boolean quoted) throws SyntaxError {
|
||||
return getFieldQuery(field, queryText, quoted, false);
|
||||
}
|
||||
|
||||
// private use for getFieldQuery
|
||||
private String lastFieldName;
|
||||
private SchemaField lastField;
|
||||
|
||||
// if raw==true, then it's possible for this method to return a RawQuery that will need to be transformed
|
||||
// further before using.
|
||||
protected Query getFieldQuery(String field, String queryText, boolean quoted, boolean raw) throws SyntaxError {
|
||||
checkNullField(field);
|
||||
|
||||
SchemaField sf;
|
||||
if (field.equals(lastFieldName)) {
|
||||
// only look up the SchemaField on a field change... this helps with memory allocation of dynamic fields
|
||||
// and large queries like foo_i:(1 2 3 4 5 6 7 8 9 10) when we are passed "foo_i" each time.
|
||||
sf = lastField;
|
||||
} else {
|
||||
// intercept magic field name of "_" to use as a hook for our
|
||||
// own functions.
|
||||
if (field.charAt(0) == '_' && parser != null) {
|
||||
|
@ -726,22 +865,31 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
|||
return subQParser.getQuery();
|
||||
}
|
||||
}
|
||||
SchemaField sf = schema.getFieldOrNull(field);
|
||||
|
||||
lastFieldName = field;
|
||||
sf = lastField = schema.getFieldOrNull(field);
|
||||
}
|
||||
|
||||
if (sf != null) {
|
||||
FieldType ft = sf.getType();
|
||||
// delegate to type for everything except tokenized fields
|
||||
if (ft.isTokenized() && sf.indexed()) {
|
||||
return newFieldQuery(getAnalyzer(), field, queryText, quoted || (ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries()));
|
||||
} else {
|
||||
if (raw) {
|
||||
return new RawQuery(sf, queryText);
|
||||
} else {
|
||||
return sf.getType().getFieldQuery(parser, sf, queryText);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// default to a normal field query
|
||||
return newFieldQuery(getAnalyzer(), field, queryText, quoted);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// called from parser
|
||||
protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws SyntaxError {
|
||||
checkNullField(field);
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.solr.schema;
|
|||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
|
@ -38,7 +39,10 @@ import org.apache.lucene.document.Field;
|
|||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.queries.TermsQuery;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.DocValuesRangeQuery;
|
||||
import org.apache.lucene.search.DocValuesRewriteMethod;
|
||||
import org.apache.lucene.search.MultiTermQuery;
|
||||
|
@ -56,8 +60,8 @@ import org.apache.lucene.util.CharsRefBuilder;
|
|||
import org.apache.lucene.util.Version;
|
||||
import org.apache.solr.analysis.SolrAnalyzer;
|
||||
import org.apache.solr.analysis.TokenizerChain;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.util.Base64;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.common.util.StrUtils;
|
||||
|
@ -759,6 +763,26 @@ public abstract class FieldType extends FieldProperties {
|
|||
}
|
||||
}
|
||||
|
||||
/** @lucene.experimental */
|
||||
public Query getSetQuery(QParser parser, SchemaField field, Collection<String> externalVals) {
|
||||
if (!field.indexed()) {
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
for (String externalVal : externalVals) {
|
||||
Query subq = getFieldQuery(parser, field, externalVal);
|
||||
builder.add(subq, BooleanClause.Occur.SHOULD);
|
||||
}
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
List<BytesRef> lst = new ArrayList<>(externalVals.size());
|
||||
BytesRefBuilder br = new BytesRefBuilder();
|
||||
for (String externalVal : externalVals) {
|
||||
readableToIndexed(externalVal, br);
|
||||
lst.add( br.toBytesRef() );
|
||||
}
|
||||
return new TermsQuery(field.getName() , lst);
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: Returns the rewrite method for multiterm queries such as wildcards.
|
||||
* @param parser The {@link org.apache.solr.search.QParser} calling the method
|
||||
|
|
|
@ -1034,8 +1034,8 @@ public class ExtendedDismaxQParser extends QParser {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected Query getFieldQuery(String field, String val, boolean quoted) throws SyntaxError {
|
||||
this.type = QType.FIELD;
|
||||
protected Query getFieldQuery(String field, String val, boolean quoted, boolean raw) throws SyntaxError {
|
||||
this.type = quoted ? QType.PHRASE : QType.FIELD;
|
||||
this.field = field;
|
||||
this.val = val;
|
||||
this.slop = getPhraseSlop(); // unspecified
|
||||
|
@ -1216,7 +1216,7 @@ public class ExtendedDismaxQParser extends QParser {
|
|||
switch (type) {
|
||||
case FIELD: // fallthrough
|
||||
case PHRASE:
|
||||
Query query = super.getFieldQuery(field, val, type == QType.PHRASE);
|
||||
Query query = super.getFieldQuery(field, val, type == QType.PHRASE, false);
|
||||
// A BooleanQuery is only possible from getFieldQuery if it came from
|
||||
// a single whitespace separated term. In this case, check the coordination
|
||||
// factor on the query: if it's enabled, that means we aren't a set of synonyms
|
||||
|
|
|
@ -46,7 +46,9 @@ public class FunctionRangeQParserPlugin extends QParserPlugin {
|
|||
@Override
|
||||
public Query parse() throws SyntaxError {
|
||||
funcStr = localParams.get(QueryParsing.V, null);
|
||||
Query funcQ = subQuery(funcStr, FunctionQParserPlugin.NAME).getQuery();
|
||||
QParser subParser = subQuery(funcStr, FunctionQParserPlugin.NAME);
|
||||
subParser.setIsFilter(false); // the range can be based on the relevancy score of embedded queries.
|
||||
Query funcQ = subParser.getQuery();
|
||||
if (funcQ instanceof FunctionQuery) {
|
||||
vs = ((FunctionQuery)funcQ).getValueSource();
|
||||
} else {
|
||||
|
|
|
@ -32,12 +32,18 @@ import java.util.*;
|
|||
*
|
||||
*/
|
||||
public abstract class QParser {
|
||||
/** @lucene.experimental */
|
||||
public static final int FLAG_FILTER = 0x01;
|
||||
|
||||
protected String qstr;
|
||||
protected SolrParams params;
|
||||
protected SolrParams localParams;
|
||||
protected SolrQueryRequest req;
|
||||
protected int recurseCount;
|
||||
|
||||
/** @lucene.experimental */
|
||||
protected int flags;
|
||||
|
||||
protected Query query;
|
||||
|
||||
protected String stringIncludingLocalParams; // the original query string including any local params
|
||||
|
@ -83,6 +89,28 @@ public abstract class QParser {
|
|||
this.req = req;
|
||||
}
|
||||
|
||||
/** @lucene.experimental */
|
||||
public void setFlags(int flags) {
|
||||
this.flags = flags;
|
||||
}
|
||||
|
||||
/** @lucene.experimental */
|
||||
public int getFlags() {
|
||||
return flags;
|
||||
}
|
||||
|
||||
/** @lucene.experimental Query is in the context of a filter, where scores don't matter */
|
||||
public boolean isFilter() {
|
||||
return (flags & FLAG_FILTER) != 0;
|
||||
}
|
||||
|
||||
/** @lucene.experimental */
|
||||
public void setIsFilter(boolean isFilter) {
|
||||
if (isFilter)
|
||||
flags |= FLAG_FILTER;
|
||||
else
|
||||
flags &= ~FLAG_FILTER;
|
||||
}
|
||||
|
||||
private static void addTag(Map<Object,Collection<Object>> tagMap, Object key, Object val) {
|
||||
Collection<Object> lst = tagMap.get(key);
|
||||
|
@ -201,6 +229,7 @@ public abstract class QParser {
|
|||
defaultType = localParams.get(QueryParsing.DEFTYPE);
|
||||
}
|
||||
QParser nestedParser = getParser(q, defaultType, getReq());
|
||||
nestedParser.flags = this.flags; // TODO: this would be better passed in to the constructor... change to a ParserContext object?
|
||||
nestedParser.recurseCount = recurseCount;
|
||||
recurseCount--;
|
||||
return nestedParser;
|
||||
|
|
|
@ -908,7 +908,7 @@ public class SolrPluginUtils {
|
|||
* aliases should work)
|
||||
*/
|
||||
@Override
|
||||
protected Query getFieldQuery(String field, String queryText, boolean quoted)
|
||||
protected Query getFieldQuery(String field, String queryText, boolean quoted, boolean raw)
|
||||
throws SyntaxError {
|
||||
|
||||
if (aliases.containsKey(field)) {
|
||||
|
@ -918,7 +918,7 @@ public class SolrPluginUtils {
|
|||
List<Query> disjuncts = new ArrayList<>();
|
||||
for (String f : a.fields.keySet()) {
|
||||
|
||||
Query sub = getFieldQuery(f,queryText,quoted);
|
||||
Query sub = getFieldQuery(f,queryText,quoted, false);
|
||||
if (null != sub) {
|
||||
if (null != a.fields.get(f)) {
|
||||
sub = new BoostQuery(sub, a.fields.get(f));
|
||||
|
@ -932,7 +932,7 @@ public class SolrPluginUtils {
|
|||
|
||||
} else {
|
||||
try {
|
||||
return super.getFieldQuery(field, queryText, quoted);
|
||||
return super.getFieldQuery(field, queryText, quoted, raw);
|
||||
} catch (Exception e) {
|
||||
return null;
|
||||
}
|
||||
|
|
|
@ -1453,11 +1453,11 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
|
|||
|
||||
@Override
|
||||
protected Query getFieldQuery(String field,
|
||||
String val, boolean quoted) throws SyntaxError {
|
||||
String val, boolean quoted, boolean raw) throws SyntaxError {
|
||||
if(frequentlyMisspelledWords.contains(val)) {
|
||||
return getFuzzyQuery(field, val, 0.75F);
|
||||
}
|
||||
return super.getFieldQuery(field, val, quoted);
|
||||
return super.getFieldQuery(field, val, quoted, raw);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,11 +16,20 @@
|
|||
*/
|
||||
package org.apache.solr.search;
|
||||
|
||||
import java.util.Locale;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.queries.TermsQuery;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.BoostQuery;
|
||||
import org.apache.lucene.search.ConstantScoreQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.core.SolrInfoMBean;
|
||||
import org.apache.solr.parser.QueryParser;
|
||||
import org.apache.solr.query.FilterQuery;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
@ -37,9 +46,9 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
|
|||
public static void createIndex() {
|
||||
String v;
|
||||
v = "how now brown cow";
|
||||
assertU(adoc("id", "1", "text", v, "text_np", v));
|
||||
assertU(adoc("id", "1", "text", v, "text_np", v, "foo_i","11"));
|
||||
v = "now cow";
|
||||
assertU(adoc("id", "2", "text", v, "text_np", v));
|
||||
assertU(adoc("id", "2", "text", v, "text_np", v, "foo_i","12"));
|
||||
assertU(adoc("id", "3", "foo_s", "a ' \" \\ {! ) } ( { z")); // A value filled with special chars
|
||||
|
||||
assertU(adoc("id", "10", "qqq_s", "X"));
|
||||
|
@ -184,6 +193,92 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
|
|||
req.close();
|
||||
}
|
||||
|
||||
|
||||
// automatically use TermsQuery when appropriate
|
||||
@Test
|
||||
public void testAutoTerms() throws Exception {
|
||||
SolrQueryRequest req = req();
|
||||
QParser qParser;
|
||||
Query q,qq;
|
||||
|
||||
// relevance query should not be a filter
|
||||
qParser = QParser.getParser("foo_s:(a b c)", req);
|
||||
q = qParser.getQuery();
|
||||
assertEquals(3, ((BooleanQuery)q).clauses().size());
|
||||
|
||||
// small filter query should still use BooleanQuery
|
||||
if (QueryParser.TERMS_QUERY_THRESHOLD > 3) {
|
||||
qParser = QParser.getParser("foo_s:(a b c)", req);
|
||||
qParser.setIsFilter(true); // this may change in the future
|
||||
q = qParser.getQuery();
|
||||
assertEquals(3, ((BooleanQuery) q).clauses().size());
|
||||
}
|
||||
|
||||
// large relevancy query should use BooleanQuery
|
||||
// TODO: we may decide that string fields shouldn't have relevance in the future... change to a text field w/o a stop filter if so
|
||||
qParser = QParser.getParser("foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z)", req);
|
||||
q = qParser.getQuery();
|
||||
assertEquals(26, ((BooleanQuery)q).clauses().size());
|
||||
|
||||
// large filter query should use TermsQuery
|
||||
qParser = QParser.getParser("foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z)", req);
|
||||
qParser.setIsFilter(true); // this may change in the future
|
||||
q = qParser.getQuery();
|
||||
assertEquals(26, ((TermsQuery)q).getTermData().size());
|
||||
|
||||
// large numeric filter query should use TermsQuery (for trie fields)
|
||||
qParser = QParser.getParser("foo_i:(1 2 3 4 5 6 7 8 9 10 20 19 18 17 16 15 14 13 12 11)", req);
|
||||
qParser.setIsFilter(true); // this may change in the future
|
||||
q = qParser.getQuery();
|
||||
assertEquals(20, ((TermsQuery)q).getTermData().size());
|
||||
|
||||
// a filter() clause inside a relevancy query should be able to use a TermsQuery
|
||||
qParser = QParser.getParser("foo_s:aaa filter(foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z))", req);
|
||||
q = qParser.getQuery();
|
||||
assertEquals(2, ((BooleanQuery)q).clauses().size());
|
||||
qq = ((BooleanQuery)q).clauses().get(0).getQuery();
|
||||
if (qq instanceof TermQuery) {
|
||||
qq = ((BooleanQuery)q).clauses().get(1).getQuery();
|
||||
}
|
||||
|
||||
if (qq instanceof FilterQuery) {
|
||||
qq = ((FilterQuery)qq).getQuery();
|
||||
}
|
||||
|
||||
assertEquals(26, ((TermsQuery)qq).getTermData().size());
|
||||
|
||||
// test mixed boolean query, including quotes (which shouldn't matter)
|
||||
qParser = QParser.getParser("foo_s:(a +aaa b -bbb c d e f bar_s:(qqq www) g h i j k l m n o p q r s t u v w x y z)", req);
|
||||
qParser.setIsFilter(true); // this may change in the future
|
||||
q = qParser.getQuery();
|
||||
assertEquals(4, ((BooleanQuery)q).clauses().size());
|
||||
qq = null;
|
||||
for (BooleanClause clause : ((BooleanQuery)q).clauses()) {
|
||||
qq = clause.getQuery();
|
||||
if (qq instanceof TermsQuery) break;
|
||||
}
|
||||
assertEquals(26, ((TermsQuery)qq).getTermData().size());
|
||||
|
||||
req.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testManyClauses() throws Exception {
|
||||
String a = "1 a 2 b 3 c 10 d 11 12 "; // 10 terms
|
||||
StringBuilder sb = new StringBuilder("id:(");
|
||||
for (int i = 0; i < 1024; i++) { // historically, the max number of boolean clauses defaulted to 1024
|
||||
sb.append('z').append(i).append(' ');
|
||||
}
|
||||
sb.append(a);
|
||||
sb.append(")");
|
||||
|
||||
String q = sb.toString();
|
||||
|
||||
// This will still fail when used as the main query, but will pass in a filter query since TermsQuery can be used.
|
||||
assertJQ(req("q","*:*", "fq", q)
|
||||
,"/response/numFound==6");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testComments() throws Exception {
|
||||
assertJQ(req("q", "id:1 id:2 /* *:* */ id:3")
|
||||
|
@ -317,4 +412,103 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
|
|||
|
||||
}
|
||||
|
||||
// parsing performance test
|
||||
// Run from command line with ant test -Dtestcase=TestSolrQueryParser -Dtestmethod=testParsingPerformance -Dtests.asserts=false 2>/dev/null | grep QPS
|
||||
@Test
|
||||
public void testParsingPerformance() throws Exception {
|
||||
String[] args = {"-queries","100" ,"-iter","1000", "-clauses","100", "-format","term%d", "-seed","0"};
|
||||
args = new String[] {"-queries","1000" ,"-iter","2000", "-clauses","10", "-format","term%d", "-seed","0"};
|
||||
// args = new String[] {"-queries","1000" ,"-iter","1000000000", "-clauses","10", "-format","term%d", "-seed","0"};
|
||||
|
||||
boolean assertOn = false;
|
||||
assert assertOn = true;
|
||||
if (assertOn) {
|
||||
// System.out.println("WARNING! Assertions are enabled!!!! Will only execute small run. Change with -Dtests.asserts=false");
|
||||
args = new String[]{"-queries","10" ,"-iter","2", "-clauses","20", "-format","term%d", "-seed","0"};
|
||||
}
|
||||
|
||||
|
||||
int iter = 1000;
|
||||
int numQueries = 100;
|
||||
int maxClauses = 5;
|
||||
int maxTerm = 10000000;
|
||||
String format = "term%d";
|
||||
String field = "foo_s";
|
||||
long seed = 0;
|
||||
boolean isFilter = true;
|
||||
boolean rewrite = false;
|
||||
|
||||
String otherStuff = "";
|
||||
|
||||
for (int i = 0; i < args.length; i++) {
|
||||
String a = args[i];
|
||||
if ("-queries".equals(a)) {
|
||||
numQueries = Integer.parseInt(args[++i]);
|
||||
} else if ("-iter".equals(a)) {
|
||||
iter = Integer.parseInt(args[++i]);
|
||||
} else if ("-clauses".equals(a)) {
|
||||
maxClauses = Integer.parseInt(args[++i]);
|
||||
} else if ("-format".equals(a)) {
|
||||
format = args[++i];
|
||||
} else if ("-seed".equals(a)) {
|
||||
seed = Long.parseLong(args[++i]);
|
||||
} else {
|
||||
otherStuff = otherStuff + " " + a;
|
||||
}
|
||||
}
|
||||
|
||||
Random r = new Random(seed);
|
||||
|
||||
String[] queries = new String[numQueries];
|
||||
for (int i = 0; i < queries.length; i++) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
boolean explicitField = r.nextInt(5) == 0;
|
||||
if (!explicitField) {
|
||||
sb.append(field + ":(");
|
||||
}
|
||||
|
||||
sb.append(otherStuff).append(" ");
|
||||
|
||||
int nClauses = r.nextInt(maxClauses) + 1; // TODO: query parse can't parse () for some reason???
|
||||
|
||||
for (int c = 0; c<nClauses; c++) {
|
||||
String termString = String.format(Locale.US, format, r.nextInt(maxTerm));
|
||||
if (explicitField) {
|
||||
sb.append(field).append(':');
|
||||
}
|
||||
sb.append(termString);
|
||||
sb.append(' ');
|
||||
}
|
||||
|
||||
if (!explicitField) {
|
||||
sb.append(")");
|
||||
}
|
||||
queries[i] = sb.toString();
|
||||
// System.out.println(queries[i]);
|
||||
}
|
||||
|
||||
SolrQueryRequest req = req();
|
||||
|
||||
long start = System.nanoTime();
|
||||
|
||||
int ret = 0;
|
||||
for (int i=0; i<iter; i++) {
|
||||
for (String qStr : queries) {
|
||||
QParser parser = QParser.getParser(qStr,req);
|
||||
parser.setIsFilter(isFilter);
|
||||
Query q = parser.getQuery();
|
||||
if (rewrite) {
|
||||
// TODO: do rewrite
|
||||
}
|
||||
ret += q.getClass().hashCode(); // use the query somehow
|
||||
}
|
||||
}
|
||||
|
||||
long end = System.nanoTime();
|
||||
|
||||
System.out.println((assertOn ? "WARNING, assertions enabled. " : "") + "ret=" + ret + " Parser QPS:" + ((long)numQueries * iter)*1000000000/(end-start));
|
||||
|
||||
req.close();
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue