SOLR-9786: FieldType.getSetQuery, QParser.flags/isFilter, solr query parser use getSetQuery when appropriate and other optimizations

This commit is contained in:
yonik 2016-11-22 12:32:53 -05:00
parent 6a83f0fa3a
commit bf9db95f21
12 changed files with 456 additions and 44 deletions

View File

@ -142,6 +142,16 @@ Optimizations
* SOLR-9772: Deriving distributed sort values (fieldSortValues) should reuse * SOLR-9772: Deriving distributed sort values (fieldSortValues) should reuse
comparator and only invalidate leafComparator. (John Call via yonik) comparator and only invalidate leafComparator. (John Call via yonik)
* SOLR-9786: FieldType has a new getSetQuery() method that can take a set of terms
and create a more efficient query (such as TermsQuery). The solr query parser has been
changed to use this method when appropriate. The parser also knows when it is being
used to parse a filter and will create TermsQueries from large lists of normal terms
or numbers, resulting in a query that will execute faster. This also acts to avoid
BooleanQuery maximum clause limit. Query parsing itself has also been optimized,
resulting in less produced garbage and 5-7% better performance.
(yonik)
Bug Fixes Bug Fixes
---------------------- ----------------------
* SOLR-9701: NPE in export handler when "fl" parameter is omitted. * SOLR-9701: NPE in export handler when "fl" parameter is omitted.

View File

@ -199,10 +199,11 @@ public class QueryComponent extends SearchComponent
if (fqs!=null && fqs.length!=0) { if (fqs!=null && fqs.length!=0) {
List<Query> filters = rb.getFilters(); List<Query> filters = rb.getFilters();
// if filters already exists, make a copy instead of modifying the original // if filters already exists, make a copy instead of modifying the original
filters = filters == null ? new ArrayList<Query>(fqs.length) : new ArrayList<>(filters); filters = filters == null ? new ArrayList<>(fqs.length) : new ArrayList<>(filters);
for (String fq : fqs) { for (String fq : fqs) {
if (fq != null && fq.trim().length()!=0) { if (fq != null && fq.trim().length()!=0) {
QParser fqp = QParser.getParser(fq, req); QParser fqp = QParser.getParser(fq, req);
fqp.setIsFilter(true);
filters.add(fqp.getQuery()); filters.add(fqp.getQuery());
} }
} }

View File

@ -4,11 +4,12 @@ package org.apache.solr.parser;
import java.io.StringReader; import java.io.StringReader;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version; import org.apache.lucene.util.Version;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.QParser; import org.apache.solr.search.QParser;
import org.apache.solr.search.SyntaxError;
public class QueryParser extends SolrQueryParserBase implements QueryParserConstants { public class QueryParser extends SolrQueryParserBase implements QueryParserConstants {
@ -135,7 +136,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
addClause(clauses, conj, mods, q); addClause(clauses, conj, mods, q);
} }
if (clauses.size() == 1 && firstQuery != null) if (clauses.size() == 1 && firstQuery != null)
{if (true) return firstQuery;} {if (true) return rawToNormal(firstQuery);}
else { else {
{if (true) return getBooleanQuery(clauses);} {if (true) return getBooleanQuery(clauses);}
} }
@ -146,6 +147,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
Query q; Query q;
Token fieldToken=null, boost=null; Token fieldToken=null, boost=null;
Token localParams=null; Token localParams=null;
int flags = 0;
if (jj_2_1(2)) { if (jj_2_1(2)) {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case TERM: case TERM:
@ -195,6 +197,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
break; break;
case FILTER: case FILTER:
jj_consume_token(FILTER); jj_consume_token(FILTER);
flags=startFilter();
q = Query(field); q = Query(field);
jj_consume_token(RPAREN); jj_consume_token(RPAREN);
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
@ -206,7 +209,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
jj_la1[7] = jj_gen; jj_la1[7] = jj_gen;
; ;
} }
q=getFilter(q); q=getFilter(q); restoreFlags(flags);
break; break;
case LPARAMS: case LPARAMS:
localParams = jj_consume_token(LPARAMS); localParams = jj_consume_token(LPARAMS);

View File

@ -190,7 +190,7 @@ Query Query(String field) throws SyntaxError :
)* )*
{ {
if (clauses.size() == 1 && firstQuery != null) if (clauses.size() == 1 && firstQuery != null)
return firstQuery; return rawToNormal(firstQuery);
else { else {
return getBooleanQuery(clauses); return getBooleanQuery(clauses);
} }
@ -201,6 +201,7 @@ Query Clause(String field) throws SyntaxError : {
Query q; Query q;
Token fieldToken=null, boost=null; Token fieldToken=null, boost=null;
Token localParams=null; Token localParams=null;
int flags = 0;
} }
{ {
@ -216,7 +217,7 @@ Query Clause(String field) throws SyntaxError : {
( (
q=Term(field) q=Term(field)
| <LPAREN> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)? | <LPAREN> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)?
| (<FILTER> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)? { q=getFilter(q); } ) | (<FILTER> { flags=startFilter(); } q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)? { q=getFilter(q); restoreFlags(flags); } )
| (localParams = <LPARAMS> (<CARAT> boost=<NUMBER>)? { q=getLocalParams(field, localParams.image); } ) | (localParams = <LPARAMS> (<CARAT> boost=<NUMBER>)? { q=getLocalParams(field, localParams.image); } )
) )
{ return handleBoost(q, boost); } { return handleBoost(q, boost); }

View File

@ -17,6 +17,7 @@
package org.apache.solr.parser; package org.apache.solr.parser;
import java.io.StringReader; import java.io.StringReader;
import java.util.ArrayList;
import java.util.EnumSet; import java.util.EnumSet;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
@ -61,6 +62,7 @@ import org.apache.solr.search.SyntaxError;
*/ */
public abstract class SolrQueryParserBase extends QueryBuilder { public abstract class SolrQueryParserBase extends QueryBuilder {
public static final int TERMS_QUERY_THRESHOLD = 16; // @lucene.internal Set to a low value temporarily for better test coverage
static final int CONJ_NONE = 0; static final int CONJ_NONE = 0;
static final int CONJ_AND = 1; static final int CONJ_AND = 1;
@ -89,7 +91,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength; int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
boolean autoGeneratePhraseQueries = false; boolean autoGeneratePhraseQueries = false;
int flags;
protected IndexSchema schema; protected IndexSchema schema;
protected QParser parser; protected QParser parser;
@ -125,6 +127,31 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
} }
} }
// internal: A simple raw fielded query
public static class RawQuery extends Query {
final SchemaField sfield;
final String externalVal;
public RawQuery(SchemaField sfield, String externalVal) {
this.sfield = sfield;
this.externalVal = externalVal;
}
@Override
public String toString(String field) {
return "RAW(" + field + "," + externalVal + ")";
}
@Override
public boolean equals(Object obj) {
return false;
}
@Override
public int hashCode() {
return 0;
}
}
// So the generated QueryParser(CharStream) won't error out // So the generated QueryParser(CharStream) won't error out
protected SolrQueryParserBase() { protected SolrQueryParserBase() {
@ -138,10 +165,22 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
public void init(Version matchVersion, String defaultField, QParser parser) { public void init(Version matchVersion, String defaultField, QParser parser) {
this.schema = parser.getReq().getSchema(); this.schema = parser.getReq().getSchema();
this.parser = parser; this.parser = parser;
this.flags = parser.getFlags();
this.defaultField = defaultField; this.defaultField = defaultField;
setAnalyzer(schema.getQueryAnalyzer()); setAnalyzer(schema.getQueryAnalyzer());
} }
// Turn on the "filter" bit and return the previous flags for the caller to save
int startFilter() {
int oldFlags = flags;
flags |= QParser.FLAG_FILTER;
return oldFlags;
}
void restoreFlags(int flagsToRestore) {
flags = flagsToRestore;
}
/** Parses a query string, returning a {@link org.apache.lucene.search.Query}. /** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
* @param query the query string to be parsed. * @param query the query string to be parsed.
*/ */
@ -381,7 +420,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
*/ */
protected Query getFieldQuery(String field, String queryText, int slop) protected Query getFieldQuery(String field, String queryText, int slop)
throws SyntaxError { throws SyntaxError {
Query query = getFieldQuery(field, queryText, true); Query query = getFieldQuery(field, queryText, true, false);
// only set slop of the phrase query was a result of this parser // only set slop of the phrase query was a result of this parser
// and not a sub-parser. // and not a sub-parser.
@ -492,11 +531,77 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
if (clauses.size()==0) { if (clauses.size()==0) {
return null; // all clause words were filtered away by the analyzer. return null; // all clause words were filtered away by the analyzer.
} }
BooleanQuery.Builder query = newBooleanQuery();
for(final BooleanClause clause: clauses) { SchemaField sfield = null;
query.add(clause); List<String> fieldValues = null;
boolean useTermsQuery = (flags & QParser.FLAG_FILTER)!=0 && clauses.size() > TERMS_QUERY_THRESHOLD;
int clausesAdded = 0;
BooleanQuery.Builder booleanBuilder = newBooleanQuery();
Map<SchemaField, List<String>> fmap = new HashMap<>();
for (BooleanClause clause : clauses) {
Query subq = clause.getQuery();
if (subq instanceof RawQuery) {
if (clause.getOccur() != BooleanClause.Occur.SHOULD) {
// We only collect optional terms for set queries. Since this isn't optional,
// convert the raw query to a normal query and handle as usual.
clause = new BooleanClause( rawToNormal(subq), clause.getOccur() );
} else {
// Optional raw query.
RawQuery rawq = (RawQuery) subq;
// only look up fmap and type info on a field change
if (sfield != rawq.sfield) {
sfield = rawq.sfield;
fieldValues = fmap.get(sfield);
// If this field isn't indexed, or if it is indexed and we want to use TermsQuery, then collect this value.
// We are currently relying on things like PointField not being marked as indexed in order to bypass
// the "useTermQuery" check.
if (fieldValues == null && useTermsQuery || !sfield.indexed()) {
fieldValues = new ArrayList<>(2);
fmap.put(sfield, fieldValues);
} }
return query.build(); }
if (fieldValues != null) {
fieldValues.add(rawq.externalVal);
continue;
}
clause = new BooleanClause( rawToNormal(subq), clause.getOccur() );
}
}
clausesAdded++;
booleanBuilder.add(clause);
}
for (Map.Entry<SchemaField,List<String>> entry : fmap.entrySet()) {
sfield = entry.getKey();
fieldValues = entry.getValue();
FieldType ft = sfield.getType();
// TODO: pull more of this logic out to FieldType? We would need to be able to add clauses to our existing booleanBuilder.
if (sfield.indexed() && fieldValues.size() < TERMS_QUERY_THRESHOLD || fieldValues.size() == 1) {
// use boolean query instead
for (String externalVal : fieldValues) {
Query subq = ft.getFieldQuery(this.parser, sfield, externalVal);
clausesAdded++;
booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
}
} else {
Query subq = ft.getSetQuery(this.parser, sfield, fieldValues);
if (fieldValues.size() == clauses.size()) return subq; // if this is everything, don't wrap in a boolean query
clausesAdded++;
booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
}
}
return booleanBuilder.build();
} }
@ -526,7 +631,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
q = getFuzzyQuery(qfield, termImage, fms); q = getFuzzyQuery(qfield, termImage, fms);
} else { } else {
String termImage=discardEscapeChar(term.image); String termImage=discardEscapeChar(term.image);
q = getFieldQuery(qfield, termImage, false); q = getFieldQuery(qfield, termImage, false, true);
} }
return q; return q;
} }
@ -540,10 +645,15 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
} }
catch (Exception ignored) { } catch (Exception ignored) { }
} }
return getFieldQuery(qfield, discardEscapeChar(term.image.substring(1, term.image.length()-1)), s);
String raw = discardEscapeChar(term.image.substring(1, term.image.length()-1));
return getFieldQuery(qfield, raw, s);
} }
// called from parser
// Called from parser
// Raw queries are transformed to normal queries before wrapping in a BoostQuery
Query handleBoost(Query q, Token boost) { Query handleBoost(Query q, Token boost) {
// q==null check is to avoid boosting null queries, such as those caused by stop words // q==null check is to avoid boosting null queries, such as those caused by stop words
if (boost == null || boost.image.length()==0 || q == null) { if (boost == null || boost.image.length()==0 || q == null) {
@ -556,14 +666,14 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
if (q instanceof ConstantScoreQuery || q instanceof SolrConstantScoreQuery) { if (q instanceof ConstantScoreQuery || q instanceof SolrConstantScoreQuery) {
// skip // skip
} else { } else {
newQ = new ConstantScoreQuery(q); newQ = new ConstantScoreQuery( rawToNormal(q) );
} }
return new BoostQuery(newQ, val); return new BoostQuery(newQ, val);
} }
float boostVal = Float.parseFloat(boost.image); float boostVal = Float.parseFloat(boost.image);
return new BoostQuery(q, boostVal); return new BoostQuery( rawToNormal(q), boostVal);
} }
@ -577,17 +687,21 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
* *
*/ */
String discardEscapeChar(String input) throws SyntaxError { String discardEscapeChar(String input) throws SyntaxError {
int start = input.indexOf('\\');
if (start < 0) return input;
// Create char array to hold unescaped char sequence // Create char array to hold unescaped char sequence
char[] output = new char[input.length()]; char[] output = new char[input.length()];
input.getChars(0, start, output, 0);
// The length of the output can be less than the input // The length of the output can be less than the input
// due to discarded escape chars. This variable holds // due to discarded escape chars. This variable holds
// the actual length of the output // the actual length of the output
int length = 0; int length = start;
// We remember whether the last processed character was // We remember whether the last processed character was
// an escape character // an escape character
boolean lastCharWasEscapeChar = false; boolean lastCharWasEscapeChar = true;
// The multiplier the current unicode digit must be multiplied with. // The multiplier the current unicode digit must be multiplied with.
// E. g. the first digit must be multiplied with 16^3, the second with 16^2... // E. g. the first digit must be multiplied with 16^3, the second with 16^2...
@ -596,7 +710,8 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
// Used to calculate the codepoint of the escaped unicode character // Used to calculate the codepoint of the escaped unicode character
int codePoint = 0; int codePoint = 0;
for (int i = 0; i < input.length(); i++) { // start after the first escape char
for (int i = start+1; i < input.length(); i++) {
char curChar = input.charAt(i); char curChar = input.charAt(i);
if (codePointMultiplier > 0) { if (codePointMultiplier > 0) {
codePoint += hexToInt(curChar) * codePointMultiplier; codePoint += hexToInt(curChar) * codePointMultiplier;
@ -715,8 +830,32 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
private QParser subQParser = null; private QParser subQParser = null;
// Create a "normal" query from a RawQuery (or just return the current query if it's not raw)
Query rawToNormal(Query q) {
if (!(q instanceof RawQuery)) return q;
RawQuery rq = (RawQuery)q;
return rq.sfield.getType().getFieldQuery(parser, rq.sfield, rq.externalVal);
}
protected Query getFieldQuery(String field, String queryText, boolean quoted) throws SyntaxError { protected Query getFieldQuery(String field, String queryText, boolean quoted) throws SyntaxError {
return getFieldQuery(field, queryText, quoted, false);
}
// private use for getFieldQuery
private String lastFieldName;
private SchemaField lastField;
// if raw==true, then it's possible for this method to return a RawQuery that will need to be transformed
// further before using.
protected Query getFieldQuery(String field, String queryText, boolean quoted, boolean raw) throws SyntaxError {
checkNullField(field); checkNullField(field);
SchemaField sf;
if (field.equals(lastFieldName)) {
// only look up the SchemaField on a field change... this helps with memory allocation of dynamic fields
// and large queries like foo_i:(1 2 3 4 5 6 7 8 9 10) when we are passed "foo_i" each time.
sf = lastField;
} else {
// intercept magic field name of "_" to use as a hook for our // intercept magic field name of "_" to use as a hook for our
// own functions. // own functions.
if (field.charAt(0) == '_' && parser != null) { if (field.charAt(0) == '_' && parser != null) {
@ -726,22 +865,31 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
return subQParser.getQuery(); return subQParser.getQuery();
} }
} }
SchemaField sf = schema.getFieldOrNull(field);
lastFieldName = field;
sf = lastField = schema.getFieldOrNull(field);
}
if (sf != null) { if (sf != null) {
FieldType ft = sf.getType(); FieldType ft = sf.getType();
// delegate to type for everything except tokenized fields // delegate to type for everything except tokenized fields
if (ft.isTokenized() && sf.indexed()) { if (ft.isTokenized() && sf.indexed()) {
return newFieldQuery(getAnalyzer(), field, queryText, quoted || (ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries())); return newFieldQuery(getAnalyzer(), field, queryText, quoted || (ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries()));
} else {
if (raw) {
return new RawQuery(sf, queryText);
} else { } else {
return sf.getType().getFieldQuery(parser, sf, queryText); return sf.getType().getFieldQuery(parser, sf, queryText);
} }
} }
}
// default to a normal field query // default to a normal field query
return newFieldQuery(getAnalyzer(), field, queryText, quoted); return newFieldQuery(getAnalyzer(), field, queryText, quoted);
} }
// called from parser // called from parser
protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws SyntaxError { protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws SyntaxError {
checkNullField(field); checkNullField(field);

View File

@ -19,6 +19,7 @@ package org.apache.solr.schema;
import java.io.IOException; import java.io.IOException;
import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
@ -38,7 +39,10 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.legacy.LegacyNumericType; import org.apache.lucene.legacy.LegacyNumericType;
import org.apache.lucene.queries.TermsQuery;
import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DocValuesRangeQuery; import org.apache.lucene.search.DocValuesRangeQuery;
import org.apache.lucene.search.DocValuesRewriteMethod; import org.apache.lucene.search.DocValuesRewriteMethod;
import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.MultiTermQuery;
@ -56,8 +60,8 @@ import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.Version; import org.apache.lucene.util.Version;
import org.apache.solr.analysis.SolrAnalyzer; import org.apache.solr.analysis.SolrAnalyzer;
import org.apache.solr.analysis.TokenizerChain; import org.apache.solr.analysis.TokenizerChain;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.util.Base64; import org.apache.solr.common.util.Base64;
import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.StrUtils; import org.apache.solr.common.util.StrUtils;
@ -744,6 +748,26 @@ public abstract class FieldType extends FieldProperties {
} }
} }
/** @lucene.experimental */
public Query getSetQuery(QParser parser, SchemaField field, Collection<String> externalVals) {
if (!field.indexed()) {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
for (String externalVal : externalVals) {
Query subq = getFieldQuery(parser, field, externalVal);
builder.add(subq, BooleanClause.Occur.SHOULD);
}
return builder.build();
}
List<BytesRef> lst = new ArrayList<>(externalVals.size());
BytesRefBuilder br = new BytesRefBuilder();
for (String externalVal : externalVals) {
readableToIndexed(externalVal, br);
lst.add( br.toBytesRef() );
}
return new TermsQuery(field.getName() , lst);
}
/** /**
* Expert: Returns the rewrite method for multiterm queries such as wildcards. * Expert: Returns the rewrite method for multiterm queries such as wildcards.
* @param parser The {@link org.apache.solr.search.QParser} calling the method * @param parser The {@link org.apache.solr.search.QParser} calling the method

View File

@ -1032,8 +1032,8 @@ public class ExtendedDismaxQParser extends QParser {
} }
@Override @Override
protected Query getFieldQuery(String field, String val, boolean quoted) throws SyntaxError { protected Query getFieldQuery(String field, String val, boolean quoted, boolean raw) throws SyntaxError {
this.type = QType.FIELD; this.type = quoted ? QType.PHRASE : QType.FIELD;
this.field = field; this.field = field;
this.val = val; this.val = val;
this.slop = getPhraseSlop(); // unspecified this.slop = getPhraseSlop(); // unspecified
@ -1212,7 +1212,7 @@ public class ExtendedDismaxQParser extends QParser {
switch (type) { switch (type) {
case FIELD: // fallthrough case FIELD: // fallthrough
case PHRASE: case PHRASE:
Query query = super.getFieldQuery(field, val, type == QType.PHRASE); Query query = super.getFieldQuery(field, val, type == QType.PHRASE, false);
// Boolean query on a whitespace-separated string // Boolean query on a whitespace-separated string
// If these were synonyms we would have a SynonymQuery // If these were synonyms we would have a SynonymQuery
if (query instanceof BooleanQuery) { if (query instanceof BooleanQuery) {

View File

@ -46,7 +46,9 @@ public class FunctionRangeQParserPlugin extends QParserPlugin {
@Override @Override
public Query parse() throws SyntaxError { public Query parse() throws SyntaxError {
funcStr = localParams.get(QueryParsing.V, null); funcStr = localParams.get(QueryParsing.V, null);
Query funcQ = subQuery(funcStr, FunctionQParserPlugin.NAME).getQuery(); QParser subParser = subQuery(funcStr, FunctionQParserPlugin.NAME);
subParser.setIsFilter(false); // the range can be based on the relevancy score of embedded queries.
Query funcQ = subParser.getQuery();
if (funcQ instanceof FunctionQuery) { if (funcQ instanceof FunctionQuery) {
vs = ((FunctionQuery)funcQ).getValueSource(); vs = ((FunctionQuery)funcQ).getValueSource();
} else { } else {

View File

@ -32,12 +32,18 @@ import java.util.*;
* *
*/ */
public abstract class QParser { public abstract class QParser {
/** @lucene.experimental */
public static final int FLAG_FILTER = 0x01;
protected String qstr; protected String qstr;
protected SolrParams params; protected SolrParams params;
protected SolrParams localParams; protected SolrParams localParams;
protected SolrQueryRequest req; protected SolrQueryRequest req;
protected int recurseCount; protected int recurseCount;
/** @lucene.experimental */
protected int flags;
protected Query query; protected Query query;
protected String stringIncludingLocalParams; // the original query string including any local params protected String stringIncludingLocalParams; // the original query string including any local params
@ -83,6 +89,28 @@ public abstract class QParser {
this.req = req; this.req = req;
} }
/** @lucene.experimental */
public void setFlags(int flags) {
this.flags = flags;
}
/** @lucene.experimental */
public int getFlags() {
return flags;
}
/** @lucene.experimental Query is in the context of a filter, where scores don't matter */
public boolean isFilter() {
return (flags & FLAG_FILTER) != 0;
}
/** @lucene.experimental */
public void setIsFilter(boolean isFilter) {
if (isFilter)
flags |= FLAG_FILTER;
else
flags &= ~FLAG_FILTER;
}
private static void addTag(Map<Object,Collection<Object>> tagMap, Object key, Object val) { private static void addTag(Map<Object,Collection<Object>> tagMap, Object key, Object val) {
Collection<Object> lst = tagMap.get(key); Collection<Object> lst = tagMap.get(key);
@ -201,6 +229,7 @@ public abstract class QParser {
defaultType = localParams.get(QueryParsing.DEFTYPE); defaultType = localParams.get(QueryParsing.DEFTYPE);
} }
QParser nestedParser = getParser(q, defaultType, getReq()); QParser nestedParser = getParser(q, defaultType, getReq());
nestedParser.flags = this.flags; // TODO: this would be better passed in to the constructor... change to a ParserContext object?
nestedParser.recurseCount = recurseCount; nestedParser.recurseCount = recurseCount;
recurseCount--; recurseCount--;
return nestedParser; return nestedParser;

View File

@ -907,7 +907,7 @@ public class SolrPluginUtils {
* aliases should work) * aliases should work)
*/ */
@Override @Override
protected Query getFieldQuery(String field, String queryText, boolean quoted) protected Query getFieldQuery(String field, String queryText, boolean quoted, boolean raw)
throws SyntaxError { throws SyntaxError {
if (aliases.containsKey(field)) { if (aliases.containsKey(field)) {
@ -917,7 +917,7 @@ public class SolrPluginUtils {
List<Query> disjuncts = new ArrayList<>(); List<Query> disjuncts = new ArrayList<>();
for (String f : a.fields.keySet()) { for (String f : a.fields.keySet()) {
Query sub = getFieldQuery(f,queryText,quoted); Query sub = getFieldQuery(f,queryText,quoted, false);
if (null != sub) { if (null != sub) {
if (null != a.fields.get(f)) { if (null != a.fields.get(f)) {
sub = new BoostQuery(sub, a.fields.get(f)); sub = new BoostQuery(sub, a.fields.get(f));
@ -931,7 +931,7 @@ public class SolrPluginUtils {
} else { } else {
try { try {
return super.getFieldQuery(field, queryText, quoted); return super.getFieldQuery(field, queryText, quoted, raw);
} catch (Exception e) { } catch (Exception e) {
return null; return null;
} }

View File

@ -1453,11 +1453,11 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
@Override @Override
protected Query getFieldQuery(String field, protected Query getFieldQuery(String field,
String val, boolean quoted) throws SyntaxError { String val, boolean quoted, boolean raw) throws SyntaxError {
if(frequentlyMisspelledWords.contains(val)) { if(frequentlyMisspelledWords.contains(val)) {
return getFuzzyQuery(field, val, 0.75F); return getFuzzyQuery(field, val, 0.75F);
} }
return super.getFieldQuery(field, val, quoted); return super.getFieldQuery(field, val, quoted, raw);
} }
} }
} }

View File

@ -16,11 +16,20 @@
*/ */
package org.apache.solr.search; package org.apache.solr.search;
import java.util.Locale;
import java.util.Random;
import org.apache.lucene.queries.TermsQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.core.SolrInfoMBean; import org.apache.solr.core.SolrInfoMBean;
import org.apache.solr.parser.QueryParser;
import org.apache.solr.query.FilterQuery;
import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequest;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
@ -37,9 +46,9 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
public static void createIndex() { public static void createIndex() {
String v; String v;
v = "how now brown cow"; v = "how now brown cow";
assertU(adoc("id", "1", "text", v, "text_np", v)); assertU(adoc("id", "1", "text", v, "text_np", v, "foo_i","11"));
v = "now cow"; v = "now cow";
assertU(adoc("id", "2", "text", v, "text_np", v)); assertU(adoc("id", "2", "text", v, "text_np", v, "foo_i","12"));
assertU(adoc("id", "3", "foo_s", "a ' \" \\ {! ) } ( { z")); // A value filled with special chars assertU(adoc("id", "3", "foo_s", "a ' \" \\ {! ) } ( { z")); // A value filled with special chars
assertU(adoc("id", "10", "qqq_s", "X")); assertU(adoc("id", "10", "qqq_s", "X"));
@ -184,6 +193,92 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
req.close(); req.close();
} }
// automatically use TermsQuery when appropriate
@Test
public void testAutoTerms() throws Exception {
SolrQueryRequest req = req();
QParser qParser;
Query q,qq;
// relevance query should not be a filter
qParser = QParser.getParser("foo_s:(a b c)", req);
q = qParser.getQuery();
assertEquals(3, ((BooleanQuery)q).clauses().size());
// small filter query should still use BooleanQuery
if (QueryParser.TERMS_QUERY_THRESHOLD > 3) {
qParser = QParser.getParser("foo_s:(a b c)", req);
qParser.setIsFilter(true); // this may change in the future
q = qParser.getQuery();
assertEquals(3, ((BooleanQuery) q).clauses().size());
}
// large relevancy query should use BooleanQuery
// TODO: we may decide that string fields shouldn't have relevance in the future... change to a text field w/o a stop filter if so
qParser = QParser.getParser("foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z)", req);
q = qParser.getQuery();
assertEquals(26, ((BooleanQuery)q).clauses().size());
// large filter query should use TermsQuery
qParser = QParser.getParser("foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z)", req);
qParser.setIsFilter(true); // this may change in the future
q = qParser.getQuery();
assertEquals(26, ((TermsQuery)q).getTermData().size());
// large numeric filter query should use TermsQuery (for trie fields)
qParser = QParser.getParser("foo_i:(1 2 3 4 5 6 7 8 9 10 20 19 18 17 16 15 14 13 12 11)", req);
qParser.setIsFilter(true); // this may change in the future
q = qParser.getQuery();
assertEquals(20, ((TermsQuery)q).getTermData().size());
// a filter() clause inside a relevancy query should be able to use a TermsQuery
qParser = QParser.getParser("foo_s:aaa filter(foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z))", req);
q = qParser.getQuery();
assertEquals(2, ((BooleanQuery)q).clauses().size());
qq = ((BooleanQuery)q).clauses().get(0).getQuery();
if (qq instanceof TermQuery) {
qq = ((BooleanQuery)q).clauses().get(1).getQuery();
}
if (qq instanceof FilterQuery) {
qq = ((FilterQuery)qq).getQuery();
}
assertEquals(26, ((TermsQuery)qq).getTermData().size());
// test mixed boolean query, including quotes (which shouldn't matter)
qParser = QParser.getParser("foo_s:(a +aaa b -bbb c d e f bar_s:(qqq www) g h i j k l m n o p q r s t u v w x y z)", req);
qParser.setIsFilter(true); // this may change in the future
q = qParser.getQuery();
assertEquals(4, ((BooleanQuery)q).clauses().size());
qq = null;
for (BooleanClause clause : ((BooleanQuery)q).clauses()) {
qq = clause.getQuery();
if (qq instanceof TermsQuery) break;
}
assertEquals(26, ((TermsQuery)qq).getTermData().size());
req.close();
}
@Test
public void testManyClauses() throws Exception {
String a = "1 a 2 b 3 c 10 d 11 12 "; // 10 terms
StringBuilder sb = new StringBuilder("id:(");
for (int i = 0; i < 1024; i++) { // historically, the max number of boolean clauses defaulted to 1024
sb.append('z').append(i).append(' ');
}
sb.append(a);
sb.append(")");
String q = sb.toString();
// This will still fail when used as the main query, but will pass in a filter query since TermsQuery can be used.
assertJQ(req("q","*:*", "fq", q)
,"/response/numFound==6");
}
@Test @Test
public void testComments() throws Exception { public void testComments() throws Exception {
assertJQ(req("q", "id:1 id:2 /* *:* */ id:3") assertJQ(req("q", "id:1 id:2 /* *:* */ id:3")
@ -317,4 +412,103 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
} }
// parsing performance test
// Run from command line with ant test -Dtestcase=TestSolrQueryParser -Dtestmethod=testParsingPerformance -Dtests.asserts=false 2>/dev/null | grep QPS
@Test
public void testParsingPerformance() throws Exception {
String[] args = {"-queries","100" ,"-iter","1000", "-clauses","100", "-format","term%d", "-seed","0"};
args = new String[] {"-queries","1000" ,"-iter","2000", "-clauses","10", "-format","term%d", "-seed","0"};
// args = new String[] {"-queries","1000" ,"-iter","1000000000", "-clauses","10", "-format","term%d", "-seed","0"};
boolean assertOn = false;
assert assertOn = true;
if (assertOn) {
// System.out.println("WARNING! Assertions are enabled!!!! Will only execute small run. Change with -Dtests.asserts=false");
args = new String[]{"-queries","10" ,"-iter","2", "-clauses","20", "-format","term%d", "-seed","0"};
}
int iter = 1000;
int numQueries = 100;
int maxClauses = 5;
int maxTerm = 10000000;
String format = "term%d";
String field = "foo_s";
long seed = 0;
boolean isFilter = true;
boolean rewrite = false;
String otherStuff = "";
for (int i = 0; i < args.length; i++) {
String a = args[i];
if ("-queries".equals(a)) {
numQueries = Integer.parseInt(args[++i]);
} else if ("-iter".equals(a)) {
iter = Integer.parseInt(args[++i]);
} else if ("-clauses".equals(a)) {
maxClauses = Integer.parseInt(args[++i]);
} else if ("-format".equals(a)) {
format = args[++i];
} else if ("-seed".equals(a)) {
seed = Long.parseLong(args[++i]);
} else {
otherStuff = otherStuff + " " + a;
}
}
Random r = new Random(seed);
String[] queries = new String[numQueries];
for (int i = 0; i < queries.length; i++) {
StringBuilder sb = new StringBuilder();
boolean explicitField = r.nextInt(5) == 0;
if (!explicitField) {
sb.append(field + ":(");
}
sb.append(otherStuff).append(" ");
int nClauses = r.nextInt(maxClauses) + 1; // TODO: query parse can't parse () for some reason???
for (int c = 0; c<nClauses; c++) {
String termString = String.format(Locale.US, format, r.nextInt(maxTerm));
if (explicitField) {
sb.append(field).append(':');
}
sb.append(termString);
sb.append(' ');
}
if (!explicitField) {
sb.append(")");
}
queries[i] = sb.toString();
// System.out.println(queries[i]);
}
SolrQueryRequest req = req();
long start = System.nanoTime();
int ret = 0;
for (int i=0; i<iter; i++) {
for (String qStr : queries) {
QParser parser = QParser.getParser(qStr,req);
parser.setIsFilter(isFilter);
Query q = parser.getQuery();
if (rewrite) {
// TODO: do rewrite
}
ret += q.getClass().hashCode(); // use the query somehow
}
}
long end = System.nanoTime();
System.out.println((assertOn ? "WARNING, assertions enabled. " : "") + "ret=" + ret + " Parser QPS:" + ((long)numQueries * iter)*1000000000/(end-start));
req.close();
}
} }