diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index b471988fb84..b4634501a1a 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -142,6 +142,16 @@ Optimizations * SOLR-9772: Deriving distributed sort values (fieldSortValues) should reuse comparator and only invalidate leafComparator. (John Call via yonik) +* SOLR-9786: FieldType has a new getSetQuery() method that can take a set of terms + and create a more efficient query (such as TermsQuery). The solr query parser has been + changed to use this method when appropriate. The parser also knows when it is being + used to parse a filter and will create TermsQueries from large lists of normal terms + or numbers, resulting in a query that will execute faster. This also acts to avoid + BooleanQuery maximum clause limit. Query parsing itself has also been optimized, + resulting in less produced garbage and 5-7% better performance. + (yonik) + + Bug Fixes ---------------------- * SOLR-9701: NPE in export handler when "fl" parameter is omitted. diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java index 9823f3d1274..fb6fec94111 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java @@ -199,10 +199,11 @@ public class QueryComponent extends SearchComponent if (fqs!=null && fqs.length!=0) { List filters = rb.getFilters(); // if filters already exists, make a copy instead of modifying the original - filters = filters == null ? new ArrayList(fqs.length) : new ArrayList<>(filters); + filters = filters == null ? new ArrayList<>(fqs.length) : new ArrayList<>(filters); for (String fq : fqs) { if (fq != null && fq.trim().length()!=0) { QParser fqp = QParser.getParser(fq, req); + fqp.setIsFilter(true); filters.add(fqp.getQuery()); } } diff --git a/solr/core/src/java/org/apache/solr/parser/QueryParser.java b/solr/core/src/java/org/apache/solr/parser/QueryParser.java index b2ef5007a26..42f359ee811 100644 --- a/solr/core/src/java/org/apache/solr/parser/QueryParser.java +++ b/solr/core/src/java/org/apache/solr/parser/QueryParser.java @@ -4,11 +4,12 @@ package org.apache.solr.parser; import java.io.StringReader; import java.util.ArrayList; import java.util.List; + import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.Query; import org.apache.lucene.util.Version; -import org.apache.solr.search.SyntaxError; import org.apache.solr.search.QParser; +import org.apache.solr.search.SyntaxError; public class QueryParser extends SolrQueryParserBase implements QueryParserConstants { @@ -135,9 +136,9 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst addClause(clauses, conj, mods, q); } if (clauses.size() == 1 && firstQuery != null) - {if (true) return firstQuery;} + {if (true) return rawToNormal(firstQuery);} else { - {if (true) return getBooleanQuery(clauses);} + {if (true) return getBooleanQuery(clauses);} } throw new Error("Missing return statement in function"); } @@ -146,6 +147,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst Query q; Token fieldToken=null, boost=null; Token localParams=null; + int flags = 0; if (jj_2_1(2)) { switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case TERM: @@ -195,6 +197,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst break; case FILTER: jj_consume_token(FILTER); + flags=startFilter(); q = Query(field); jj_consume_token(RPAREN); switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { @@ -206,7 +209,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst jj_la1[7] = jj_gen; ; } - q=getFilter(q); + q=getFilter(q); restoreFlags(flags); break; case LPARAMS: localParams = jj_consume_token(LPARAMS); diff --git a/solr/core/src/java/org/apache/solr/parser/QueryParser.jj b/solr/core/src/java/org/apache/solr/parser/QueryParser.jj index c5752f61024..a6b93ca691a 100644 --- a/solr/core/src/java/org/apache/solr/parser/QueryParser.jj +++ b/solr/core/src/java/org/apache/solr/parser/QueryParser.jj @@ -190,9 +190,9 @@ Query Query(String field) throws SyntaxError : )* { if (clauses.size() == 1 && firstQuery != null) - return firstQuery; + return rawToNormal(firstQuery); else { - return getBooleanQuery(clauses); + return getBooleanQuery(clauses); } } } @@ -201,6 +201,7 @@ Query Clause(String field) throws SyntaxError : { Query q; Token fieldToken=null, boost=null; Token localParams=null; + int flags = 0; } { @@ -216,7 +217,7 @@ Query Clause(String field) throws SyntaxError : { ( q=Term(field) | q=Query(field) ( boost=)? - | ( q=Query(field) ( boost=)? { q=getFilter(q); } ) + | ( { flags=startFilter(); } q=Query(field) ( boost=)? { q=getFilter(q); restoreFlags(flags); } ) | (localParams = ( boost=)? { q=getLocalParams(field, localParams.image); } ) ) { return handleBoost(q, boost); } diff --git a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java index 24a6f3e994a..0f6c7ac3475 100644 --- a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java +++ b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java @@ -17,6 +17,7 @@ package org.apache.solr.parser; import java.io.StringReader; +import java.util.ArrayList; import java.util.EnumSet; import java.util.HashMap; import java.util.List; @@ -61,6 +62,7 @@ import org.apache.solr.search.SyntaxError; */ public abstract class SolrQueryParserBase extends QueryBuilder { + public static final int TERMS_QUERY_THRESHOLD = 16; // @lucene.internal Set to a low value temporarily for better test coverage static final int CONJ_NONE = 0; static final int CONJ_AND = 1; @@ -89,7 +91,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder { int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength; boolean autoGeneratePhraseQueries = false; - + int flags; protected IndexSchema schema; protected QParser parser; @@ -125,6 +127,31 @@ public abstract class SolrQueryParserBase extends QueryBuilder { } } + // internal: A simple raw fielded query + public static class RawQuery extends Query { + final SchemaField sfield; + final String externalVal; + + public RawQuery(SchemaField sfield, String externalVal) { + this.sfield = sfield; + this.externalVal = externalVal; + } + + @Override + public String toString(String field) { + return "RAW(" + field + "," + externalVal + ")"; + } + + @Override + public boolean equals(Object obj) { + return false; + } + + @Override + public int hashCode() { + return 0; + } + } // So the generated QueryParser(CharStream) won't error out protected SolrQueryParserBase() { @@ -138,10 +165,22 @@ public abstract class SolrQueryParserBase extends QueryBuilder { public void init(Version matchVersion, String defaultField, QParser parser) { this.schema = parser.getReq().getSchema(); this.parser = parser; + this.flags = parser.getFlags(); this.defaultField = defaultField; setAnalyzer(schema.getQueryAnalyzer()); } + // Turn on the "filter" bit and return the previous flags for the caller to save + int startFilter() { + int oldFlags = flags; + flags |= QParser.FLAG_FILTER; + return oldFlags; + } + + void restoreFlags(int flagsToRestore) { + flags = flagsToRestore; + } + /** Parses a query string, returning a {@link org.apache.lucene.search.Query}. * @param query the query string to be parsed. */ @@ -381,7 +420,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder { */ protected Query getFieldQuery(String field, String queryText, int slop) throws SyntaxError { - Query query = getFieldQuery(field, queryText, true); + Query query = getFieldQuery(field, queryText, true, false); // only set slop of the phrase query was a result of this parser // and not a sub-parser. @@ -492,11 +531,77 @@ public abstract class SolrQueryParserBase extends QueryBuilder { if (clauses.size()==0) { return null; // all clause words were filtered away by the analyzer. } - BooleanQuery.Builder query = newBooleanQuery(); - for(final BooleanClause clause: clauses) { - query.add(clause); + + SchemaField sfield = null; + List fieldValues = null; + + + boolean useTermsQuery = (flags & QParser.FLAG_FILTER)!=0 && clauses.size() > TERMS_QUERY_THRESHOLD; + int clausesAdded = 0; + + BooleanQuery.Builder booleanBuilder = newBooleanQuery(); + Map> fmap = new HashMap<>(); + + for (BooleanClause clause : clauses) { + Query subq = clause.getQuery(); + if (subq instanceof RawQuery) { + if (clause.getOccur() != BooleanClause.Occur.SHOULD) { + // We only collect optional terms for set queries. Since this isn't optional, + // convert the raw query to a normal query and handle as usual. + clause = new BooleanClause( rawToNormal(subq), clause.getOccur() ); + } else { + // Optional raw query. + RawQuery rawq = (RawQuery) subq; + + // only look up fmap and type info on a field change + if (sfield != rawq.sfield) { + sfield = rawq.sfield; + fieldValues = fmap.get(sfield); + // If this field isn't indexed, or if it is indexed and we want to use TermsQuery, then collect this value. + // We are currently relying on things like PointField not being marked as indexed in order to bypass + // the "useTermQuery" check. + if (fieldValues == null && useTermsQuery || !sfield.indexed()) { + fieldValues = new ArrayList<>(2); + fmap.put(sfield, fieldValues); + } + } + + if (fieldValues != null) { + fieldValues.add(rawq.externalVal); + continue; + } + + clause = new BooleanClause( rawToNormal(subq), clause.getOccur() ); + } + } + + clausesAdded++; + booleanBuilder.add(clause); } - return query.build(); + + + for (Map.Entry> entry : fmap.entrySet()) { + sfield = entry.getKey(); + fieldValues = entry.getValue(); + FieldType ft = sfield.getType(); + + // TODO: pull more of this logic out to FieldType? We would need to be able to add clauses to our existing booleanBuilder. + if (sfield.indexed() && fieldValues.size() < TERMS_QUERY_THRESHOLD || fieldValues.size() == 1) { + // use boolean query instead + for (String externalVal : fieldValues) { + Query subq = ft.getFieldQuery(this.parser, sfield, externalVal); + clausesAdded++; + booleanBuilder.add(subq, BooleanClause.Occur.SHOULD); + } + } else { + Query subq = ft.getSetQuery(this.parser, sfield, fieldValues); + if (fieldValues.size() == clauses.size()) return subq; // if this is everything, don't wrap in a boolean query + clausesAdded++; + booleanBuilder.add(subq, BooleanClause.Occur.SHOULD); + } + } + + return booleanBuilder.build(); } @@ -526,7 +631,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder { q = getFuzzyQuery(qfield, termImage, fms); } else { String termImage=discardEscapeChar(term.image); - q = getFieldQuery(qfield, termImage, false); + q = getFieldQuery(qfield, termImage, false, true); } return q; } @@ -540,10 +645,15 @@ public abstract class SolrQueryParserBase extends QueryBuilder { } catch (Exception ignored) { } } - return getFieldQuery(qfield, discardEscapeChar(term.image.substring(1, term.image.length()-1)), s); + + String raw = discardEscapeChar(term.image.substring(1, term.image.length()-1)); + return getFieldQuery(qfield, raw, s); } - // called from parser + + + // Called from parser + // Raw queries are transformed to normal queries before wrapping in a BoostQuery Query handleBoost(Query q, Token boost) { // q==null check is to avoid boosting null queries, such as those caused by stop words if (boost == null || boost.image.length()==0 || q == null) { @@ -556,14 +666,14 @@ public abstract class SolrQueryParserBase extends QueryBuilder { if (q instanceof ConstantScoreQuery || q instanceof SolrConstantScoreQuery) { // skip } else { - newQ = new ConstantScoreQuery(q); + newQ = new ConstantScoreQuery( rawToNormal(q) ); } return new BoostQuery(newQ, val); } float boostVal = Float.parseFloat(boost.image); - return new BoostQuery(q, boostVal); + return new BoostQuery( rawToNormal(q), boostVal); } @@ -577,17 +687,21 @@ public abstract class SolrQueryParserBase extends QueryBuilder { * */ String discardEscapeChar(String input) throws SyntaxError { + int start = input.indexOf('\\'); + if (start < 0) return input; + // Create char array to hold unescaped char sequence char[] output = new char[input.length()]; + input.getChars(0, start, output, 0); // The length of the output can be less than the input // due to discarded escape chars. This variable holds // the actual length of the output - int length = 0; + int length = start; // We remember whether the last processed character was // an escape character - boolean lastCharWasEscapeChar = false; + boolean lastCharWasEscapeChar = true; // The multiplier the current unicode digit must be multiplied with. // E. g. the first digit must be multiplied with 16^3, the second with 16^2... @@ -596,7 +710,8 @@ public abstract class SolrQueryParserBase extends QueryBuilder { // Used to calculate the codepoint of the escaped unicode character int codePoint = 0; - for (int i = 0; i < input.length(); i++) { + // start after the first escape char + for (int i = start+1; i < input.length(); i++) { char curChar = input.charAt(i); if (codePointMultiplier > 0) { codePoint += hexToInt(curChar) * codePointMultiplier; @@ -715,25 +830,57 @@ public abstract class SolrQueryParserBase extends QueryBuilder { private QParser subQParser = null; + // Create a "normal" query from a RawQuery (or just return the current query if it's not raw) + Query rawToNormal(Query q) { + if (!(q instanceof RawQuery)) return q; + RawQuery rq = (RawQuery)q; + return rq.sfield.getType().getFieldQuery(parser, rq.sfield, rq.externalVal); + } + protected Query getFieldQuery(String field, String queryText, boolean quoted) throws SyntaxError { + return getFieldQuery(field, queryText, quoted, false); + } + + // private use for getFieldQuery + private String lastFieldName; + private SchemaField lastField; + + // if raw==true, then it's possible for this method to return a RawQuery that will need to be transformed + // further before using. + protected Query getFieldQuery(String field, String queryText, boolean quoted, boolean raw) throws SyntaxError { checkNullField(field); - // intercept magic field name of "_" to use as a hook for our - // own functions. - if (field.charAt(0) == '_' && parser != null) { - MagicFieldName magic = MagicFieldName.get(field); - if (null != magic) { - subQParser = parser.subQuery(queryText, magic.subParser); - return subQParser.getQuery(); + + SchemaField sf; + if (field.equals(lastFieldName)) { + // only look up the SchemaField on a field change... this helps with memory allocation of dynamic fields + // and large queries like foo_i:(1 2 3 4 5 6 7 8 9 10) when we are passed "foo_i" each time. + sf = lastField; + } else { + // intercept magic field name of "_" to use as a hook for our + // own functions. + if (field.charAt(0) == '_' && parser != null) { + MagicFieldName magic = MagicFieldName.get(field); + if (null != magic) { + subQParser = parser.subQuery(queryText, magic.subParser); + return subQParser.getQuery(); + } } + + lastFieldName = field; + sf = lastField = schema.getFieldOrNull(field); } - SchemaField sf = schema.getFieldOrNull(field); + if (sf != null) { FieldType ft = sf.getType(); // delegate to type for everything except tokenized fields if (ft.isTokenized() && sf.indexed()) { return newFieldQuery(getAnalyzer(), field, queryText, quoted || (ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries())); } else { - return sf.getType().getFieldQuery(parser, sf, queryText); + if (raw) { + return new RawQuery(sf, queryText); + } else { + return sf.getType().getFieldQuery(parser, sf, queryText); + } } } @@ -742,6 +889,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder { } + // called from parser protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws SyntaxError { checkNullField(field); diff --git a/solr/core/src/java/org/apache/solr/schema/FieldType.java b/solr/core/src/java/org/apache/solr/schema/FieldType.java index ea4df905adb..b67f88de678 100644 --- a/solr/core/src/java/org/apache/solr/schema/FieldType.java +++ b/solr/core/src/java/org/apache/solr/schema/FieldType.java @@ -19,6 +19,7 @@ package org.apache.solr.schema; import java.io.IOException; import java.lang.invoke.MethodHandles; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -38,7 +39,10 @@ import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.Term; import org.apache.lucene.legacy.LegacyNumericType; +import org.apache.lucene.queries.TermsQuery; import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.DocValuesRangeQuery; import org.apache.lucene.search.DocValuesRewriteMethod; import org.apache.lucene.search.MultiTermQuery; @@ -56,8 +60,8 @@ import org.apache.lucene.util.CharsRefBuilder; import org.apache.lucene.util.Version; import org.apache.solr.analysis.SolrAnalyzer; import org.apache.solr.analysis.TokenizerChain; -import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.util.Base64; import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.StrUtils; @@ -743,7 +747,27 @@ public abstract class FieldType extends FieldProperties { return new TermQuery(new Term(field.getName(), br)); } } - + + /** @lucene.experimental */ + public Query getSetQuery(QParser parser, SchemaField field, Collection externalVals) { + if (!field.indexed()) { + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + for (String externalVal : externalVals) { + Query subq = getFieldQuery(parser, field, externalVal); + builder.add(subq, BooleanClause.Occur.SHOULD); + } + return builder.build(); + } + + List lst = new ArrayList<>(externalVals.size()); + BytesRefBuilder br = new BytesRefBuilder(); + for (String externalVal : externalVals) { + readableToIndexed(externalVal, br); + lst.add( br.toBytesRef() ); + } + return new TermsQuery(field.getName() , lst); + } + /** * Expert: Returns the rewrite method for multiterm queries such as wildcards. * @param parser The {@link org.apache.solr.search.QParser} calling the method diff --git a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java index d15b1c1e686..ebb6188ec18 100644 --- a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java +++ b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java @@ -1032,8 +1032,8 @@ public class ExtendedDismaxQParser extends QParser { } @Override - protected Query getFieldQuery(String field, String val, boolean quoted) throws SyntaxError { - this.type = QType.FIELD; + protected Query getFieldQuery(String field, String val, boolean quoted, boolean raw) throws SyntaxError { + this.type = quoted ? QType.PHRASE : QType.FIELD; this.field = field; this.val = val; this.slop = getPhraseSlop(); // unspecified @@ -1212,7 +1212,7 @@ public class ExtendedDismaxQParser extends QParser { switch (type) { case FIELD: // fallthrough case PHRASE: - Query query = super.getFieldQuery(field, val, type == QType.PHRASE); + Query query = super.getFieldQuery(field, val, type == QType.PHRASE, false); // Boolean query on a whitespace-separated string // If these were synonyms we would have a SynonymQuery if (query instanceof BooleanQuery) { diff --git a/solr/core/src/java/org/apache/solr/search/FunctionRangeQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/FunctionRangeQParserPlugin.java index 1b37a97f097..4cda8ee7234 100644 --- a/solr/core/src/java/org/apache/solr/search/FunctionRangeQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/FunctionRangeQParserPlugin.java @@ -46,7 +46,9 @@ public class FunctionRangeQParserPlugin extends QParserPlugin { @Override public Query parse() throws SyntaxError { funcStr = localParams.get(QueryParsing.V, null); - Query funcQ = subQuery(funcStr, FunctionQParserPlugin.NAME).getQuery(); + QParser subParser = subQuery(funcStr, FunctionQParserPlugin.NAME); + subParser.setIsFilter(false); // the range can be based on the relevancy score of embedded queries. + Query funcQ = subParser.getQuery(); if (funcQ instanceof FunctionQuery) { vs = ((FunctionQuery)funcQ).getValueSource(); } else { diff --git a/solr/core/src/java/org/apache/solr/search/QParser.java b/solr/core/src/java/org/apache/solr/search/QParser.java index 40605be726d..58c0265231f 100644 --- a/solr/core/src/java/org/apache/solr/search/QParser.java +++ b/solr/core/src/java/org/apache/solr/search/QParser.java @@ -32,12 +32,18 @@ import java.util.*; * */ public abstract class QParser { + /** @lucene.experimental */ + public static final int FLAG_FILTER = 0x01; + protected String qstr; protected SolrParams params; protected SolrParams localParams; protected SolrQueryRequest req; protected int recurseCount; + /** @lucene.experimental */ + protected int flags; + protected Query query; protected String stringIncludingLocalParams; // the original query string including any local params @@ -83,6 +89,28 @@ public abstract class QParser { this.req = req; } + /** @lucene.experimental */ + public void setFlags(int flags) { + this.flags = flags; + } + + /** @lucene.experimental */ + public int getFlags() { + return flags; + } + + /** @lucene.experimental Query is in the context of a filter, where scores don't matter */ + public boolean isFilter() { + return (flags & FLAG_FILTER) != 0; + } + + /** @lucene.experimental */ + public void setIsFilter(boolean isFilter) { + if (isFilter) + flags |= FLAG_FILTER; + else + flags &= ~FLAG_FILTER; + } private static void addTag(Map> tagMap, Object key, Object val) { Collection lst = tagMap.get(key); @@ -201,6 +229,7 @@ public abstract class QParser { defaultType = localParams.get(QueryParsing.DEFTYPE); } QParser nestedParser = getParser(q, defaultType, getReq()); + nestedParser.flags = this.flags; // TODO: this would be better passed in to the constructor... change to a ParserContext object? nestedParser.recurseCount = recurseCount; recurseCount--; return nestedParser; diff --git a/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java b/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java index 411049ac851..93866005718 100644 --- a/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java +++ b/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java @@ -907,7 +907,7 @@ public class SolrPluginUtils { * aliases should work) */ @Override - protected Query getFieldQuery(String field, String queryText, boolean quoted) + protected Query getFieldQuery(String field, String queryText, boolean quoted, boolean raw) throws SyntaxError { if (aliases.containsKey(field)) { @@ -917,7 +917,7 @@ public class SolrPluginUtils { List disjuncts = new ArrayList<>(); for (String f : a.fields.keySet()) { - Query sub = getFieldQuery(f,queryText,quoted); + Query sub = getFieldQuery(f,queryText,quoted, false); if (null != sub) { if (null != a.fields.get(f)) { sub = new BoostQuery(sub, a.fields.get(f)); @@ -931,7 +931,7 @@ public class SolrPluginUtils { } else { try { - return super.getFieldQuery(field, queryText, quoted); + return super.getFieldQuery(field, queryText, quoted, raw); } catch (Exception e) { return null; } diff --git a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java index 9b305f2941d..c3b119f1182 100644 --- a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java +++ b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java @@ -1453,11 +1453,11 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 { @Override protected Query getFieldQuery(String field, - String val, boolean quoted) throws SyntaxError { + String val, boolean quoted, boolean raw) throws SyntaxError { if(frequentlyMisspelledWords.contains(val)) { return getFuzzyQuery(field, val, 0.75F); } - return super.getFieldQuery(field, val, quoted); + return super.getFieldQuery(field, val, quoted, raw); } } } diff --git a/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java b/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java index 0b9c0f0c181..d3e6a7f961a 100644 --- a/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java +++ b/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java @@ -16,11 +16,20 @@ */ package org.apache.solr.search; +import java.util.Locale; +import java.util.Random; + +import org.apache.lucene.queries.TermsQuery; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.core.SolrInfoMBean; +import org.apache.solr.parser.QueryParser; +import org.apache.solr.query.FilterQuery; import org.apache.solr.request.SolrQueryRequest; import org.junit.BeforeClass; import org.junit.Test; @@ -37,9 +46,9 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 { public static void createIndex() { String v; v = "how now brown cow"; - assertU(adoc("id", "1", "text", v, "text_np", v)); + assertU(adoc("id", "1", "text", v, "text_np", v, "foo_i","11")); v = "now cow"; - assertU(adoc("id", "2", "text", v, "text_np", v)); + assertU(adoc("id", "2", "text", v, "text_np", v, "foo_i","12")); assertU(adoc("id", "3", "foo_s", "a ' \" \\ {! ) } ( { z")); // A value filled with special chars assertU(adoc("id", "10", "qqq_s", "X")); @@ -184,6 +193,92 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 { req.close(); } + + // automatically use TermsQuery when appropriate + @Test + public void testAutoTerms() throws Exception { + SolrQueryRequest req = req(); + QParser qParser; + Query q,qq; + + // relevance query should not be a filter + qParser = QParser.getParser("foo_s:(a b c)", req); + q = qParser.getQuery(); + assertEquals(3, ((BooleanQuery)q).clauses().size()); + + // small filter query should still use BooleanQuery + if (QueryParser.TERMS_QUERY_THRESHOLD > 3) { + qParser = QParser.getParser("foo_s:(a b c)", req); + qParser.setIsFilter(true); // this may change in the future + q = qParser.getQuery(); + assertEquals(3, ((BooleanQuery) q).clauses().size()); + } + + // large relevancy query should use BooleanQuery + // TODO: we may decide that string fields shouldn't have relevance in the future... change to a text field w/o a stop filter if so + qParser = QParser.getParser("foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z)", req); + q = qParser.getQuery(); + assertEquals(26, ((BooleanQuery)q).clauses().size()); + + // large filter query should use TermsQuery + qParser = QParser.getParser("foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z)", req); + qParser.setIsFilter(true); // this may change in the future + q = qParser.getQuery(); + assertEquals(26, ((TermsQuery)q).getTermData().size()); + + // large numeric filter query should use TermsQuery (for trie fields) + qParser = QParser.getParser("foo_i:(1 2 3 4 5 6 7 8 9 10 20 19 18 17 16 15 14 13 12 11)", req); + qParser.setIsFilter(true); // this may change in the future + q = qParser.getQuery(); + assertEquals(20, ((TermsQuery)q).getTermData().size()); + + // a filter() clause inside a relevancy query should be able to use a TermsQuery + qParser = QParser.getParser("foo_s:aaa filter(foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z))", req); + q = qParser.getQuery(); + assertEquals(2, ((BooleanQuery)q).clauses().size()); + qq = ((BooleanQuery)q).clauses().get(0).getQuery(); + if (qq instanceof TermQuery) { + qq = ((BooleanQuery)q).clauses().get(1).getQuery(); + } + + if (qq instanceof FilterQuery) { + qq = ((FilterQuery)qq).getQuery(); + } + + assertEquals(26, ((TermsQuery)qq).getTermData().size()); + + // test mixed boolean query, including quotes (which shouldn't matter) + qParser = QParser.getParser("foo_s:(a +aaa b -bbb c d e f bar_s:(qqq www) g h i j k l m n o p q r s t u v w x y z)", req); + qParser.setIsFilter(true); // this may change in the future + q = qParser.getQuery(); + assertEquals(4, ((BooleanQuery)q).clauses().size()); + qq = null; + for (BooleanClause clause : ((BooleanQuery)q).clauses()) { + qq = clause.getQuery(); + if (qq instanceof TermsQuery) break; + } + assertEquals(26, ((TermsQuery)qq).getTermData().size()); + + req.close(); + } + + @Test + public void testManyClauses() throws Exception { + String a = "1 a 2 b 3 c 10 d 11 12 "; // 10 terms + StringBuilder sb = new StringBuilder("id:("); + for (int i = 0; i < 1024; i++) { // historically, the max number of boolean clauses defaulted to 1024 + sb.append('z').append(i).append(' '); + } + sb.append(a); + sb.append(")"); + + String q = sb.toString(); + + // This will still fail when used as the main query, but will pass in a filter query since TermsQuery can be used. + assertJQ(req("q","*:*", "fq", q) + ,"/response/numFound==6"); + } + @Test public void testComments() throws Exception { assertJQ(req("q", "id:1 id:2 /* *:* */ id:3") @@ -317,4 +412,103 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 { } + // parsing performance test + // Run from command line with ant test -Dtestcase=TestSolrQueryParser -Dtestmethod=testParsingPerformance -Dtests.asserts=false 2>/dev/null | grep QPS + @Test + public void testParsingPerformance() throws Exception { + String[] args = {"-queries","100" ,"-iter","1000", "-clauses","100", "-format","term%d", "-seed","0"}; + args = new String[] {"-queries","1000" ,"-iter","2000", "-clauses","10", "-format","term%d", "-seed","0"}; + // args = new String[] {"-queries","1000" ,"-iter","1000000000", "-clauses","10", "-format","term%d", "-seed","0"}; + + boolean assertOn = false; + assert assertOn = true; + if (assertOn) { + // System.out.println("WARNING! Assertions are enabled!!!! Will only execute small run. Change with -Dtests.asserts=false"); + args = new String[]{"-queries","10" ,"-iter","2", "-clauses","20", "-format","term%d", "-seed","0"}; + } + + + int iter = 1000; + int numQueries = 100; + int maxClauses = 5; + int maxTerm = 10000000; + String format = "term%d"; + String field = "foo_s"; + long seed = 0; + boolean isFilter = true; + boolean rewrite = false; + + String otherStuff = ""; + + for (int i = 0; i < args.length; i++) { + String a = args[i]; + if ("-queries".equals(a)) { + numQueries = Integer.parseInt(args[++i]); + } else if ("-iter".equals(a)) { + iter = Integer.parseInt(args[++i]); + } else if ("-clauses".equals(a)) { + maxClauses = Integer.parseInt(args[++i]); + } else if ("-format".equals(a)) { + format = args[++i]; + } else if ("-seed".equals(a)) { + seed = Long.parseLong(args[++i]); + } else { + otherStuff = otherStuff + " " + a; + } + } + + Random r = new Random(seed); + + String[] queries = new String[numQueries]; + for (int i = 0; i < queries.length; i++) { + StringBuilder sb = new StringBuilder(); + boolean explicitField = r.nextInt(5) == 0; + if (!explicitField) { + sb.append(field + ":("); + } + + sb.append(otherStuff).append(" "); + + int nClauses = r.nextInt(maxClauses) + 1; // TODO: query parse can't parse () for some reason??? + + for (int c = 0; c