diff --git a/solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java b/solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java index f723a25b24c..eaf2aedc601 100644 --- a/solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java +++ b/solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java @@ -266,7 +266,7 @@ public class ICUCollationField extends FieldType { } @Override - public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) { + protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) { String f = field.getName(); BytesRef low = part1 == null ? null : getCollationKey(f, part1); BytesRef high = part2 == null ? null : getCollationKey(f, part2); diff --git a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java index b5e659674d6..81516f695b6 100644 --- a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java +++ b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java @@ -1184,14 +1184,24 @@ public abstract class SolrQueryParserBase extends QueryBuilder { // Solr has always used constant scoring for prefix queries. This should return constant scoring by default. 
return newPrefixQuery(new Term(field, termStr)); } + // called from parser + protected Query getExistenceQuery(String field) { + checkNullField(field); + SchemaField sf = schema.getField(field); + return sf.getType().getExistenceQuery(parser, sf); + } // called from parser protected Query getWildcardQuery(String field, String termStr) throws SyntaxError { checkNullField(field); - // *:* -> MatchAllDocsQuery + if ("*".equals(termStr)) { if ("*".equals(field) || getExplicitField() == null) { + // '*:*' and '*' -> MatchAllDocsQuery return newMatchAllDocsQuery(); + } else { + // 'foo:*' -> existenceQuery + return getExistenceQuery(field); } } diff --git a/solr/core/src/java/org/apache/solr/schema/AbstractSpatialFieldType.java b/solr/core/src/java/org/apache/solr/schema/AbstractSpatialFieldType.java index 404dae2bd9b..0fe3429d284 100644 --- a/solr/core/src/java/org/apache/solr/schema/AbstractSpatialFieldType.java +++ b/solr/core/src/java/org/apache/solr/schema/AbstractSpatialFieldType.java @@ -316,7 +316,7 @@ public abstract class AbstractSpatialFieldType extend } @Override - public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) { + protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) { if (!minInclusive || !maxInclusive) throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Both sides of spatial range query must be inclusive: " + field.getName()); Point p1 = SpatialUtils.parsePointSolrException(part1, ctx); diff --git a/solr/core/src/java/org/apache/solr/schema/CollationField.java b/solr/core/src/java/org/apache/solr/schema/CollationField.java index b8285aa0797..3b5d2b8d931 100644 --- a/solr/core/src/java/org/apache/solr/schema/CollationField.java +++ b/solr/core/src/java/org/apache/solr/schema/CollationField.java @@ -236,7 +236,7 @@ public class CollationField extends FieldType { } 
@Override - public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) { + protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) { String f = field.getName(); BytesRef low = part1 == null ? null : getCollationKey(f, part1); BytesRef high = part2 == null ? null : getCollationKey(f, part2); diff --git a/solr/core/src/java/org/apache/solr/schema/CurrencyFieldType.java b/solr/core/src/java/org/apache/solr/schema/CurrencyFieldType.java index 4e59212f9ab..f28fb38afed 100644 --- a/solr/core/src/java/org/apache/solr/schema/CurrencyFieldType.java +++ b/solr/core/src/java/org/apache/solr/schema/CurrencyFieldType.java @@ -251,7 +251,7 @@ public class CurrencyFieldType extends FieldType implements SchemaAware, Resourc CurrencyValue valueDefault; valueDefault = value.convertTo(provider, defaultCurrency); - return getRangeQuery(parser, field, valueDefault, valueDefault, true, true); + return getRangeQueryInternal(parser, field, valueDefault, valueDefault, true, true); } /** @@ -316,8 +316,18 @@ public class CurrencyFieldType extends FieldType implements SchemaAware, Resourc source); } + /** + * Override the default existenceQuery implementation to run an existence query on the underlying amountField instead. 
+ */ @Override - public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, final boolean minInclusive, final boolean maxInclusive) { + public Query getExistenceQuery(QParser parser, SchemaField field) { + // Use an existence query of the underlying amount field + SchemaField amountField = getAmountField(field); + return amountField.getType().getExistenceQuery(parser, amountField); + } + + @Override + protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String part1, String part2, final boolean minInclusive, final boolean maxInclusive) { final CurrencyValue p1 = CurrencyValue.parse(part1, defaultCurrency); final CurrencyValue p2 = CurrencyValue.parse(part2, defaultCurrency); @@ -327,10 +337,10 @@ public class CurrencyFieldType extends FieldType implements SchemaAware, Resourc ": range queries only supported when upper and lower bound have same currency."); } - return getRangeQuery(parser, field, p1, p2, minInclusive, maxInclusive); + return getRangeQueryInternal(parser, field, p1, p2, minInclusive, maxInclusive); } - public Query getRangeQuery(QParser parser, SchemaField field, final CurrencyValue p1, final CurrencyValue p2, final boolean minInclusive, final boolean maxInclusive) { + private Query getRangeQueryInternal(QParser parser, SchemaField field, final CurrencyValue p1, final CurrencyValue p2, final boolean minInclusive, final boolean maxInclusive) { String currencyCode = (p1 != null) ? p1.getCurrencyCode() : (p2 != null) ? 
p2.getCurrencyCode() : defaultCurrency; diff --git a/solr/core/src/java/org/apache/solr/schema/DateRangeField.java b/solr/core/src/java/org/apache/solr/schema/DateRangeField.java index b7c3329302c..67aa9fd9092 100644 --- a/solr/core/src/java/org/apache/solr/schema/DateRangeField.java +++ b/solr/core/src/java/org/apache/solr/schema/DateRangeField.java @@ -143,7 +143,7 @@ public class DateRangeField extends AbstractSpatialPrefixTreeFieldTypepart1 and part2 as null if they are '*' respectively. minInclusive and maxInclusive are both true + * currently by SolrQueryParser but that may change in the future. Also, other QueryParser implementations may have + * different semantics. + *

+ * By default range queries with '*'s or nulls on both sides are treated as existence queries and are created with {@link #getExistenceQuery}. + * If unbounded range queries should not be treated as existence queries for a certain fieldType, then {@link #treatUnboundedRangeAsExistence} should be overridden. + *

+ * Sub-classes should override the {@link #getSpecializedRangeQuery} method to provide their own range query implementation. + * + * @param parser the {@link org.apache.solr.search.QParser} calling the method + * @param field the schema field + * @param part1 the lower boundary of the range, nulls are allowed. + * @param part2 the upper boundary of the range, nulls are allowed. + * @param minInclusive whether the minimum of the range is inclusive or not + * @param maxInclusive whether the maximum of the range is inclusive or not + * @return a Query instance to perform range search according to given parameters + */ + public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) { + if (part1 == null && part2 == null && treatUnboundedRangeAsExistence(field)) { + return getExistenceQuery(parser, field); + } + return getSpecializedRangeQuery(parser, field, part1, part2, minInclusive, maxInclusive); + } + + /** + * Returns whether an unbounded range query should be treated the same as an existence query for the given field type. + * + * @param field the schema field + * @return whether unbounded range and existence are equivalent for the given field type. + */ + protected boolean treatUnboundedRangeAsExistence(SchemaField field) { + return true; + } + + /** + * Returns a Query instance for doing range searches on this field type. {@link org.apache.solr.search.SolrQueryParser} + * currently passes part1 and part2 as null if they are '*' respectively. minInclusive and maxInclusive are both true * currently by SolrQueryParser but that may change in the future. Also, other QueryParser implementations may have * different semantics. *

* Sub-classes should override this method to provide their own range query implementation. They should strive to - * handle nulls in part1 and/or part2 as well as unequal minInclusive and maxInclusive parameters gracefully. + * handle nulls in part1 and/or part2 as well as unequal minInclusive and maxInclusive parameters gracefully. + *

+ * This method does not, and should not, check for or handle existence queries, please look at {@link #getRangeQuery} for that logic. * * @param parser the {@link org.apache.solr.search.QParser} calling the method * @param field the schema field @@ -867,31 +907,67 @@ public abstract class FieldType extends FieldProperties { * @return a Query instance to perform range search according to given parameters * */ - public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) { + protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) { // TODO: change these all to use readableToIndexed/bytes instead (e.g. for unicode collation) final BytesRef miValue = part1 == null ? null : new BytesRef(toInternal(part1)); final BytesRef maxValue = part2 == null ? null : new BytesRef(toInternal(part2)); + if (field.hasDocValues() && !field.indexed()) { return SortedSetDocValuesField.newSlowRangeQuery( - field.getName(), - miValue, maxValue, - minInclusive, maxInclusive); + field.getName(), + miValue, maxValue, + minInclusive, maxInclusive); } else { SolrRangeQuery rangeQuery = new SolrRangeQuery( - field.getName(), - miValue, maxValue, - minInclusive, maxInclusive); + field.getName(), + miValue, maxValue, + minInclusive, maxInclusive); return rangeQuery; } } + /** + * Returns a Query instance for doing existence searches for a field. + * If the field does not have docValues or norms, this method will call {@link #getSpecializedExistenceQuery}, which defaults to an unbounded rangeQuery. + *

+ * This method should only be overridden whenever a fieldType does not support {@link org.apache.lucene.search.DocValuesFieldExistsQuery} or {@link org.apache.lucene.search.NormsFieldExistsQuery}. + * If a fieldType does not support an unbounded rangeQuery as an existenceQuery (such as double or float fields), {@link #getSpecializedExistenceQuery} should be overridden. + * + * @param parser The {@link org.apache.solr.search.QParser} calling the method + * @param field The {@link org.apache.solr.schema.SchemaField} of the field to search + * @return The {@link org.apache.lucene.search.Query} instance. + */ + public Query getExistenceQuery(QParser parser, SchemaField field) { + if (field.hasDocValues()) { + return new DocValuesFieldExistsQuery(field.getName()); + } else if (!field.omitNorms() && !isPointField()) { //TODO: Remove !isPointField() for SOLR-14199 + return new NormsFieldExistsQuery(field.getName()); + } else { + // Default to an unbounded range query + return getSpecializedExistenceQuery(parser, field); + } + } + + /** + * Returns a Query instance for doing existence searches for a field without certain options, such as docValues or norms. + *

+ * This method can be overridden to implement specialized existence logic for fieldTypes. + * The default query returned is an unbounded range query. + * + * @param parser The {@link org.apache.solr.search.QParser} calling the method + * @param field The {@link org.apache.solr.schema.SchemaField} of the field to search + * @return The {@link org.apache.lucene.search.Query} instance. + */ + protected Query getSpecializedExistenceQuery(QParser parser, SchemaField field) { + return getSpecializedRangeQuery(parser, field, null, null, true, true); + } + /** * Returns a Query instance for doing searches against a field. * @param parser The {@link org.apache.solr.search.QParser} calling the method * @param field The {@link org.apache.solr.schema.SchemaField} of the field to search * @param externalVal The String representation of the value to search * @return The {@link org.apache.lucene.search.Query} instance. This implementation returns a {@link org.apache.lucene.search.TermQuery} but overriding queries may not - * */ public Query getFieldQuery(QParser parser, SchemaField field, String externalVal) { BytesRefBuilder br = new BytesRefBuilder(); diff --git a/solr/core/src/java/org/apache/solr/schema/LatLonType.java b/solr/core/src/java/org/apache/solr/schema/LatLonType.java index 88ab20b9f10..ecebd13373c 100644 --- a/solr/core/src/java/org/apache/solr/schema/LatLonType.java +++ b/solr/core/src/java/org/apache/solr/schema/LatLonType.java @@ -103,7 +103,7 @@ public class LatLonType extends AbstractSubTypeFieldType implements SpatialQuery @Override - public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) { + protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) { Point p1 = SpatialUtils.parsePointSolrException(part1, SpatialContext.GEO); Point p2 = SpatialUtils.parsePointSolrException(part2, 
SpatialContext.GEO); diff --git a/solr/core/src/java/org/apache/solr/schema/NumericFieldType.java b/solr/core/src/java/org/apache/solr/schema/NumericFieldType.java index 86697e58fef..d33e48441cd 100644 --- a/solr/core/src/java/org/apache/solr/schema/NumericFieldType.java +++ b/solr/core/src/java/org/apache/solr/schema/NumericFieldType.java @@ -16,11 +16,16 @@ */ package org.apache.solr.schema; +import java.util.EnumSet; + import org.apache.lucene.document.DoublePoint; import org.apache.lucene.document.FloatPoint; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.util.NumericUtils; @@ -302,4 +307,38 @@ public abstract class NumericFieldType extends PrimitiveFieldType { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, msg); } } + + public static EnumSet doubleOrFloat = EnumSet.of(NumberType.FLOAT, NumberType.DOUBLE); + + /** + * For doubles and floats, unbounded range queries (which do not match NaN values) are not equivalent to existence queries (which do match NaN values). + * + * The two types of queries are equivalent for all other numeric types. + * + * @param field the schema field + * @return false for double and float fields, true for all others + */ + @Override + protected boolean treatUnboundedRangeAsExistence(SchemaField field) { + return !doubleOrFloat.contains(getNumberType()); + } + + /** + * Override the default existence behavior, so that the non-docValued/norms implementation matches NaN values for double and float fields. + * The [* TO *] query for those fields does not match 'NaN' values, so they must be matched separately. + *

+ * For doubles and floats the query behavior is equivalent to (field:[* TO *] OR field:NaN). + * For all other numeric types, the default existence query behavior is used. + */ + @Override + public Query getSpecializedExistenceQuery(QParser parser, SchemaField field) { + if (doubleOrFloat.contains(getNumberType())) { + return new ConstantScoreQuery(new BooleanQuery.Builder() + .add(getSpecializedRangeQuery(parser, field, null, null, true, true), BooleanClause.Occur.SHOULD) + .add(getFieldQuery(parser, field, Float.toString(Float.NaN)), BooleanClause.Occur.SHOULD) + .setMinimumNumberShouldMatch(1).build()); + } else { + return super.getSpecializedExistenceQuery(parser, field); + } + } } diff --git a/solr/core/src/java/org/apache/solr/schema/PointField.java b/solr/core/src/java/org/apache/solr/schema/PointField.java index 91a342cfa80..b082593965e 100644 --- a/solr/core/src/java/org/apache/solr/schema/PointField.java +++ b/solr/core/src/java/org/apache/solr/schema/PointField.java @@ -161,12 +161,9 @@ public abstract class PointField extends NumericFieldType { protected abstract Query getExactQuery(SchemaField field, String externalVal); - public abstract Query getPointRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive, - boolean maxInclusive); - @Override - public Query getRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive, - boolean maxInclusive) { + protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive, + boolean maxInclusive) { if (!field.indexed() && field.hasDocValues()) { return getDocValuesRangeQuery(parser, field, min, max, minInclusive, maxInclusive); } else if (field.indexed() && field.hasDocValues()) { @@ -178,6 +175,9 @@ public abstract class PointField extends NumericFieldType { } } + public abstract Query getPointRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive, + 
boolean maxInclusive); + @Override public String storedToReadable(IndexableField f) { return toExternal(f); @@ -219,9 +219,12 @@ public abstract class PointField extends NumericFieldType { } protected abstract String indexedToReadable(BytesRef indexedForm); - + @Override public Query getPrefixQuery(QParser parser, SchemaField sf, String termStr) { + if ("".equals(termStr)) { + return getExistenceQuery(parser, sf); + } throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Can't run prefix queries on numeric fields"); } diff --git a/solr/core/src/java/org/apache/solr/schema/PointType.java b/solr/core/src/java/org/apache/solr/schema/PointType.java index e088e7ff31d..a67b0ae5402 100644 --- a/solr/core/src/java/org/apache/solr/schema/PointType.java +++ b/solr/core/src/java/org/apache/solr/schema/PointType.java @@ -128,7 +128,7 @@ public class PointType extends CoordinateFieldType implements SpatialQueryable { /** * Care should be taken in calling this with higher order dimensions for performance reasons. */ - public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) { + protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) { //Query could look like: [x1,y1 TO x2,y2] for 2 dimension, but could look like: [x1,y1,z1 TO x2,y2,z2], and can be extrapolated to n-dimensions //thus, this query essentially creates a box, cube, etc. 
String[] p1 = parseCommaSeparatedList(part1, dimension); diff --git a/solr/core/src/java/org/apache/solr/schema/TextField.java b/solr/core/src/java/org/apache/solr/schema/TextField.java index 3bad0f21fd4..bddaf00c760 100644 --- a/solr/core/src/java/org/apache/solr/schema/TextField.java +++ b/solr/core/src/java/org/apache/solr/schema/TextField.java @@ -158,7 +158,7 @@ public class TextField extends FieldType { } @Override - public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) { + protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) { Analyzer multiAnalyzer = getMultiTermAnalyzer(); BytesRef lower = analyzeMultiTerm(field.getName(), part1, multiAnalyzer); BytesRef upper = analyzeMultiTerm(field.getName(), part2, multiAnalyzer); diff --git a/solr/core/src/java/org/apache/solr/schema/TrieField.java b/solr/core/src/java/org/apache/solr/schema/TrieField.java index 90b27e459d4..c3f636e5de6 100644 --- a/solr/core/src/java/org/apache/solr/schema/TrieField.java +++ b/solr/core/src/java/org/apache/solr/schema/TrieField.java @@ -298,10 +298,10 @@ public class TrieField extends NumericFieldType { } @Override - public Query getRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive, boolean maxInclusive) { + protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive, boolean maxInclusive) { if (field.multiValued() && field.hasDocValues() && !field.indexed()) { // for the multi-valued dv-case, the default rangeimpl over toInternal is correct - return super.getRangeQuery(parser, field, min, max, minInclusive, maxInclusive); + return super.getSpecializedRangeQuery(parser, field, min, max, minInclusive, maxInclusive); } int ps = precisionStep; Query query; diff --git 
a/solr/core/src/test-files/solr/collection1/conf/schema12.xml b/solr/core/src/test-files/solr/collection1/conf/schema12.xml index 9438f16dc74..eb407262ffd 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema12.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema12.xml @@ -686,23 +686,33 @@ - + + + + + + + + - + - + + + + @@ -721,6 +731,17 @@ + + + + + + + + + + diff --git a/solr/core/src/test-files/solr/collection1/conf/schema15.xml b/solr/core/src/test-files/solr/collection1/conf/schema15.xml index c8328aca3e5..91fb0928c2f 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema15.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema15.xml @@ -577,7 +577,11 @@ + + + + @@ -595,6 +599,7 @@ + diff --git a/solr/core/src/test/org/apache/solr/schema/CurrencyFieldTypeTest.java b/solr/core/src/test/org/apache/solr/schema/CurrencyFieldTypeTest.java index d7174691705..65561255be6 100644 --- a/solr/core/src/test/org/apache/solr/schema/CurrencyFieldTypeTest.java +++ b/solr/core/src/test/org/apache/solr/schema/CurrencyFieldTypeTest.java @@ -220,11 +220,16 @@ public class CurrencyFieldTypeTest extends SolrTestCaseJ4 { assertQ(req("fl", "*,score", "q", fieldName+":[24.99,EUR TO 25.01,EUR]"), "//*[@numFound='1']"); - + // Open ended ranges without currency assertQ(req("fl", "*,score", "q", fieldName+":[* TO *]"), "//*[@numFound='" + (2 + 10 + negDocs) + "']"); + + // Open ended ranges without currency + assertQ(req("fl", "*,score", "q", + fieldName+":*"), + "//*[@numFound='" + (2 + 10 + negDocs) + "']"); // Open ended ranges with currency assertQ(req("fl", "*,score", "q", diff --git a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java index 324b0e466e6..00108d7e6ed 100644 --- a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java +++ b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java @@ -16,6 +16,7 @@ */ package 
org.apache.solr.search; +import java.util.Arrays; import java.util.HashSet; import java.util.Map; import java.util.Set; @@ -94,6 +95,21 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { " +apache +solr"); } + public void testQueryLuceneAllDocsWithField() throws Exception { + // for all "primative" types except for doubles/floats, 'foo:*' should be functionally equivilent to "foo:[* TO *]" + // whatever implementation/optimizations exist for one syntax, should exist for the other syntax as well + // (regardless of docValues, multivalued, etc...) + for (String field : Arrays.asList("foo_sI", "foo_sS", "foo_s1", "foo_s", + "t_foo", "tv_foo", "tv_mv_foo", + "foo_b", "foo_b_dvo", + "foo_i", "foo_is", "foo_i_dvo", + "foo_l", "foo_l_dvo", + "foo_dt", "foo_dt_dvo")) { + + assertQueryEquals("lucene", field + ":*", field + ":[* TO *]"); + } + } + public void testQueryPrefix() throws Exception { SolrQueryRequest req = req("myField","foo_s"); try { diff --git a/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java b/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java index 87cbc95c9d5..9fb2598e2ea 100644 --- a/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java +++ b/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java @@ -17,6 +17,7 @@ package org.apache.solr.search; import java.util.Arrays; +import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -28,13 +29,16 @@ import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.NormsFieldExistsQuery; import org.apache.lucene.search.PointInSetQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermInSetQuery; import 
org.apache.lucene.search.TermQuery; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.params.MapSolrParams; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SolrParams; @@ -44,7 +48,10 @@ import org.apache.solr.metrics.SolrMetricManager; import org.apache.solr.parser.QueryParser; import org.apache.solr.query.FilterQuery; import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.NumberType; import org.apache.solr.schema.SchemaField; +import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; @@ -59,6 +66,15 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 { createIndex(); } + private static final List HAS_VAL_FIELDS = new ArrayList(41); + private static final List HAS_NAN_FIELDS = new ArrayList(12); + + @AfterClass + public static void afterClass() throws Exception { + HAS_VAL_FIELDS.clear(); + HAS_NAN_FIELDS.clear(); + } + public static void createIndex() { String v; v = "how now brown cow"; @@ -73,12 +89,98 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 { assertU(adoc("id", "13", "eee_s", "'balance'", "rrr_s", "/leading_slash")); assertU(adoc("id", "20", "syn", "wifi ATM")); - + + { // make a doc that has a value in *lots* of fields that no other doc has + SolrInputDocument doc = sdoc("id", "999"); + + // numbers... 
+ for (String t : Arrays.asList("i", "l", "f", "d")) { + for (String s : Arrays.asList("", "s", "_dv", "s_dv", "_dvo", "_norms")) { + final String f = "has_val_" + t + s; + HAS_VAL_FIELDS.add(f); + doc.addField(f, "42"); + + if (t.equals("f") || t.equals("d")) { + String nanField = "nan_val_" + t + s; + doc.addField(nanField, "NaN"); + HAS_NAN_FIELDS.add(nanField); + + // Add a NaN & non-NaN value for multivalue fields, these should match :* and :[* TO *] equivalently + if (s.startsWith("s")) { + String bothField = "both_val_" + t + s; + doc.addField(bothField, "42"); + doc.addField(bothField, "NaN"); + HAS_VAL_FIELDS.add(bothField); + } + } + } + } + // boolean...booleans + for (String s : Arrays.asList("", "s", "_dv", "_norms")) { + final String f = "has_val_b" + s; + HAS_VAL_FIELDS.add(f); + doc.addField(f, "false"); + } + + // dates (and strings/text -- they don't care about the format)... + for (String s : Arrays.asList("dt", "s", "s1", "t", "t_on", "dt_norms", "s_norms", "dt_dv", "s_dv")) { + final String f = "has_val_" + s; + HAS_VAL_FIELDS.add(f); + doc.addField(f, "2019-01-12T00:00:00Z"); + } + assertU(adoc(doc)); + } + assertU(adoc("id", "30", "shingle23", "A B X D E")); assertU(commit()); } + public void testDocsWithValuesInField() throws Exception { + assertEquals("someone changed the test setup of HAS_VAL_FIELDS, w/o updating the sanity check", + 41, HAS_VAL_FIELDS.size()); + for (String f : HAS_VAL_FIELDS) { + // for all of these fields, these 2 syntaxes should be functionally equivilent + // in matching the one doc that contains these fields + for (String q : Arrays.asList( f + ":*", f + ":[* TO *]" )) { + assertJQ(req("q", q) + , "/response/numFound==1" + , "/response/docs/[0]/id=='999'" + ); + // the same syntaxes should be valid even if no doc has the field... 
+ assertJQ(req("q", "bogus___" + q) + , "/response/numFound==0" + ); + + } + } + } + + public void testDocsWithNaNInField() throws Exception { + assertEquals("someone changed the test setup of HAS_NAN_FIELDS, w/o updating the sanity check", + 12, HAS_NAN_FIELDS.size()); + for (String f : HAS_NAN_FIELDS) { + // for all of these fields, field:* should NOT be equivalent to field:[* TO *] + assertJQ(req("q", f + ":*") + , "/response/numFound==1" + , "/response/docs/[0]/id=='999'" + ); + assertJQ(req("q", f + ":[* TO *]") + , "/response/numFound==0" + ); + assertJQ(req("q", f + ":[-Infinity TO Infinity]") + , "/response/numFound==0" + ); + for (String q : Arrays.asList( f + ":*", f + ":[* TO *]", f + ":[-Infinity TO Infinity]" )) { + // the same syntaxes should be valid even if no doc has the field... + assertJQ(req("q", "bogus___" + q) + , "/response/numFound==0" + ); + + } + } + } + @Test public void testPhrase() { // "text" field's type has WordDelimiterGraphFilter (WDGFF) and autoGeneratePhraseQueries=true @@ -1135,14 +1237,14 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 { "is_dv", "fs_dv", "ds_dv", "ls_dv", "i_dvo", "f_dvo", "d_dvo", "l_dvo", }; - + for (String suffix:fieldSuffix) { //Good queries qParser = QParser.getParser("foo_" + suffix + ":(1 2 3 4 5 6 7 8 9 10 20 19 18 17 16 15 14 13 12 25)", req); qParser.setIsFilter(true); qParser.getQuery(); } - + for (String suffix:fieldSuffix) { qParser = QParser.getParser("foo_" + suffix + ":(1 2 3 4 5 6 7 8 9 10 20 19 18 17 16 15 14 13 12 NOT_A_NUMBER)", req); qParser.setIsFilter(true); // this may change in the future @@ -1150,7 +1252,59 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 { assertEquals(SolrException.ErrorCode.BAD_REQUEST.code, e.code()); assertTrue("Unexpected exception: " + e.getMessage(), e.getMessage().contains("Invalid Number: NOT_A_NUMBER")); } - - + + + } + + @Test + public void testFieldExistsQueries() throws SyntaxError { + SolrQueryRequest req = req(); + String[] 
fieldSuffix = new String[] { + "ti", "tf", "td", "tl", "tdt", + "pi", "pf", "pd", "pl", "pdt", + "i", "f", "d", "l", "dt", "s", "b", + "is", "fs", "ds", "ls", "dts", "ss", "bs", + "i_dv", "f_dv", "d_dv", "l_dv", "dt_dv", "s_dv", "b_dv", + "is_dv", "fs_dv", "ds_dv", "ls_dv", "dts_dv", "ss_dv", "bs_dv", + "i_dvo", "f_dvo", "d_dvo", "l_dvo", "dt_dvo", + "t", + "t_on", "b_norms", "s_norms", "dt_norms", "i_norms", "l_norms", "f_norms", "d_norms" + }; + String[] existenceQueries = new String[] { + "*", "[* TO *]" + }; + + for (String existenceQuery : existenceQueries) { + for (String suffix : fieldSuffix) { + IndexSchema indexSchema = h.getCore().getLatestSchema(); + String field = "foo_" + suffix; + String query = field + ":" + existenceQuery; + QParser qParser = QParser.getParser(query, req); + Query createdQuery = qParser.getQuery(); + SchemaField schemaField = indexSchema.getField(field); + + // Test float & double realNumber queries differently + if ("[* TO *]".equals(existenceQuery) && (schemaField.getType().getNumberType() == NumberType.DOUBLE || schemaField.getType().getNumberType() == NumberType.FLOAT)) { + assertFalse("For float and double fields \"" + query + "\" is not an existence query, so the query returned should not be a DocValuesFieldExistsQuery.", createdQuery instanceof DocValuesFieldExistsQuery); + assertFalse("For float and double fields \"" + query + "\" is not an existence query, so the query returned should not be a NormsFieldExistsQuery.", createdQuery instanceof NormsFieldExistsQuery); + assertFalse("For float and double fields \"" + query + "\" is not an existence query, so NaN should not be matched via a ConstantScoreQuery.", createdQuery instanceof ConstantScoreQuery); + assertFalse("For float and double fields\"" + query + "\" is not an existence query, so NaN should not be matched via a BooleanQuery (NaN and [* TO *]).", createdQuery instanceof BooleanQuery); + } else { + if (schemaField.hasDocValues()) { + assertTrue("Field has docValues, 
so existence query \"" + query + "\" should return DocValuesFieldExistsQuery", createdQuery instanceof DocValuesFieldExistsQuery); + } else if (!schemaField.omitNorms() && !schemaField.getType().isPointField()) { //TODO: Remove !isPointField() for SOLR-14199 + assertTrue("Field has norms and no docValues, so existence query \"" + query + "\" should return NormsFieldExistsQuery", createdQuery instanceof NormsFieldExistsQuery); + } else if (schemaField.getType().getNumberType() == NumberType.DOUBLE || schemaField.getType().getNumberType() == NumberType.FLOAT) { + assertTrue("PointField with NaN values must include \"exists or NaN\" if the field doesn't have norms or docValues: \"" + query + "\".", createdQuery instanceof ConstantScoreQuery); + assertTrue("PointField with NaN values must include \"exists or NaN\" if the field doesn't have norms or docValues: \"" + query + "\".", ((ConstantScoreQuery)createdQuery).getQuery() instanceof BooleanQuery); + assertEquals("PointField with NaN values must include \"exists or NaN\" if the field doesn't have norms or docValues: \"" + query + "\". This boolean query must be an OR.", 1, ((BooleanQuery)((ConstantScoreQuery)createdQuery).getQuery()).getMinimumNumberShouldMatch()); + assertEquals("PointField with NaN values must include \"exists or NaN\" if the field doesn't have norms or docValues: \"" + query + "\". 
This boolean query must have 2 clauses.", 2, ((BooleanQuery)((ConstantScoreQuery)createdQuery).getQuery()).clauses().size()); + } else { + assertFalse("Field doesn't have docValues, so existence query \"" + query + "\" should not return DocValuesFieldExistsQuery", createdQuery instanceof DocValuesFieldExistsQuery); + assertFalse("Field doesn't have norms, so existence query \"" + query + "\" should not return NormsFieldExistsQuery", createdQuery instanceof NormsFieldExistsQuery); + } + } + } + } } } diff --git a/solr/solr-ref-guide/src/the-standard-query-parser.adoc b/solr/solr-ref-guide/src/the-standard-query-parser.adoc index 8507c0abd60..c572e503e5b 100644 --- a/solr/solr-ref-guide/src/the-standard-query-parser.adoc +++ b/solr/solr-ref-guide/src/the-standard-query-parser.adoc @@ -146,6 +146,15 @@ To perform a proximity search, add the tilde character ~ and a numeric value to The distance referred to here is the number of term movements needed to match the specified phrase. In the example above, if "apache" and "jakarta" were 10 spaces apart in a field, but "apache" appeared before "jakarta", more than 10 term movements would be required to move the terms together and position "apache" to the right of "jakarta" with a space in between. +=== Existence Searches + +An existence search for a field matches all documents where a value exists for that field. +To query for a field existing, simply use a wildcard instead of a term in the search. + +`field:*` + +A field will be considered to "exist" if it has any value, even values which are often considered "not existent". (e.g. `NaN`, `""`, etc.) + === Range Searches A range search specifies a range of values for a field (a range with an upper bound and a lower bound). The query matches documents whose values for the specified field or fields fall within the range. Range queries can be inclusive or exclusive of the upper and lower bounds. Sorting is done lexicographically, except on numeric fields. 
For example, the range query below matches all documents whose `popularity` field has a value between 52 and 10,000, inclusive. @@ -164,6 +173,25 @@ The brackets around a query determine its inclusiveness. * Curly brackets `{` & `}` denote an exclusive range query that matches values between the upper and lower bounds, but excluding the upper and lower bounds themselves. * You can mix these types so one end of the range is inclusive and the other is exclusive. Here's an example: `count:{1 TO 10]` +Wildcards, `*`, can also be used for either or both endpoints to specify an open-ended range query. +This is a <<#differences-between-lucenes-classic-query-parser-and-solrs-standard-query-parser,divergence from Lucene's Classic Query Parser>>. + +* `field:[* TO 100]` finds all field values less than or equal to 100. +* `field:[100 TO *]` finds all field values greater than or equal to 100. +* `field:[* TO *]` finds any document with a value between the effective values of -Infinity and +Infinity for that field type. + + +[NOTE] +.Matching `NaN` values with wildcards +==== +For most fields, unbounded range queries, `field:[* TO *]`, are equivalent to existence queries, `field:*`. +However, for float/double types that support `NaN` values, these two queries behave differently. + +* `field:*` matches all existing values, including `NaN` +* `field:[* TO *]` matches all real values, excluding `NaN` +==== + + === Boosting a Term with "^" Lucene/Solr provides the relevance level of matching documents based on the terms found. To boost a term use the caret symbol `^` with a boost factor (a number) at the end of the term you are searching. The higher the boost factor, the more relevant the term will be. @@ -322,13 +350,14 @@ Comments may be nested. Solr's standard query parser originated as a variation of Lucene's "classic" QueryParser.
It diverges in the following ways: -* A `*` may be used for either or both endpoints to specify an open-ended range query +* A `*` may be used for either or both endpoints to specify an open-ended range query, or by itself as an existence query. ** `field:[* TO 100]` finds all field values less than or equal to 100 ** `field:[100 TO *]` finds all field values greater than or equal to 100 -** `field:[* TO *]` matches all documents with the field +** `field:[* TO *]` finds all documents where the field has a value between `-Infinity` and `Infinity`, excluding `NaN`. +** `field:*` finds all documents where the field exists (i.e. has any value). * Pure negative queries (all clauses prohibited) are allowed (only as a top-level clause) ** `-inStock:false` finds all field values where inStock is not false -** `-field:[* TO *]` finds all documents without a value for field +** `-field:*` finds all documents without a value for the field. * Support for embedded Solr queries (sub-queries) using any type of query parser as a nested clause using the local-params syntax. ** `inStock:true OR {!dismax qf='name manu' v='ipod'}` +