SOLR-11746: Adding existence queries for PointFields

* DocValuesFieldExistsQuery and NormsFieldExistsQuery are used for existence queries when possible.
* Added documentation on the difference between field:* and field:[* TO *].
2020-01-22 17:48:55 -05:00 · 2020-01-22 17:48:55 -05:00 · ffba54a827
parent 1051db4038
commit ffba54a827
21 changed files with 418 additions and 50 deletions
--- a/solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java
+++ b/solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java
@ -266,7 +266,7 @@ public class ICUCollationField extends FieldType {
  }
  
  @Override
-  public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
+  protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
    String f = field.getName();
    BytesRef low = part1 == null ? null : getCollationKey(f, part1);
    BytesRef high = part2 == null ? null : getCollationKey(f, part2);
--- a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
+++ b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
@ -1184,14 +1184,24 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
    // Solr has always used constant scoring for prefix queries.  This should return constant scoring by default.
    return newPrefixQuery(new Term(field, termStr));
  }
+  // called from parser
+  protected Query getExistenceQuery(String field) {
+    checkNullField(field);
+    SchemaField sf = schema.getField(field);
+    return sf.getType().getExistenceQuery(parser, sf);
+  }

  // called from parser
  protected Query getWildcardQuery(String field, String termStr) throws SyntaxError {
    checkNullField(field);
-    // *:* -> MatchAllDocsQuery
+
    if ("*".equals(termStr)) {
      if ("*".equals(field) || getExplicitField() == null) {
+        // '*:*' and '*' -> MatchAllDocsQuery
        return newMatchAllDocsQuery();
+      } else {
+        // 'foo:*' -> existenceQuery
+        return getExistenceQuery(field);
      }
    }

--- a/solr/core/src/java/org/apache/solr/schema/AbstractSpatialFieldType.java
+++ b/solr/core/src/java/org/apache/solr/schema/AbstractSpatialFieldType.java
@ -316,7 +316,7 @@ public abstract class AbstractSpatialFieldType<T extends SpatialStrategy> extend
  }

  @Override
-  public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
+  protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
    if (!minInclusive || !maxInclusive)
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Both sides of spatial range query must be inclusive: " + field.getName());
    Point p1 = SpatialUtils.parsePointSolrException(part1, ctx);
--- a/solr/core/src/java/org/apache/solr/schema/CollationField.java
+++ b/solr/core/src/java/org/apache/solr/schema/CollationField.java
@ -236,7 +236,7 @@ public class CollationField extends FieldType {
  }
  
  @Override
-  public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
+  protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
    String f = field.getName();
    BytesRef low = part1 == null ? null : getCollationKey(f, part1);
    BytesRef high = part2 == null ? null : getCollationKey(f, part2);
--- a/solr/core/src/java/org/apache/solr/schema/CurrencyFieldType.java
+++ b/solr/core/src/java/org/apache/solr/schema/CurrencyFieldType.java
@ -251,7 +251,7 @@ public class CurrencyFieldType extends FieldType implements SchemaAware, Resourc
    CurrencyValue valueDefault;
    valueDefault = value.convertTo(provider, defaultCurrency);

-    return getRangeQuery(parser, field, valueDefault, valueDefault, true, true);
+    return getRangeQueryInternal(parser, field, valueDefault, valueDefault, true, true);
  }

  /**
@ -316,8 +316,18 @@ public class CurrencyFieldType extends FieldType implements SchemaAware, Resourc
        source);
  }

+  /**
+   * Override the default existenceQuery implementation to run an existence query on the underlying amountField instead.
+   */
  @Override
-  public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, final boolean minInclusive, final boolean maxInclusive) {
+  public Query getExistenceQuery(QParser parser, SchemaField field) {
+    // Use an existence query of the underlying amount field
+    SchemaField amountField = getAmountField(field);
+    return amountField.getType().getExistenceQuery(parser, amountField);
+  }
+
+  @Override
+  protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String part1, String part2, final boolean minInclusive, final boolean maxInclusive) {
    final CurrencyValue p1 = CurrencyValue.parse(part1, defaultCurrency);
    final CurrencyValue p2 = CurrencyValue.parse(part2, defaultCurrency);

@ -327,10 +337,10 @@ public class CurrencyFieldType extends FieldType implements SchemaAware, Resourc
              ": range queries only supported when upper and lower bound have same currency.");
    }

-    return getRangeQuery(parser, field, p1, p2, minInclusive, maxInclusive);
+    return getRangeQueryInternal(parser, field, p1, p2, minInclusive, maxInclusive);
  }

-  public Query getRangeQuery(QParser parser, SchemaField field, final CurrencyValue p1, final CurrencyValue p2, final boolean minInclusive, final boolean maxInclusive) {
+  private Query getRangeQueryInternal(QParser parser, SchemaField field, final CurrencyValue p1, final CurrencyValue p2, final boolean minInclusive, final boolean maxInclusive) {
    String currencyCode = (p1 != null) ? p1.getCurrencyCode() :
        (p2 != null) ? p2.getCurrencyCode() : defaultCurrency;

--- a/solr/core/src/java/org/apache/solr/schema/DateRangeField.java
+++ b/solr/core/src/java/org/apache/solr/schema/DateRangeField.java
@ -143,7 +143,7 @@ public class DateRangeField extends AbstractSpatialPrefixTreeFieldType<NumberRan
  }

  @Override
-  public Query getRangeQuery(QParser parser, SchemaField field, String startStr, String endStr, boolean minInclusive, boolean maxInclusive) {
+  protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String startStr, String endStr, boolean minInclusive, boolean maxInclusive) {
    if (parser == null) {//null when invoked by SimpleFacets.  But getQueryFromSpatialArgs expects to get localParams.
      final SolrRequestInfo requestInfo = SolrRequestInfo.getRequestInfo();
      parser = new QParser("", null, requestInfo.getReq().getParams(), requestInfo.getReq()) {
--- a/solr/core/src/java/org/apache/solr/schema/EnumField.java
+++ b/solr/core/src/java/org/apache/solr/schema/EnumField.java
@ -63,13 +63,13 @@ public class EnumField extends AbstractEnumField {
  }

  @Override
-  public Query getRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive, boolean maxInclusive) {
+  protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive, boolean maxInclusive) {
    Integer minValue = enumMapping.stringValueToIntValue(min);
    Integer maxValue = enumMapping.stringValueToIntValue(max);

    if (field.multiValued() && field.hasDocValues() && !field.indexed()) {
      // for the multi-valued dv-case, the default rangeimpl over toInternal is correct
-      return super.getRangeQuery(parser, field, minValue.toString(), maxValue.toString(), minInclusive, maxInclusive);
+      return super.getSpecializedRangeQuery(parser, field, minValue.toString(), maxValue.toString(), minInclusive, maxInclusive);
    }
    Query query = null;
    final boolean matchOnly = field.hasDocValues() && !field.indexed();
--- a/solr/core/src/java/org/apache/solr/schema/EnumFieldType.java
+++ b/solr/core/src/java/org/apache/solr/schema/EnumFieldType.java
@ -57,7 +57,7 @@ public class EnumFieldType extends AbstractEnumField {
  }
  
  @Override
-  public Query getRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive, boolean maxInclusive) {
+  protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive, boolean maxInclusive) {
    Integer minValue = enumMapping.stringValueToIntValue(min);
    Integer maxValue = enumMapping.stringValueToIntValue(max);

--- a/solr/core/src/java/org/apache/solr/schema/FieldType.java
+++ b/solr/core/src/java/org/apache/solr/schema/FieldType.java
@ -43,8 +43,10 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.DocValuesFieldExistsQuery;
 import org.apache.lucene.search.DocValuesRewriteMethod;
 import org.apache.lucene.search.MultiTermQuery;
+import org.apache.lucene.search.NormsFieldExistsQuery;
 import org.apache.lucene.search.PrefixQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.SortedNumericSelector;
@ -457,11 +459,13 @@ public abstract class FieldType extends FieldProperties {
   *
   * @param parser       the {@link org.apache.solr.search.QParser} calling the method
   * @param sf           the schema field
-   * @param termStr      the term string for prefix query
+   * @param termStr      the term string for prefix query; if empty, this query should match all docs with this field
   * @return a Query instance to perform prefix search
-   *
   */
  public Query getPrefixQuery(QParser parser, SchemaField sf, String termStr) {
+    if ("".equals(termStr)) {
+      return getExistenceQuery(parser, sf);
+    }
    PrefixQuery query = new PrefixQuery(new Term(sf.getName(), termStr));
    query.setRewriteMethod(sf.getType().getRewriteMethod(parser, sf));
    return query;
@ -847,16 +851,52 @@ public abstract class FieldType extends FieldProperties {
    return null;
  }

+  /**
+   * Returns a Query instance for doing range searches on this field type. {@link org.apache.solr.search.SolrQueryParser}
+   * currently passes <code>part1</code> and <code>part2</code> as null if they are '*' respectively. <code>minInclusive</code> and <code>maxInclusive</code> are both true
+   * currently by SolrQueryParser but that may change in the future. Also, other QueryParser implementations may have
+   * different semantics.
+   * <p>
+   * By default range queries with '*'s or nulls on either side are treated as existence queries and are created with {@link #getExistenceQuery}.
+   * If unbounded range queries should not be treated as existence queries for a certain fieldType, then {@link #treatUnboundedRangeAsExistence} should be overridden.
+   * <p>
+   * Sub-classes should override the {@link #getSpecializedRangeQuery} method to provide their own range query implementation.
+   *
+   * @param parser       the {@link org.apache.solr.search.QParser} calling the method
+   * @param field        the schema field
+   * @param part1        the lower boundary of the range, nulls are allowed.
+   * @param part2        the upper boundary of the range, nulls are allowed.
+   * @param minInclusive whether the minimum of the range is inclusive or not
+   * @param maxInclusive whether the maximum of the range is inclusive or not
+   * @return a Query instance to perform range search according to given parameters
+   */
+  public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
+    if (part1 == null && part2 == null && treatUnboundedRangeAsExistence(field)) {
+      return getExistenceQuery(parser, field);
+    }
+    return getSpecializedRangeQuery(parser, field, part1, part2, minInclusive, maxInclusive);
+  }

+  /**
+   * Returns whether an unbounded range query should be treated the same as an existence query for the given field type.
+   *
+   * @param field the schema field
+   * @return whether unbounded range and existence are equivalent for the given field type.
+   */
+  protected boolean treatUnboundedRangeAsExistence(SchemaField field) {
+    return true;
+  }

  /**
   * Returns a Query instance for doing range searches on this field type. {@link org.apache.solr.search.SolrQueryParser}
-   * currently passes part1 and part2 as null if they are '*' respectively. minInclusive and maxInclusive are both true
+   * currently passes <code>part1</code> and <code>part2</code> as null if they are '*' respectively. <code>minInclusive</code> and <code>maxInclusive</code> are both true
   * currently by SolrQueryParser but that may change in the future. Also, other QueryParser implementations may have
   * different semantics.
   * <p>
   * Sub-classes should override this method to provide their own range query implementation. They should strive to
-   * handle nulls in part1 and/or part2 as well as unequal minInclusive and maxInclusive parameters gracefully.
+   * handle nulls in <code>part1</code> and/or <code>part2</code> as well as unequal <code>minInclusive</code> and <code>maxInclusive</code> parameters gracefully.
+   * <p>
+   * This method does not, and should not, check for or handle existence queries, please look at {@link #getRangeQuery} for that logic.
   *
   * @param parser       the {@link org.apache.solr.search.QParser} calling the method
   * @param field        the schema field
@ -867,10 +907,11 @@ public abstract class FieldType extends FieldProperties {
   *  @return a Query instance to perform range search according to given parameters
   *
   */
-  public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
+  protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
    // TODO: change these all to use readableToIndexed/bytes instead (e.g. for unicode collation)
    final BytesRef miValue = part1 == null ? null : new BytesRef(toInternal(part1));
    final BytesRef maxValue = part2 == null ? null : new BytesRef(toInternal(part2));
+
    if (field.hasDocValues() && !field.indexed()) {
      return SortedSetDocValuesField.newSlowRangeQuery(
          field.getName(),
@ -885,13 +926,48 @@ public abstract class FieldType extends FieldProperties {
    }
  }

+  /**
+   * Returns a Query instance for doing existence searches for a field.
+   * If the field does not have docValues or norms, this method will call {@link #getSpecializedExistenceQuery}, which defaults to an unbounded rangeQuery.
+   * <p>
+   * This method should only be overridden whenever a fieldType does not support {@link org.apache.lucene.search.DocValuesFieldExistsQuery} or {@link org.apache.lucene.search.NormsFieldExistsQuery}.
+   * If a fieldType does not support an unbounded rangeQuery as an existenceQuery (such as <code>double</code> or <code>float</code> fields), {@link #getSpecializedExistenceQuery} should be overridden.
+   *
+   * @param parser The {@link org.apache.solr.search.QParser} calling the method
+   * @param field The {@link org.apache.solr.schema.SchemaField} of the field to search
+   * @return The {@link org.apache.lucene.search.Query} instance.
+   */
+  public Query getExistenceQuery(QParser parser, SchemaField field) {
+    if (field.hasDocValues()) {
+      return new DocValuesFieldExistsQuery(field.getName());
+    } else if (!field.omitNorms() && !isPointField()) { //TODO: Remove !isPointField() for SOLR-14199
+      return new NormsFieldExistsQuery(field.getName());
+    } else {
+      // Default to an unbounded range query
+      return getSpecializedExistenceQuery(parser, field);
+    }
+  }
+
+  /**
+   * Returns a Query instance for doing existence searches for a field without certain options, such as docValues or norms.
+   * <p>
+   * This method can be overridden to implement specialized existence logic for fieldTypes.
+   * The default query returned is an unbounded range query.
+   *
+   * @param parser The {@link org.apache.solr.search.QParser} calling the method
+   * @param field The {@link org.apache.solr.schema.SchemaField} of the field to search
+   * @return The {@link org.apache.lucene.search.Query} instance.
+   */
+  protected Query getSpecializedExistenceQuery(QParser parser, SchemaField field) {
+    return getSpecializedRangeQuery(parser, field, null, null, true, true);
+  }
+
  /**
   * Returns a Query instance for doing searches against a field.
   * @param parser The {@link org.apache.solr.search.QParser} calling the method
   * @param field The {@link org.apache.solr.schema.SchemaField} of the field to search
   * @param externalVal The String representation of the value to search
   * @return The {@link org.apache.lucene.search.Query} instance.  This implementation returns a {@link org.apache.lucene.search.TermQuery} but overriding queries may not
-   * 
   */
  public Query getFieldQuery(QParser parser, SchemaField field, String externalVal) {
    BytesRefBuilder br = new BytesRefBuilder();
--- a/solr/core/src/java/org/apache/solr/schema/LatLonType.java
+++ b/solr/core/src/java/org/apache/solr/schema/LatLonType.java
@ -103,7 +103,7 @@ public class LatLonType extends AbstractSubTypeFieldType implements SpatialQuery


  @Override
-  public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
+  protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
    Point p1 = SpatialUtils.parsePointSolrException(part1, SpatialContext.GEO);
    Point p2 = SpatialUtils.parsePointSolrException(part2, SpatialContext.GEO);

--- a/solr/core/src/java/org/apache/solr/schema/NumericFieldType.java
+++ b/solr/core/src/java/org/apache/solr/schema/NumericFieldType.java
@ -16,11 +16,16 @@
 */
 package org.apache.solr.schema;

+import java.util.EnumSet;
+
 import org.apache.lucene.document.DoublePoint;
 import org.apache.lucene.document.FloatPoint;
 import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.SortedNumericDocValuesField;
 import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.ConstantScoreQuery;
 import org.apache.lucene.search.MatchNoDocsQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.util.NumericUtils;
@ -302,4 +307,38 @@ public abstract class NumericFieldType extends PrimitiveFieldType {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, msg);
    }
  }
+
+  public static EnumSet<NumberType> doubleOrFloat = EnumSet.of(NumberType.FLOAT, NumberType.DOUBLE);
+
+  /**
+   * For doubles and floats, unbounded range queries (which do not match NaN values) are not equivalent to existence queries (which do match NaN values).
+   * <p>
+   * The two types of queries are equivalent for all other numeric types.
+   *
+   * @param field the schema field
+   * @return false for double and float fields, true for all others
+   */
+  @Override
+  protected boolean treatUnboundedRangeAsExistence(SchemaField field) {
+    return !doubleOrFloat.contains(getNumberType());
+  }
+
+  /**
+   * Override the default existence behavior, so that the non-docValued/norms implementation matches NaN values for double and float fields.
+   * The [* TO *] query for those fields does not match 'NaN' values, so they must be matched separately.
+   * <p>
+   * For doubles and floats the query behavior is equivalent to (field:[* TO *] OR field:NaN).
+   * For all other numeric types, the default existence query behavior is used.
+   */
+  @Override
+  public Query getSpecializedExistenceQuery(QParser parser, SchemaField field) {
+    if (doubleOrFloat.contains(getNumberType())) {
+      return new ConstantScoreQuery(new BooleanQuery.Builder()
+          .add(getSpecializedRangeQuery(parser, field, null, null, true, true), BooleanClause.Occur.SHOULD)
+          .add(getFieldQuery(parser, field, Float.toString(Float.NaN)), BooleanClause.Occur.SHOULD)
+          .setMinimumNumberShouldMatch(1).build());
+    } else {
+      return super.getSpecializedExistenceQuery(parser, field);
+    }
+  }
 }
--- a/solr/core/src/java/org/apache/solr/schema/PointField.java
+++ b/solr/core/src/java/org/apache/solr/schema/PointField.java
@ -161,11 +161,8 @@ public abstract class PointField extends NumericFieldType {

  protected abstract Query getExactQuery(SchemaField field, String externalVal);

-  public abstract Query getPointRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive,
-      boolean maxInclusive);
-
  @Override
-  public Query getRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive,
+  protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive,
                                           boolean maxInclusive) {
    if (!field.indexed() && field.hasDocValues()) {
      return getDocValuesRangeQuery(parser, field, min, max, minInclusive, maxInclusive);
@ -178,6 +175,9 @@ public abstract class PointField extends NumericFieldType {
    }
  }

+  public abstract Query getPointRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive,
+                                           boolean maxInclusive);
+
  @Override
  public String storedToReadable(IndexableField f) {
    return toExternal(f);
@ -222,6 +222,9 @@ public abstract class PointField extends NumericFieldType {

  @Override
  public Query getPrefixQuery(QParser parser, SchemaField sf, String termStr) {
+    if ("".equals(termStr)) {
+      return getExistenceQuery(parser, sf);
+    }
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Can't run prefix queries on numeric fields");
  }
  
--- a/solr/core/src/java/org/apache/solr/schema/PointType.java
+++ b/solr/core/src/java/org/apache/solr/schema/PointType.java
@ -128,7 +128,7 @@ public class PointType extends CoordinateFieldType implements SpatialQueryable {
  /**
   * Care should be taken in calling this with higher order dimensions for performance reasons.
   */
-  public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
+  protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
    //Query could look like: [x1,y1 TO x2,y2] for 2 dimension, but could look like: [x1,y1,z1 TO x2,y2,z2], and can be extrapolated to n-dimensions
    //thus, this query essentially creates a box, cube, etc.
    String[] p1 = parseCommaSeparatedList(part1, dimension);
--- a/solr/core/src/java/org/apache/solr/schema/TextField.java
+++ b/solr/core/src/java/org/apache/solr/schema/TextField.java
@ -158,7 +158,7 @@ public class TextField extends FieldType {
  }

  @Override
-  public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
+  protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
    Analyzer multiAnalyzer = getMultiTermAnalyzer();
    BytesRef lower = analyzeMultiTerm(field.getName(), part1, multiAnalyzer);
    BytesRef upper = analyzeMultiTerm(field.getName(), part2, multiAnalyzer);
--- a/solr/core/src/java/org/apache/solr/schema/TrieField.java
+++ b/solr/core/src/java/org/apache/solr/schema/TrieField.java
@ -298,10 +298,10 @@ public class TrieField extends NumericFieldType {
  }

  @Override
-  public Query getRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive, boolean maxInclusive) {
+  protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive, boolean maxInclusive) {
    if (field.multiValued() && field.hasDocValues() && !field.indexed()) {
      // for the multi-valued dv-case, the default rangeimpl over toInternal is correct
-      return super.getRangeQuery(parser, field, min, max, minInclusive, maxInclusive);
+      return super.getSpecializedRangeQuery(parser, field, min, max, minInclusive, maxInclusive);
    }
    int ps = precisionStep;
    Query query;
--- a/solr/core/src/test-files/solr/collection1/conf/schema12.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema12.xml
@ -693,16 +693,26 @@
  <dynamicField name="*_ds_dv" type="double" indexed="true" stored="true" docValues="true" multiValued="true"/>
  <dynamicField name="*_d_dvo" type="double" indexed="false" stored="true" docValues="true"/>

+  <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
+  <dynamicField name="*_dts" type="date" indexed="true" stored="true" multiValued="true"/>
+  <dynamicField name="*_dt_dv" type="date" indexed="true" stored="true" docValues="true" multiValued="false"/>
+  <dynamicField name="*_dts_dv" type="date" indexed="true" stored="true" docValues="true" multiValued="true"/>
+  <dynamicField name="*_dt_dvo" type="date" indexed="false" stored="true" docValues="true"/>
+  
  <dynamicField name="*_s1" type="string" indexed="true" stored="true" multiValued="false"/>
  <!-- :TODO: why are these identical?!?!?! -->
  <dynamicField name="*_s" type="string" indexed="true" stored="true" multiValued="true"/>
  <dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/>
+  <dynamicField name="*_s_dv" type="string" indexed="true" stored="true" docValues="true"/>
  <dynamicField name="*_sdv" type="string" indexed="false" stored="false" docValues="true" useDocValuesAsStored="true"/>
-  <dynamicField name="*_bdv" type="boolean" indexed="false" stored="false" docValues="true" useDocValuesAsStored="true"/>
+  <dynamicField name="*_ss_dv" type="string" indexed="true" stored="true" docValues="true" multiValued="true"/>
  <dynamicField name="*_t" type="text" indexed="true" stored="true"/>
  <dynamicField name="*_tt" type="text" indexed="true" stored="true"/>
  <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
-  <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
+  <dynamicField name="*_bs" type="boolean" indexed="true" stored="true" multiValued="true"/>
+  <dynamicField name="*_bdv" type="boolean" indexed="false" stored="false" docValues="true" useDocValuesAsStored="true"/>
+  <dynamicField name="*_b_dv" type="boolean" indexed="true" stored="true" docValues="true"/>
+  <dynamicField name="*_bs_dv" type="boolean" indexed="true" stored="true" docValues="true" multiValued="true"/>

  <dynamicField name="*_pi" type="pint" indexed="true" multiValued="false"/>
  <dynamicField name="*_pl" type="plong" indexed="true" multiValued="false"/>
@ -721,6 +731,17 @@
  <dynamicField name="*_iis" type="pint" indexed="false" stored="false" useDocValuesAsStored="true"/>
  <dynamicField name="*_ff" type="pfloat" indexed="false" stored="false" useDocValuesAsStored="false"/>

+  <!-- testing fields with & without norms
+       TODO: Remove numeric norms for SOLR-14199 -->
+  <dynamicField name="*_t_on" type="text" indexed="true" stored="true" omitNorms="true" docValues="false"/>
+  <dynamicField name="*_b_norms" type="boolean" indexed="true" stored="true" omitNorms="false" docValues="false"/>
+  <dynamicField name="*_s_norms" type="string" indexed="true" stored="true" omitNorms="false" docValues="false"/>
+  <dynamicField name="*_dt_norms" type="date" indexed="true" stored="true" omitNorms="true" docValues="false"/>
+  <dynamicField name="*_i_norms" type="int" indexed="true" stored="true" omitNorms="false" docValues="false"/>
+  <dynamicField name="*_l_norms" type="long" indexed="true" stored="true" omitNorms="false" docValues="false"/>
+  <dynamicField name="*_f_norms" type="float" indexed="true" stored="true" omitNorms="false" docValues="false"/>
+  <dynamicField name="*_d_norms" type="double" indexed="true" stored="true" omitNorms="false" docValues="false"/>
+
  <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
  <dynamicField name="attr_*" type="text" indexed="true" stored="true" multiValued="true"/>

--- a/solr/core/src/test-files/solr/collection1/conf/schema15.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema15.xml
@ -577,7 +577,11 @@
  <!-- for in-place updates -->
  <dynamicField name="*_i_dvo" multiValued="false" type="int"   docValues="true" indexed="false" stored="false"/>
  <dynamicField name="*_f_dvo" multiValued="false" type="float" docValues="true" indexed="false" stored="false"/>
+  <dynamicField name="*_d_dvo" multiValued="false" type="double" docValues="true" indexed="false" stored="false"/>
  <dynamicField name="*_l_dvo" multiValued="false" type="long"  docValues="true" indexed="false" stored="false"/>
+  <dynamicField name="*_dt_dvo" multiValued="false" type="date" docValues="true" indexed="false" stored="false"/>
+  <dynamicField name="*_s_dvo" multiValued="false" type="string" docValues="true" indexed="false" stored="false"/>
+  <dynamicField name="*_b_dvo" multiValued="false" type="boolean" docValues="true" indexed="false" stored="false"/>
  
  <dynamicField name="*_mfacet" type="string" indexed="true" stored="false" multiValued="true"/>

@ -595,6 +599,7 @@
  <dynamicField name="*_tt" type="text" indexed="true" stored="true"/>
  <dynamicField name="*_ws" type="nametext" indexed="true" stored="true"/>
  <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
+  <dynamicField name="*_bs" type="boolean" indexed="true" stored="true" multiValued="true"/>
  <dynamicField name="*_f" type="float" indexed="true" stored="true"/>
  <dynamicField name="*_d" type="double" indexed="true" stored="true"/>
  <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
--- a/solr/core/src/test/org/apache/solr/schema/CurrencyFieldTypeTest.java
+++ b/solr/core/src/test/org/apache/solr/schema/CurrencyFieldTypeTest.java
@ -226,6 +226,11 @@ public class CurrencyFieldTypeTest extends SolrTestCaseJ4 {
            fieldName+":[* TO *]"),
            "//*[@numFound='" + (2 + 10 + negDocs) + "']");

+    // Existence query (field:*) should match the same docs as the open ended range without currency
+    assertQ(req("fl", "*,score", "q",
+            fieldName+":*"),
+            "//*[@numFound='" + (2 + 10 + negDocs) + "']");
+    
    // Open ended ranges with currency
    assertQ(req("fl", "*,score", "q",
            fieldName+":[*,EUR TO *,EUR]"),
--- a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
+++ b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
@ -16,6 +16,7 @@
 */
 package org.apache.solr.search;

+import java.util.Arrays;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
@ -94,6 +95,21 @@ public class QueryEqualityTest extends SolrTestCaseJ4 {
                      " +apache +solr");
  }

+  public void testQueryLuceneAllDocsWithField() throws Exception {
+    // for all "primitive" types except for doubles/floats, 'foo:*' should be functionally equivalent to "foo:[* TO *]"
+    // whatever implementation/optimizations exist for one syntax, should exist for the other syntax as well
+    // (regardless of docValues, multivalued, etc...)
+    for (String field : Arrays.asList("foo_sI", "foo_sS", "foo_s1", "foo_s",
+                                      "t_foo", "tv_foo", "tv_mv_foo",
+                                      "foo_b", "foo_b_dvo",
+                                      "foo_i", "foo_is", "foo_i_dvo",
+                                      "foo_l", "foo_l_dvo",
+                                      "foo_dt", "foo_dt_dvo")) {
+
+      assertQueryEquals("lucene", field + ":*", field + ":[* TO *]");
+    }
+  }
+
  public void testQueryPrefix() throws Exception {
    SolrQueryRequest req = req("myField","foo_s");
    try {
--- a/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java
+++ b/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java
@ -17,6 +17,7 @@
 package org.apache.solr.search;

 import java.util.Arrays;
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
@ -28,13 +29,16 @@ import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.BoostQuery;
 import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.DocValuesFieldExistsQuery;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.NormsFieldExistsQuery;
 import org.apache.lucene.search.PointInSetQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermInSetQuery;
 import org.apache.lucene.search.TermQuery;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.MapSolrParams;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.params.SolrParams;
@ -44,7 +48,10 @@ import org.apache.solr.metrics.SolrMetricManager;
 import org.apache.solr.parser.QueryParser;
 import org.apache.solr.query.FilterQuery;
 import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.schema.NumberType;
 import org.apache.solr.schema.SchemaField;
+import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Test;

@ -59,6 +66,15 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
    createIndex();
  }

+  private static final List<String> HAS_VAL_FIELDS = new ArrayList<String>(41);
+  private static final List<String> HAS_NAN_FIELDS = new ArrayList<String>(12);
+
+  @AfterClass
+  public static void afterClass() throws Exception { // empty the static field-name lists that createIndex() filled
+    HAS_VAL_FIELDS.clear();
+    HAS_NAN_FIELDS.clear();
+  }
+  
  public static void createIndex() {
    String v;
    v = "how now brown cow";
@ -74,11 +90,97 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {

    assertU(adoc("id", "20", "syn", "wifi ATM"));

+    { // make a doc that has a value in *lots* of fields that no other doc has
+      SolrInputDocument doc = sdoc("id", "999");
+      
+      // numbers...
+      for (String t : Arrays.asList("i", "l", "f", "d")) { 
+        for (String s : Arrays.asList("", "s", "_dv", "s_dv", "_dvo", "_norms")) {
+          final String f = "has_val_" + t + s;
+          HAS_VAL_FIELDS.add(f);
+          doc.addField(f, "42");
+
+          if (t.equals("f") || t.equals("d")) {
+            String nanField = "nan_val_" + t + s;
+            doc.addField(nanField, "NaN");
+            HAS_NAN_FIELDS.add(nanField);
+
+            // Add a NaN & non-NaN value for multivalue fields, these should match :* and :[* TO *] equivalently
+            if (s.startsWith("s")) {
+              String bothField = "both_val_" + t + s;
+              doc.addField(bothField, "42");
+              doc.addField(bothField, "NaN");
+              HAS_VAL_FIELDS.add(bothField);
+            }
+          }
+        }
+      }
+      // boolean...booleans
+      for (String s : Arrays.asList("", "s", "_dv", "_norms")) {
+        final String f = "has_val_b" + s;
+        HAS_VAL_FIELDS.add(f);
+        doc.addField(f, "false");
+      }
+
+      // dates (and strings/text -- they don't care about the format)...
+      for (String s : Arrays.asList("dt", "s", "s1", "t", "t_on", "dt_norms", "s_norms", "dt_dv", "s_dv")) {
+        final String f = "has_val_" + s;
+        HAS_VAL_FIELDS.add(f);
+        doc.addField(f, "2019-01-12T00:00:00Z");
+      }
+      assertU(adoc(doc));
+    }
+            
    assertU(adoc("id", "30", "shingle23", "A B X D E"));

    assertU(commit());
  }

+  public void testDocsWithValuesInField() throws Exception {
+    assertEquals("someone changed the test setup of HAS_VAL_FIELDS, w/o updating the sanity check",
+                 41, HAS_VAL_FIELDS.size());
+    for (String f : HAS_VAL_FIELDS) {
+      // for all of these fields, these 2 syntaxes should be functionally equivalent
+      // in matching the one doc that contains these fields
+      for (String q : Arrays.asList( f + ":*", f + ":[* TO *]" )) {
+        assertJQ(req("q", q)
+                 , "/response/numFound==1"
+                 , "/response/docs/[0]/id=='999'"
+                 );
+        // the same syntaxes should be valid even if no doc has the field...
+        assertJQ(req("q", "bogus___" + q)
+                 , "/response/numFound==0"
+                 );
+
+      }
+    }
+  }
+
+  public void testDocsWithNaNInField() throws Exception {
+    assertEquals("someone changed the test setup of HAS_NAN_FIELDS, w/o updating the sanity check",
+        12, HAS_NAN_FIELDS.size());
+    for (String f : HAS_NAN_FIELDS) {
+      // these fields only hold the value NaN in doc 999, so field:* (existence) must match it while the range syntaxes must not
+      assertJQ(req("q", f + ":*")
+          , "/response/numFound==1"
+          , "/response/docs/[0]/id=='999'"
+      );
+      assertJQ(req("q", f + ":[* TO *]")
+          , "/response/numFound==0"
+      );
+      assertJQ(req("q", f + ":[-Infinity TO Infinity]")
+          , "/response/numFound==0"
+      );
+      for (String q : Arrays.asList( f + ":*", f + ":[* TO *]", f + ":[-Infinity TO Infinity]" )) {
+        // the same syntaxes should be valid even if no doc has the field...
+        assertJQ(req("q", "bogus___" + q)
+            , "/response/numFound==0"
+        );
+
+      }
+    }
+  }
+  
  @Test
  public void testPhrase() {
    // "text" field's type has WordDelimiterGraphFilter (WDGFF) and autoGeneratePhraseQueries=true
@ -1153,4 +1255,56 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {


  }
+
+  @Test
+  public void testFieldExistsQueries() throws SyntaxError {
+    SolrQueryRequest req = req();
+    String[] fieldSuffix = new String[] {
+        "ti", "tf", "td", "tl", "tdt",
+        "pi", "pf", "pd", "pl", "pdt",
+        "i", "f", "d", "l", "dt", "s", "b",
+        "is", "fs", "ds", "ls", "dts", "ss", "bs",
+        "i_dv", "f_dv", "d_dv", "l_dv", "dt_dv", "s_dv", "b_dv",
+        "is_dv", "fs_dv", "ds_dv", "ls_dv", "dts_dv", "ss_dv", "bs_dv",
+        "i_dvo", "f_dvo", "d_dvo", "l_dvo", "dt_dvo",
+        "t",
+        "t_on", "b_norms", "s_norms", "dt_norms", "i_norms", "l_norms", "f_norms", "d_norms"
+    };
+    String[] existenceQueries = new String[] {
+        "*", "[* TO *]"
+    };
+
+    for (String existenceQuery : existenceQueries) {
+      for (String suffix : fieldSuffix) {
+        IndexSchema indexSchema = h.getCore().getLatestSchema();
+        String field = "foo_" + suffix;
+        String query = field + ":" + existenceQuery;
+        QParser qParser = QParser.getParser(query, req);
+        Query createdQuery = qParser.getQuery();
+        SchemaField schemaField = indexSchema.getField(field);
+
+        // Test float & double realNumber queries differently
+        if ("[* TO *]".equals(existenceQuery) && (schemaField.getType().getNumberType() == NumberType.DOUBLE || schemaField.getType().getNumberType() == NumberType.FLOAT)) {
+          assertFalse("For float and double fields \"" + query + "\" is not an existence query, so the query returned should not be a DocValuesFieldExistsQuery.", createdQuery instanceof DocValuesFieldExistsQuery);
+          assertFalse("For float and double fields \"" + query + "\" is not an existence query, so the query returned should not be a NormsFieldExistsQuery.", createdQuery instanceof NormsFieldExistsQuery);
+          assertFalse("For float and double fields \"" + query + "\" is not an existence query, so NaN should not be matched via a ConstantScoreQuery.", createdQuery instanceof ConstantScoreQuery);
+          assertFalse("For float and double fields\"" + query + "\" is not an existence query, so NaN should not be matched via a BooleanQuery (NaN and [* TO *]).", createdQuery instanceof BooleanQuery);
+        } else {
+          if (schemaField.hasDocValues()) {
+            assertTrue("Field has docValues, so existence query \"" + query + "\" should return DocValuesFieldExistsQuery", createdQuery instanceof DocValuesFieldExistsQuery);
+          } else if (!schemaField.omitNorms() && !schemaField.getType().isPointField()) { //TODO: Remove !isPointField() for SOLR-14199
+            assertTrue("Field has norms and no docValues, so existence query \"" + query + "\" should return NormsFieldExistsQuery", createdQuery instanceof NormsFieldExistsQuery);
+          } else if (schemaField.getType().getNumberType() == NumberType.DOUBLE || schemaField.getType().getNumberType() == NumberType.FLOAT) {
+            assertTrue("PointField with NaN values must include \"exists or NaN\" if the field doesn't have norms or docValues: \"" + query + "\".", createdQuery instanceof ConstantScoreQuery);
+            assertTrue("PointField with NaN values must include \"exists or NaN\" if the field doesn't have norms or docValues: \"" + query + "\".", ((ConstantScoreQuery)createdQuery).getQuery() instanceof BooleanQuery);
+            assertEquals("PointField with NaN values must include \"exists or NaN\" if the field doesn't have norms or docValues: \"" + query + "\". This boolean query must be an OR.", 1, ((BooleanQuery)((ConstantScoreQuery)createdQuery).getQuery()).getMinimumNumberShouldMatch());
+            assertEquals("PointField with NaN values must include \"exists or NaN\" if the field doesn't have norms or docValues: \"" + query + "\". This boolean query must have 2 clauses.", 2, ((BooleanQuery)((ConstantScoreQuery)createdQuery).getQuery()).clauses().size());
+          } else {
+            assertFalse("Field doesn't have docValues, so existence query \"" + query + "\" should not return DocValuesFieldExistsQuery", createdQuery instanceof DocValuesFieldExistsQuery);
+            assertFalse("Field doesn't have norms, so existence query \"" + query + "\" should not return NormsFieldExistsQuery", createdQuery instanceof NormsFieldExistsQuery);
+          }
+        }
+      }
+    }
+  }
 }
--- a/solr/solr-ref-guide/src/the-standard-query-parser.adoc
+++ b/solr/solr-ref-guide/src/the-standard-query-parser.adoc
@ -146,6 +146,15 @@ To perform a proximity search, add the tilde character ~ and a numeric value to

 The distance referred to here is the number of term movements needed to match the specified phrase. In the example above, if "apache" and "jakarta" were 10 spaces apart in a field, but "apache" appeared before "jakarta", more than 10 term movements would be required to move the terms together and position "apache" to the right of "jakarta" with a space in between.

+=== Existence Searches
+
+An existence search for a field matches all documents where a value exists for that field.
+To query for a field existing, simply use a wildcard instead of a term in the search.
+
+`field:*`
+
+A field is considered to "exist" if it has any value, even a value that is often thought of as "non-existent" (e.g. `NaN`, `""`, etc.).
+
 === Range Searches

 A range search specifies a range of values for a field (a range with an upper bound and a lower bound). The query matches documents whose values for the specified field or fields fall within the range. Range queries can be inclusive or exclusive of the upper and lower bounds. Sorting is done lexicographically, except on numeric fields. For example, the range query below matches all documents whose `popularity` field has a value between 52 and 10,000, inclusive.
@ -164,6 +173,25 @@ The brackets around a query determine its inclusiveness.
 * Curly brackets `{` & `}` denote an exclusive range query that matches values between the upper and lower bounds, but excluding the upper and lower bounds themselves.
 * You can mix these types so one end of the range is inclusive and the other is exclusive. Here's an example: `count:{1 TO 10]`

+Wildcards, `*`, can also be used for either or both endpoints to specify an open-ended range query.
+This is a <<#differences-between-lucenes-classic-query-parser-and-solrs-standard-query-parser,divergence from Lucene's Classic Query Parser>>.
+
+* `field:[* TO 100]` finds all field values less than or equal to 100.
+* `field:[100 TO *]` finds all field values greater than or equal to 100.
+* `field:[* TO *]` finds any document with a value between the effective values of -Infinity and +Infinity for that field type.
+
+
+[NOTE]
+.Matching `NaN` values with wildcards
+====
+For most fields, unbounded range queries, `field:[* TO *]`, are equivalent to existence queries, `field:*`.
+However, for float/double types that support `NaN` values, these two queries behave differently.
+
+* `field:*` matches all existing values, including `NaN`
+* `field:[* TO *]` matches all real values, excluding `NaN`
+====
+
+
 === Boosting a Term with "^"

 Lucene/Solr provides the relevance level of matching documents based on the terms found. To boost a term use the caret symbol `^` with a boost factor (a number) at the end of the term you are searching. The higher the boost factor, the more relevant the term will be.
@ -322,13 +350,14 @@ Comments may be nested.

 Solr's standard query parser originated as a variation of Lucene's "classic" QueryParser.  It diverges in the following ways:

-* A `*` may be used for either or both endpoints to specify an open-ended range query
+* A `*` may be used for either or both endpoints to specify an open-ended range query, or by itself as an existence query.
 ** `field:[* TO 100]` finds all field values less than or equal to 100
 ** `field:[100 TO *]` finds all field values greater than or equal to 100
-** `field:[* TO *]` matches all documents with the field
+** `field:[* TO *]` finds all documents where the field has a value between `-Infinity` and `Infinity`, excluding `NaN`.
+** `field:*` finds all documents where the field exists (i.e. has any value).
 * Pure negative queries (all clauses prohibited) are allowed (only as a top-level clause)
 ** `-inStock:false` finds all field values where inStock is not false
-** `-field:[* TO *]` finds all documents without a value for field
+** `-field:*` finds all documents without a value for the field.
 * Support for embedded Solr queries (sub-queries) using any type of query parser as a nested clause using the local-params syntax.
 ** `inStock:true OR {!dismax qf='name manu' v='ipod'}`
 +