SOLR-11746: Adding existence queries for PointFields

* DocValuesFieldExistsQuery and NormsFieldExistsQuery are used for existence queries when possible.
* Added documentation on the difference between field:* and field:[* TO *]
This commit is contained in:
Houston Putman 2020-01-22 17:48:55 -05:00
parent 1051db4038
commit ffba54a827
21 changed files with 418 additions and 50 deletions

View File

@ -266,7 +266,7 @@ public class ICUCollationField extends FieldType {
}
@Override
public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
String f = field.getName();
BytesRef low = part1 == null ? null : getCollationKey(f, part1);
BytesRef high = part2 == null ? null : getCollationKey(f, part2);

View File

@ -1184,14 +1184,24 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
// Solr has always used constant scoring for prefix queries. This should return constant scoring by default.
return newPrefixQuery(new Term(field, termStr));
}
// called from parser (getWildcardQuery routes 'foo:*' clauses here)
protected Query getExistenceQuery(String field) {
  checkNullField(field);
  // Look up the schema definition, then let its field type choose the existence query.
  final SchemaField schemaField = schema.getField(field);
  return schemaField.getType().getExistenceQuery(parser, schemaField);
}
// called from parser
protected Query getWildcardQuery(String field, String termStr) throws SyntaxError {
checkNullField(field);
// *:* -> MatchAllDocsQuery
if ("*".equals(termStr)) {
if ("*".equals(field) || getExplicitField() == null) {
// '*:*' and '*' -> MatchAllDocsQuery
return newMatchAllDocsQuery();
} else {
// 'foo:*' -> existenceQuery
return getExistenceQuery(field);
}
}

View File

@ -316,7 +316,7 @@ public abstract class AbstractSpatialFieldType<T extends SpatialStrategy> extend
}
@Override
public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
if (!minInclusive || !maxInclusive)
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Both sides of spatial range query must be inclusive: " + field.getName());
Point p1 = SpatialUtils.parsePointSolrException(part1, ctx);

View File

@ -236,7 +236,7 @@ public class CollationField extends FieldType {
}
@Override
public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
String f = field.getName();
BytesRef low = part1 == null ? null : getCollationKey(f, part1);
BytesRef high = part2 == null ? null : getCollationKey(f, part2);

View File

@ -251,7 +251,7 @@ public class CurrencyFieldType extends FieldType implements SchemaAware, Resourc
CurrencyValue valueDefault;
valueDefault = value.convertTo(provider, defaultCurrency);
return getRangeQuery(parser, field, valueDefault, valueDefault, true, true);
return getRangeQueryInternal(parser, field, valueDefault, valueDefault, true, true);
}
/**
@ -316,8 +316,18 @@ public class CurrencyFieldType extends FieldType implements SchemaAware, Resourc
source);
}
/**
* Override the default existenceQuery implementation to run an existence query on the underlying amountField instead.
*/
@Override
public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, final boolean minInclusive, final boolean maxInclusive) {
public Query getExistenceQuery(QParser parser, SchemaField field) {
  // A currency field's existence is answered by its underlying amount field,
  // so hand the question to that field's own type.
  final SchemaField amountSubField = getAmountField(field);
  final FieldType amountType = amountSubField.getType();
  return amountType.getExistenceQuery(parser, amountSubField);
}
@Override
protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String part1, String part2, final boolean minInclusive, final boolean maxInclusive) {
final CurrencyValue p1 = CurrencyValue.parse(part1, defaultCurrency);
final CurrencyValue p2 = CurrencyValue.parse(part2, defaultCurrency);
@ -327,10 +337,10 @@ public class CurrencyFieldType extends FieldType implements SchemaAware, Resourc
": range queries only supported when upper and lower bound have same currency.");
}
return getRangeQuery(parser, field, p1, p2, minInclusive, maxInclusive);
return getRangeQueryInternal(parser, field, p1, p2, minInclusive, maxInclusive);
}
public Query getRangeQuery(QParser parser, SchemaField field, final CurrencyValue p1, final CurrencyValue p2, final boolean minInclusive, final boolean maxInclusive) {
private Query getRangeQueryInternal(QParser parser, SchemaField field, final CurrencyValue p1, final CurrencyValue p2, final boolean minInclusive, final boolean maxInclusive) {
String currencyCode = (p1 != null) ? p1.getCurrencyCode() :
(p2 != null) ? p2.getCurrencyCode() : defaultCurrency;

View File

@ -143,7 +143,7 @@ public class DateRangeField extends AbstractSpatialPrefixTreeFieldType<NumberRan
}
@Override
public Query getRangeQuery(QParser parser, SchemaField field, String startStr, String endStr, boolean minInclusive, boolean maxInclusive) {
protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String startStr, String endStr, boolean minInclusive, boolean maxInclusive) {
if (parser == null) {//null when invoked by SimpleFacets. But getQueryFromSpatialArgs expects to get localParams.
final SolrRequestInfo requestInfo = SolrRequestInfo.getRequestInfo();
parser = new QParser("", null, requestInfo.getReq().getParams(), requestInfo.getReq()) {

View File

@ -63,13 +63,13 @@ public class EnumField extends AbstractEnumField {
}
@Override
public Query getRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive, boolean maxInclusive) {
protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive, boolean maxInclusive) {
Integer minValue = enumMapping.stringValueToIntValue(min);
Integer maxValue = enumMapping.stringValueToIntValue(max);
if (field.multiValued() && field.hasDocValues() && !field.indexed()) {
// for the multi-valued dv-case, the default rangeimpl over toInternal is correct
return super.getRangeQuery(parser, field, minValue.toString(), maxValue.toString(), minInclusive, maxInclusive);
return super.getSpecializedRangeQuery(parser, field, minValue.toString(), maxValue.toString(), minInclusive, maxInclusive);
}
Query query = null;
final boolean matchOnly = field.hasDocValues() && !field.indexed();

View File

@ -57,7 +57,7 @@ public class EnumFieldType extends AbstractEnumField {
}
@Override
public Query getRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive, boolean maxInclusive) {
protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive, boolean maxInclusive) {
Integer minValue = enumMapping.stringValueToIntValue(min);
Integer maxValue = enumMapping.stringValueToIntValue(max);

View File

@ -43,8 +43,10 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.DocValuesRewriteMethod;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.NormsFieldExistsQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortedNumericSelector;
@ -457,11 +459,13 @@ public abstract class FieldType extends FieldProperties {
*
* @param parser the {@link org.apache.solr.search.QParser} calling the method
* @param sf the schema field
* @param termStr the term string for prefix query
* @param termStr the term string for prefix query, if blank then this query should match all docs with this field
* @return a Query instance to perform prefix search
*
*/
public Query getPrefixQuery(QParser parser, SchemaField sf, String termStr) {
if ("".equals(termStr)) {
return getExistenceQuery(parser, sf);
}
PrefixQuery query = new PrefixQuery(new Term(sf.getName(), termStr));
query.setRewriteMethod(sf.getType().getRewriteMethod(parser, sf));
return query;
@ -846,17 +850,53 @@ public abstract class FieldType extends FieldProperties {
// trivial base case
return null;
}
/**
* Returns a Query instance for doing range searches on this field type. {@link org.apache.solr.search.SolrQueryParser}
* currently passes part1 and part2 as null if they are '*' respectively. minInclusive and maxInclusive are both true
* currently passes <code>part1</code> and <code>part2</code> as null if they are '*' respectively. <code>minInclusive</code> and <code>maxInclusive</code> are both true
* currently by SolrQueryParser but that may change in the future. Also, other QueryParser implementations may have
* different semantics.
* <p>
* By default range queries with '*'s or nulls on either side are treated as existence queries and are created with {@link #getExistenceQuery}.
* If unbounded range queries should not be treated as existence queries for a certain fieldType, then {@link #treatUnboundedRangeAsExistence} should be overridden.
* <p>
* Sub-classes should override the {@link #getSpecializedRangeQuery} method to provide their own range query implementation.
*
* @param parser the {@link org.apache.solr.search.QParser} calling the method
* @param field the schema field
* @param part1 the lower boundary of the range, nulls are allowed.
* @param part2 the upper boundary of the range, nulls are allowed.
* @param minInclusive whether the minimum of the range is inclusive or not
* @param maxInclusive whether the maximum of the range is inclusive or not
* @return a Query instance to perform range search according to given parameters
*/
public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
  // Anything with at least one concrete bound, or any type that opts out of the
  // existence shortcut, goes straight to the type-specific range implementation.
  final boolean hasBound = part1 != null || part2 != null;
  if (hasBound || !treatUnboundedRangeAsExistence(field)) {
    return getSpecializedRangeQuery(parser, field, part1, part2, minInclusive, maxInclusive);
  }
  // Open on both ends: answer with the existence query instead.
  return getExistenceQuery(parser, field);
}
/**
 * Returns whether an unbounded range query should be treated the same as an existence query for the given field type.
 * <p>
 * {@link #getRangeQuery} consults this method when both range bounds are null; when it returns true the
 * range query is replaced by {@link #getExistenceQuery}. This base implementation always returns true.
 *
 * @param field the schema field
 * @return whether unbounded range and existence are equivalent for the given field type.
 */
protected boolean treatUnboundedRangeAsExistence(SchemaField field) {
return true;
}
/**
* Returns a Query instance for doing range searches on this field type. {@link org.apache.solr.search.SolrQueryParser}
* currently passes <code>part1</code> and <code>part2</code> as null if they are '*' respectively. <code>minInclusive</code> and <code>maxInclusive</code> are both true
* currently by SolrQueryParser but that may change in the future. Also, other QueryParser implementations may have
* different semantics.
* <p>
* Sub-classes should override this method to provide their own range query implementation. They should strive to
* handle nulls in part1 and/or part2 as well as unequal minInclusive and maxInclusive parameters gracefully.
* handle nulls in <code>part1</code> and/or <code>part2</code> as well as unequal <code>minInclusive</code> and <code>maxInclusive</code> parameters gracefully.
* <p>
* This method does not, and should not, check for or handle existence queries, please look at {@link #getRangeQuery} for that logic.
*
* @param parser the {@link org.apache.solr.search.QParser} calling the method
* @param field the schema field
@ -867,31 +907,67 @@ public abstract class FieldType extends FieldProperties {
* @return a Query instance to perform range search according to given parameters
*
*/
public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
// TODO: change these all to use readableToIndexed/bytes instead (e.g. for unicode collation)
final BytesRef miValue = part1 == null ? null : new BytesRef(toInternal(part1));
final BytesRef maxValue = part2 == null ? null : new BytesRef(toInternal(part2));
if (field.hasDocValues() && !field.indexed()) {
return SortedSetDocValuesField.newSlowRangeQuery(
field.getName(),
miValue, maxValue,
minInclusive, maxInclusive);
field.getName(),
miValue, maxValue,
minInclusive, maxInclusive);
} else {
SolrRangeQuery rangeQuery = new SolrRangeQuery(
field.getName(),
miValue, maxValue,
minInclusive, maxInclusive);
field.getName(),
miValue, maxValue,
minInclusive, maxInclusive);
return rangeQuery;
}
}
/**
 * Builds the Query that matches every document containing a value for the given field.
 * <p>
 * Fields with docValues use {@link org.apache.lucene.search.DocValuesFieldExistsQuery}. Otherwise, non-point fields
 * that do not omit norms use {@link org.apache.lucene.search.NormsFieldExistsQuery}. Every other field falls back to
 * {@link #getSpecializedExistenceQuery}, which defaults to an unbounded rangeQuery.
 * <p>
 * This method should only be overridden when a fieldType does not support either of those two Lucene queries.
 * If a fieldType does not support an unbounded rangeQuery as an existenceQuery (such as <code>double</code> or
 * <code>float</code> fields), {@link #getSpecializedExistenceQuery} should be overridden instead.
 *
 * @param parser The {@link org.apache.solr.search.QParser} calling the method
 * @param field The {@link org.apache.solr.schema.SchemaField} of the field to search
 * @return The {@link org.apache.lucene.search.Query} instance.
 */
public Query getExistenceQuery(QParser parser, SchemaField field) {
  if (field.hasDocValues()) {
    return new DocValuesFieldExistsQuery(field.getName());
  }

  //TODO: Remove the isPointField() check for SOLR-14199
  final boolean normsUnavailable = field.omitNorms() || isPointField();
  if (!normsUnavailable) {
    return new NormsFieldExistsQuery(field.getName());
  }

  // Default to an unbounded range query
  return getSpecializedExistenceQuery(parser, field);
}
/**
 * Builds the existence Query used for fields that lack certain options, such as docValues or norms.
 * <p>
 * FieldTypes may override this method to implement specialized existence logic.
 * The default query returned is an unbounded range query.
 *
 * @param parser The {@link org.apache.solr.search.QParser} calling the method
 * @param field The {@link org.apache.solr.schema.SchemaField} of the field to search
 * @return The {@link org.apache.lucene.search.Query} instance.
 */
protected Query getSpecializedExistenceQuery(QParser parser, SchemaField field) {
  // Null lower and upper bounds, with both ends flagged inclusive.
  final Query unboundedRange = getSpecializedRangeQuery(parser, field, null, null, true, true);
  return unboundedRange;
}
/**
* Returns a Query instance for doing searches against a field.
* @param parser The {@link org.apache.solr.search.QParser} calling the method
* @param field The {@link org.apache.solr.schema.SchemaField} of the field to search
* @param externalVal The String representation of the value to search
* @return The {@link org.apache.lucene.search.Query} instance. This implementation returns a {@link org.apache.lucene.search.TermQuery} but overriding queries may not
*
*/
public Query getFieldQuery(QParser parser, SchemaField field, String externalVal) {
BytesRefBuilder br = new BytesRefBuilder();

View File

@ -103,7 +103,7 @@ public class LatLonType extends AbstractSubTypeFieldType implements SpatialQuery
@Override
public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
Point p1 = SpatialUtils.parsePointSolrException(part1, SpatialContext.GEO);
Point p2 = SpatialUtils.parsePointSolrException(part2, SpatialContext.GEO);

View File

@ -16,11 +16,16 @@
*/
package org.apache.solr.schema;
import java.util.EnumSet;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.NumericUtils;
@ -302,4 +307,38 @@ public abstract class NumericFieldType extends PrimitiveFieldType {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, msg);
}
}
/**
 * The floating-point number types ({@link NumberType#FLOAT} and {@link NumberType#DOUBLE}).
 * This class checks it to decide whether unbounded range queries and existence queries are equivalent.
 * NOTE(review): the field is public and non-final and the set itself is mutable; confirm no caller
 * relies on reassigning or modifying it before tightening this.
 */
public static EnumSet<NumberType> doubleOrFloat = EnumSet.of(NumberType.FLOAT, NumberType.DOUBLE);
/**
 * Reports whether an unbounded range query on this numeric type can stand in for an existence query.
 * <p>
 * For doubles and floats, unbounded range queries (which do not match NaN values) are not equivalent to
 * existence queries (which do match NaN values). The two types of queries are equivalent for all other numeric types.
 *
 * @param field the schema field
 * @return false for double and float fields, true for all others
 */
@Override
protected boolean treatUnboundedRangeAsExistence(SchemaField field) {
  final boolean floatingPoint = doubleOrFloat.contains(getNumberType());
  return !floatingPoint;
}
/**
 * Overrides the default existence behavior so that the non-docValued/norms implementation also matches NaN values
 * for double and float fields. The [* TO *] query for those fields does not match 'NaN' values, so NaN must be
 * matched by a separate clause.
 * <p>
 * For doubles and floats the query behavior is equivalent to (field:[* TO *] OR field:NaN).
 * For all other numeric types, the default existence query behavior is used.
 */
@Override
public Query getSpecializedExistenceQuery(QParser parser, SchemaField field) {
  if (!doubleOrFloat.contains(getNumberType())) {
    return super.getSpecializedExistenceQuery(parser, field);
  }

  // Either clause is enough: any value inside the unbounded range, or the NaN value itself.
  final BooleanQuery.Builder rangeOrNaN = new BooleanQuery.Builder();
  rangeOrNaN.add(getSpecializedRangeQuery(parser, field, null, null, true, true), BooleanClause.Occur.SHOULD);
  rangeOrNaN.add(getFieldQuery(parser, field, Float.toString(Float.NaN)), BooleanClause.Occur.SHOULD);
  rangeOrNaN.setMinimumNumberShouldMatch(1);
  return new ConstantScoreQuery(rangeOrNaN.build());
}
}

View File

@ -161,12 +161,9 @@ public abstract class PointField extends NumericFieldType {
protected abstract Query getExactQuery(SchemaField field, String externalVal);
public abstract Query getPointRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive,
boolean maxInclusive);
@Override
public Query getRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive,
boolean maxInclusive) {
protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive,
boolean maxInclusive) {
if (!field.indexed() && field.hasDocValues()) {
return getDocValuesRangeQuery(parser, field, min, max, minInclusive, maxInclusive);
} else if (field.indexed() && field.hasDocValues()) {
@ -178,6 +175,9 @@ public abstract class PointField extends NumericFieldType {
}
}
public abstract Query getPointRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive,
boolean maxInclusive);
@Override
public String storedToReadable(IndexableField f) {
return toExternal(f);
@ -219,9 +219,12 @@ public abstract class PointField extends NumericFieldType {
}
protected abstract String indexedToReadable(BytesRef indexedForm);
@Override
public Query getPrefixQuery(QParser parser, SchemaField sf, String termStr) {
  // Only the empty prefix is meaningful for point fields; it is answered as an existence query.
  if (!"".equals(termStr)) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Can't run prefix queries on numeric fields");
  }
  return getExistenceQuery(parser, sf);
}

View File

@ -128,7 +128,7 @@ public class PointType extends CoordinateFieldType implements SpatialQueryable {
/**
* Care should be taken in calling this with higher order dimensions for performance reasons.
*/
public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
//Query could look like: [x1,y1 TO x2,y2] for 2 dimension, but could look like: [x1,y1,z1 TO x2,y2,z2], and can be extrapolated to n-dimensions
//thus, this query essentially creates a box, cube, etc.
String[] p1 = parseCommaSeparatedList(part1, dimension);

View File

@ -158,7 +158,7 @@ public class TextField extends FieldType {
}
@Override
public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
Analyzer multiAnalyzer = getMultiTermAnalyzer();
BytesRef lower = analyzeMultiTerm(field.getName(), part1, multiAnalyzer);
BytesRef upper = analyzeMultiTerm(field.getName(), part2, multiAnalyzer);

View File

@ -298,10 +298,10 @@ public class TrieField extends NumericFieldType {
}
@Override
public Query getRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive, boolean maxInclusive) {
protected Query getSpecializedRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive, boolean maxInclusive) {
if (field.multiValued() && field.hasDocValues() && !field.indexed()) {
// for the multi-valued dv-case, the default rangeimpl over toInternal is correct
return super.getRangeQuery(parser, field, min, max, minInclusive, maxInclusive);
return super.getSpecializedRangeQuery(parser, field, min, max, minInclusive, maxInclusive);
}
int ps = precisionStep;
Query query;

View File

@ -686,23 +686,33 @@
<dynamicField name="*_l_dv" type="long" indexed="true" stored="true" docValues="true" multiValued="false"/>
<dynamicField name="*_ls_dv" type="long" indexed="true" stored="true" docValues="true" multiValued="true"/>
<dynamicField name="*_l_dvo" type="long" indexed="false" stored="true" docValues="true"/>
<dynamicField name="*_d" type="double" indexed="true" stored="true"/>
<dynamicField name="*_ds" type="double" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_d_dv" type="double" indexed="true" stored="true" docValues="true" multiValued="false"/>
<dynamicField name="*_ds_dv" type="double" indexed="true" stored="true" docValues="true" multiValued="true"/>
<dynamicField name="*_d_dvo" type="double" indexed="false" stored="true" docValues="true"/>
<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
<dynamicField name="*_dts" type="date" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_dt_dv" type="date" indexed="true" stored="true" docValues="true" multiValued="false"/>
<dynamicField name="*_dts_dv" type="date" indexed="true" stored="true" docValues="true" multiValued="true"/>
<dynamicField name="*_dt_dvo" type="date" indexed="false" stored="true" docValues="true"/>
<dynamicField name="*_s1" type="string" indexed="true" stored="true" multiValued="false"/>
<!-- :TODO: why are these identical?!?!?! -->
<dynamicField name="*_s" type="string" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_s_dv" type="string" indexed="true" stored="true" docValues="true"/>
<dynamicField name="*_sdv" type="string" indexed="false" stored="false" docValues="true" useDocValuesAsStored="true"/>
<dynamicField name="*_bdv" type="boolean" indexed="false" stored="false" docValues="true" useDocValuesAsStored="true"/>
<dynamicField name="*_ss_dv" type="string" indexed="true" stored="true" docValues="true" multiValued="true"/>
<dynamicField name="*_t" type="text" indexed="true" stored="true"/>
<dynamicField name="*_tt" type="text" indexed="true" stored="true"/>
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
<dynamicField name="*_bs" type="boolean" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_bdv" type="boolean" indexed="false" stored="false" docValues="true" useDocValuesAsStored="true"/>
<dynamicField name="*_b_dv" type="boolean" indexed="true" stored="true" docValues="true"/>
<dynamicField name="*_bs_dv" type="boolean" indexed="true" stored="true" docValues="true" multiValued="true"/>
<dynamicField name="*_pi" type="pint" indexed="true" multiValued="false"/>
<dynamicField name="*_pl" type="plong" indexed="true" multiValued="false"/>
@ -721,6 +731,17 @@
<dynamicField name="*_iis" type="pint" indexed="false" stored="false" useDocValuesAsStored="true"/>
<dynamicField name="*_ff" type="pfloat" indexed="false" stored="false" useDocValuesAsStored="false"/>
<!-- testing fields with & without norms
TODO: Remove numeric norms for SOLR-14199 -->
<dynamicField name="*_t_on" type="text" indexed="true" stored="true" omitNorms="true" docValues="false"/>
<dynamicField name="*_b_norms" type="boolean" indexed="true" stored="true" omitNorms="false" docValues="false"/>
<dynamicField name="*_s_norms" type="string" indexed="true" stored="true" omitNorms="false" docValues="false"/>
<dynamicField name="*_dt_norms" type="date" indexed="true" stored="true" omitNorms="true" docValues="false"/>
<dynamicField name="*_i_norms" type="int" indexed="true" stored="true" omitNorms="false" docValues="false"/>
<dynamicField name="*_l_norms" type="long" indexed="true" stored="true" omitNorms="false" docValues="false"/>
<dynamicField name="*_f_norms" type="float" indexed="true" stored="true" omitNorms="false" docValues="false"/>
<dynamicField name="*_d_norms" type="double" indexed="true" stored="true" omitNorms="false" docValues="false"/>
<dynamicField name="ignored_*" type="ignored" multiValued="true"/>
<dynamicField name="attr_*" type="text" indexed="true" stored="true" multiValued="true"/>

View File

@ -577,7 +577,11 @@
<!-- for in-place updates -->
<dynamicField name="*_i_dvo" multiValued="false" type="int" docValues="true" indexed="false" stored="false"/>
<dynamicField name="*_f_dvo" multiValued="false" type="float" docValues="true" indexed="false" stored="false"/>
<dynamicField name="*_d_dvo" multiValued="false" type="double" docValues="true" indexed="false" stored="false"/>
<dynamicField name="*_l_dvo" multiValued="false" type="long" docValues="true" indexed="false" stored="false"/>
<dynamicField name="*_dt_dvo" multiValued="false" type="date" docValues="true" indexed="false" stored="false"/>
<dynamicField name="*_s_dvo" multiValued="false" type="string" docValues="true" indexed="false" stored="false"/>
<dynamicField name="*_b_dvo" multiValued="false" type="boolean" docValues="true" indexed="false" stored="false"/>
<dynamicField name="*_mfacet" type="string" indexed="true" stored="false" multiValued="true"/>
@ -595,6 +599,7 @@
<dynamicField name="*_tt" type="text" indexed="true" stored="true"/>
<dynamicField name="*_ws" type="nametext" indexed="true" stored="true"/>
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
<dynamicField name="*_bs" type="boolean" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_f" type="float" indexed="true" stored="true"/>
<dynamicField name="*_d" type="double" indexed="true" stored="true"/>
<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>

View File

@ -220,11 +220,16 @@ public class CurrencyFieldTypeTest extends SolrTestCaseJ4 {
assertQ(req("fl", "*,score", "q",
fieldName+":[24.99,EUR TO 25.01,EUR]"),
"//*[@numFound='1']");
// Open ended ranges without currency
assertQ(req("fl", "*,score", "q",
fieldName+":[* TO *]"),
"//*[@numFound='" + (2 + 10 + negDocs) + "']");
// Existence query (expected to match the same docs as the open ended range above)
assertQ(req("fl", "*,score", "q",
fieldName+":*"),
"//*[@numFound='" + (2 + 10 + negDocs) + "']");
// Open ended ranges with currency
assertQ(req("fl", "*,score", "q",

View File

@ -16,6 +16,7 @@
*/
package org.apache.solr.search;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
@ -94,6 +95,21 @@ public class QueryEqualityTest extends SolrTestCaseJ4 {
" +apache +solr");
}
public void testQueryLuceneAllDocsWithField() throws Exception {
  // For all "primitive" types except doubles/floats, 'foo:*' should be functionally equivalent to "foo:[* TO *]".
  // Whatever implementation/optimizations exist for one syntax should exist for the other syntax as well
  // (regardless of docValues, multivalued, etc...)
  for (String fieldName : Arrays.asList(
      "foo_sI", "foo_sS", "foo_s1", "foo_s",
      "t_foo", "tv_foo", "tv_mv_foo",
      "foo_b", "foo_b_dvo",
      "foo_i", "foo_is", "foo_i_dvo",
      "foo_l", "foo_l_dvo",
      "foo_dt", "foo_dt_dvo")) {
    final String existenceSyntax = fieldName + ":*";
    final String unboundedRangeSyntax = fieldName + ":[* TO *]";
    assertQueryEquals("lucene", existenceSyntax, unboundedRangeSyntax);
  }
}
public void testQueryPrefix() throws Exception {
SolrQueryRequest req = req("myField","foo_s");
try {

View File

@ -17,6 +17,7 @@
package org.apache.solr.search;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
@ -28,13 +29,16 @@ import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NormsFieldExistsQuery;
import org.apache.lucene.search.PointInSetQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
@ -44,7 +48,10 @@ import org.apache.solr.metrics.SolrMetricManager;
import org.apache.solr.parser.QueryParser;
import org.apache.solr.query.FilterQuery;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.NumberType;
import org.apache.solr.schema.SchemaField;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
@ -59,6 +66,15 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
createIndex();
}
// Names of the fields that createIndex() populates with a regular value on doc id=999.
// The initial capacity (41) matches the size asserted in testDocsWithValuesInField.
private static final List<String> HAS_VAL_FIELDS = new ArrayList<String>(41);
// Names of the float/double fields that createIndex() populates with only a NaN value on doc id=999.
// The initial capacity (12) matches the size asserted in testDocsWithNaNInField.
private static final List<String> HAS_NAN_FIELDS = new ArrayList<String>(12);
/**
 * Empties the static field-name lists once the class's tests are done.
 * Presumably this keeps entries from accumulating if the class is run again in the same JVM; confirm with the test runner setup.
 */
@AfterClass
public static void afterClass() throws Exception {
  HAS_NAN_FIELDS.clear();
  HAS_VAL_FIELDS.clear();
}
public static void createIndex() {
String v;
v = "how now brown cow";
@ -73,12 +89,98 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
assertU(adoc("id", "13", "eee_s", "'balance'", "rrr_s", "/leading_slash"));
assertU(adoc("id", "20", "syn", "wifi ATM"));
{ // build one document (id 999) holding values in many fields that no other doc uses
  SolrInputDocument doc = sdoc("id", "999");

  // numeric fields: one per (type, suffix) combination
  final String[] numericTypes = {"i", "l", "f", "d"};
  final String[] numericSuffixes = {"", "s", "_dv", "s_dv", "_dvo", "_norms"};
  for (String numericType : numericTypes) {
    // only the float and double types get NaN values
    final boolean supportsNaN = "f".equals(numericType) || "d".equals(numericType);
    for (String numericSuffix : numericSuffixes) {
      final String valueField = "has_val_" + numericType + numericSuffix;
      HAS_VAL_FIELDS.add(valueField);
      doc.addField(valueField, "42");
      if (!supportsNaN) {
        continue;
      }
      final String nanField = "nan_val_" + numericType + numericSuffix;
      doc.addField(nanField, "NaN");
      HAS_NAN_FIELDS.add(nanField);
      // Multivalued fields also get a field holding both a NaN and a non-NaN value;
      // these should match :* and :[* TO *] equivalently
      if (numericSuffix.startsWith("s")) {
        final String mixedField = "both_val_" + numericType + numericSuffix;
        doc.addField(mixedField, "42");
        doc.addField(mixedField, "NaN");
        HAS_VAL_FIELDS.add(mixedField);
      }
    }
  }

  // boolean fields
  final String[] booleanSuffixes = {"", "s", "_dv", "_norms"};
  for (String booleanSuffix : booleanSuffixes) {
    final String booleanField = "has_val_b" + booleanSuffix;
    HAS_VAL_FIELDS.add(booleanField);
    doc.addField(booleanField, "false");
  }

  // date fields (string/text fields share the value -- they don't care about the format)
  final String[] dateAndTextSuffixes = {"dt", "s", "s1", "t", "t_on", "dt_norms", "s_norms", "dt_dv", "s_dv"};
  for (String dateOrTextSuffix : dateAndTextSuffixes) {
    final String dateOrTextField = "has_val_" + dateOrTextSuffix;
    HAS_VAL_FIELDS.add(dateOrTextField);
    doc.addField(dateOrTextField, "2019-01-12T00:00:00Z");
  }

  assertU(adoc(doc));
}
assertU(adoc("id", "30", "shingle23", "A B X D E"));
assertU(commit());
}
/**
 * For every field recorded in HAS_VAL_FIELDS, checks that {@code field:*} and
 * {@code field:[* TO *]} each match exactly doc 999, and that the same syntaxes
 * are accepted (matching nothing) on a "bogus___"-prefixed field name.
 */
public void testDocsWithValuesInField() throws Exception {
  final int expectedFieldCount = 41;
  assertEquals("someone changed the test setup of HAS_VAL_FIELDS, w/o updating the sanity check",
      expectedFieldCount, HAS_VAL_FIELDS.size());

  // for all of these fields, these 2 syntaxes should be functionally equivalent
  final String[] existenceSyntaxes = {":*", ":[* TO *]"};
  for (String fieldName : HAS_VAL_FIELDS) {
    for (String syntax : existenceSyntaxes) {
      final String realQuery = fieldName + syntax;
      final String bogusQuery = "bogus___" + realQuery;

      // only the one doc containing the field should match
      assertJQ(req("q", realQuery),
          "/response/numFound==1",
          "/response/docs/[0]/id=='999'");

      // the syntax must remain valid when no doc has the field
      assertJQ(req("q", bogusQuery),
          "/response/numFound==0");
    }
  }
}
/**
 * For every field recorded in HAS_NAN_FIELDS, checks that {@code field:*} matches doc 999
 * while {@code field:[* TO *]} and {@code field:[-Infinity TO Infinity]} match nothing,
 * and that all three syntaxes are accepted (matching nothing) on a "bogus___"-prefixed field name.
 */
public void testDocsWithNaNInField() throws Exception {
  final int expectedFieldCount = 12;
  assertEquals("someone changed the test setup of HAS_NAN_FIELDS, w/o updating the sanity check",
      expectedFieldCount, HAS_NAN_FIELDS.size());

  for (String fieldName : HAS_NAN_FIELDS) {
    final String existsQuery = fieldName + ":*";
    final String openRangeQuery = fieldName + ":[* TO *]";
    final String infiniteRangeQuery = fieldName + ":[-Infinity TO Infinity]";

    // on these fields the existence query is NOT equivalent to the range queries
    assertJQ(req("q", existsQuery),
        "/response/numFound==1",
        "/response/docs/[0]/id=='999'");
    assertJQ(req("q", openRangeQuery),
        "/response/numFound==0");
    assertJQ(req("q", infiniteRangeQuery),
        "/response/numFound==0");

    // every syntax must remain valid when no doc has the field
    for (String query : new String[] {existsQuery, openRangeQuery, infiniteRangeQuery}) {
      assertJQ(req("q", "bogus___" + query),
          "/response/numFound==0");
    }
  }
}
@Test
public void testPhrase() {
// "text" field's type has WordDelimiterGraphFilter (WDGFF) and autoGeneratePhraseQueries=true
@ -1135,14 +1237,14 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
"is_dv", "fs_dv", "ds_dv", "ls_dv",
"i_dvo", "f_dvo", "d_dvo", "l_dvo",
};
for (String suffix:fieldSuffix) {
//Good queries
qParser = QParser.getParser("foo_" + suffix + ":(1 2 3 4 5 6 7 8 9 10 20 19 18 17 16 15 14 13 12 25)", req);
qParser.setIsFilter(true);
qParser.getQuery();
}
for (String suffix:fieldSuffix) {
qParser = QParser.getParser("foo_" + suffix + ":(1 2 3 4 5 6 7 8 9 10 20 19 18 17 16 15 14 13 12 NOT_A_NUMBER)", req);
qParser.setIsFilter(true); // this may change in the future
@ -1150,7 +1252,59 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
assertEquals(SolrException.ErrorCode.BAD_REQUEST.code, e.code());
assertTrue("Unexpected exception: " + e.getMessage(), e.getMessage().contains("Invalid Number: NOT_A_NUMBER"));
}
}
/**
 * Parses {@code foo_<suffix>:*} and {@code foo_<suffix>:[* TO *]} for a range of field suffixes
 * and asserts on the class of the query the parser produces.
 *
 * <ul>
 *   <li>{@code [* TO *]} on float/double fields must not produce a DocValuesFieldExistsQuery,
 *       NormsFieldExistsQuery, ConstantScoreQuery or BooleanQuery.</li>
 *   <li>Otherwise: fields with docValues must produce a DocValuesFieldExistsQuery; non-point fields
 *       with norms a NormsFieldExistsQuery; remaining float/double fields a ConstantScoreQuery
 *       wrapping a two-clause BooleanQuery with minimumNumberShouldMatch of 1; all other fields
 *       must produce neither of the two *FieldExistsQuery types.</li>
 * </ul>
 *
 * @throws SyntaxError if one of the generated query strings cannot be parsed
 */
@Test
public void testFieldExistsQueries() throws SyntaxError {
  String[] fieldSuffix = new String[] {
      "ti", "tf", "td", "tl", "tdt",
      "pi", "pf", "pd", "pl", "pdt",
      "i", "f", "d", "l", "dt", "s", "b",
      "is", "fs", "ds", "ls", "dts", "ss", "bs",
      "i_dv", "f_dv", "d_dv", "l_dv", "dt_dv", "s_dv", "b_dv",
      "is_dv", "fs_dv", "ds_dv", "ls_dv", "dts_dv", "ss_dv", "bs_dv",
      "i_dvo", "f_dvo", "d_dvo", "l_dvo", "dt_dvo",
      "t",
      "t_on", "b_norms", "s_norms", "dt_norms", "i_norms", "l_norms", "f_norms", "d_norms"
  };
  String[] existenceQueries = new String[] {
      "*", "[* TO *]"
  };

  SolrQueryRequest req = req();
  try {
    // The schema does not change while this test runs, so look it up once.
    IndexSchema indexSchema = h.getCore().getLatestSchema();

    for (String existenceQuery : existenceQueries) {
      for (String suffix : fieldSuffix) {
        String field = "foo_" + suffix;
        String query = field + ":" + existenceQuery;
        QParser qParser = QParser.getParser(query, req);
        Query createdQuery = qParser.getQuery();
        SchemaField schemaField = indexSchema.getField(field);

        NumberType numberType = schemaField.getType().getNumberType();
        boolean isFloatOrDouble = numberType == NumberType.DOUBLE || numberType == NumberType.FLOAT;

        // Test float & double realNumber queries differently
        if ("[* TO *]".equals(existenceQuery) && isFloatOrDouble) {
          assertFalse("For float and double fields \"" + query + "\" is not an existence query, so the query returned should not be a DocValuesFieldExistsQuery.", createdQuery instanceof DocValuesFieldExistsQuery);
          assertFalse("For float and double fields \"" + query + "\" is not an existence query, so the query returned should not be a NormsFieldExistsQuery.", createdQuery instanceof NormsFieldExistsQuery);
          assertFalse("For float and double fields \"" + query + "\" is not an existence query, so NaN should not be matched via a ConstantScoreQuery.", createdQuery instanceof ConstantScoreQuery);
          assertFalse("For float and double fields\"" + query + "\" is not an existence query, so NaN should not be matched via a BooleanQuery (NaN and [* TO *]).", createdQuery instanceof BooleanQuery);
        } else if (schemaField.hasDocValues()) {
          assertTrue("Field has docValues, so existence query \"" + query + "\" should return DocValuesFieldExistsQuery", createdQuery instanceof DocValuesFieldExistsQuery);
        } else if (!schemaField.omitNorms() && !schemaField.getType().isPointField()) { //TODO: Remove !isPointField() for SOLR-14199
          assertTrue("Field has norms and no docValues, so existence query \"" + query + "\" should return NormsFieldExistsQuery", createdQuery instanceof NormsFieldExistsQuery);
        } else if (isFloatOrDouble) {
          String nanMessage = "PointField with NaN values must include \"exists or NaN\" if the field doesn't have norms or docValues: \"" + query + "\".";
          assertTrue(nanMessage, createdQuery instanceof ConstantScoreQuery);
          Query wrappedQuery = ((ConstantScoreQuery) createdQuery).getQuery();
          assertTrue(nanMessage, wrappedQuery instanceof BooleanQuery);
          BooleanQuery existsOrNaN = (BooleanQuery) wrappedQuery;
          assertEquals(nanMessage + " This boolean query must be an OR.", 1, existsOrNaN.getMinimumNumberShouldMatch());
          assertEquals(nanMessage + " This boolean query must have 2 clauses.", 2, existsOrNaN.clauses().size());
        } else {
          assertFalse("Field doesn't have docValues, so existence query \"" + query + "\" should not return DocValuesFieldExistsQuery", createdQuery instanceof DocValuesFieldExistsQuery);
          assertFalse("Field doesn't have norms, so existence query \"" + query + "\" should not return NormsFieldExistsQuery", createdQuery instanceof NormsFieldExistsQuery);
        }
      }
    }
  } finally {
    // Release the request even when an assertion or parse error aborts the loops.
    req.close();
  }
}
}

View File

@ -146,6 +146,15 @@ To perform a proximity search, add the tilde character ~ and a numeric value to
The distance referred to here is the number of term movements needed to match the specified phrase. In the example above, if "apache" and "jakarta" were 10 spaces apart in a field, but "apache" appeared before "jakarta", more than 10 term movements would be required to move the terms together and position "apache" to the right of "jakarta" with a space in between.
=== Existence Searches
An existence search for a field matches all documents where a value exists for that field.
To query for a field existing, simply use a wildcard instead of a term in the search.
`field:*`
A field is considered to "exist" if it has any value, including values that are often treated as "non-existent" (e.g., `NaN`, `""`, etc.).
=== Range Searches
A range search specifies a range of values for a field (a range with an upper bound and a lower bound). The query matches documents whose values for the specified field or fields fall within the range. Range queries can be inclusive or exclusive of the upper and lower bounds. Sorting is done lexicographically, except on numeric fields. For example, the range query below matches all documents whose `popularity` field has a value between 52 and 10,000, inclusive.
@ -164,6 +173,25 @@ The brackets around a query determine its inclusiveness.
* Curly brackets `{` & `}` denote an exclusive range query that matches values between the upper and lower bounds, but excluding the upper and lower bounds themselves.
* You can mix these types so one end of the range is inclusive and the other is exclusive. Here's an example: `count:{1 TO 10]`
Wildcards, `*`, can also be used for either or both endpoints to specify an open-ended range query.
This is a <<#differences-between-lucenes-classic-query-parser-and-solrs-standard-query-parser,divergence from Lucene's Classic Query Parser>>.
* `field:[* TO 100]` finds all field values less than or equal to 100.
* `field:[100 TO *]` finds all field values greater than or equal to 100.
* `field:[* TO *]` finds any document with a value between the effective values of -Infinity and +Infinity for that field type.
[NOTE]
.Matching `NaN` values with wildcards
====
For most fields, unbounded range queries, `field:[* TO *]`, are equivalent to existence queries, `field:*`.
However, for float/double types that support `NaN` values, these two queries behave differently.
* `field:*` matches all existing values, including `NaN`
* `field:[* TO *]` matches all real values, excluding `NaN`
====
=== Boosting a Term with "^"
Lucene/Solr provides the relevance level of matching documents based on the terms found. To boost a term use the caret symbol `^` with a boost factor (a number) at the end of the term you are searching. The higher the boost factor, the more relevant the term will be.
@ -322,13 +350,14 @@ Comments may be nested.
Solr's standard query parser originated as a variation of Lucene's "classic" QueryParser. It diverges in the following ways:
* A `*` may be used for either or both endpoints to specify an open-ended range query
* A `*` may be used for either or both endpoints to specify an open-ended range query, or by itself as an existence query.
** `field:[* TO 100]` finds all field values less than or equal to 100
** `field:[100 TO *]` finds all field values greater than or equal to 100
** `field:[* TO *]` matches all documents with the field
** `field:[* TO *]` finds all documents where the field has a value between `-Infinity` and `Infinity`, excluding `NaN`.
** `field:*` finds all documents where the field exists (i.e. has any value).
* Pure negative queries (all clauses prohibited) are allowed (only as a top-level clause)
** `-inStock:false` finds all documents where inStock is not false
** `-field:[* TO *]` finds all documents without a value for field
** `-field:*` finds all documents without a value for the field.
* Support for embedded Solr queries (sub-queries) using any type of query parser as a nested clause using the local-params syntax.
** `inStock:true OR {!dismax qf='name manu' v='ipod'}`
+