SOLR-12962: Added a new 'uninvertible' option for fields and fieldtypes. This defaults to 'true' for backcompat allowing a FieldCache to be built for indexed fields as needed, but users are encouraged to set this to false (using docValues as needed) to reduce the risk of large fluctuations in heap size due to unexpected attempts to sort/facet/function on non-docValue fields.

This commit is contained in:
Chris Hostetter 2018-11-09 08:30:04 -07:00
parent 74e3ff509e
commit 77a4bfaa90
32 changed files with 558 additions and 191 deletions

View File

@ -191,6 +191,11 @@ New Features
* SOLR-12975: Add ltrim and rtrim Stream Evaluators (Joel Bernstein)
* SOLR-12962: Added a new 'uninvertible' option for fields and fieldtypes. This defaults to 'true' for
backcompat allowing a FieldCache to be built for indexed fields as needed, but users are encouraged
to set this to false (using docValues as needed) to reduce the risk of large fluctuations in heap
size due to unexpected attempts to sort/facet/function on non-docValue fields. (hossman)
Other Changes
----------------------

View File

@ -205,6 +205,7 @@ public class LukeRequestHandler extends RequestHandlerBase
flags.append( (f != null && f.fieldType().tokenized()) ? FieldFlag.TOKENIZED.getAbbreviation() : '-' );
flags.append( (f != null && f.fieldType().stored()) ? FieldFlag.STORED.getAbbreviation() : '-' );
flags.append( (f != null && f.fieldType().docValuesType() != DocValuesType.NONE) ? FieldFlag.DOC_VALUES.getAbbreviation() : "-" );
flags.append( (false) ? FieldFlag.UNINVERTIBLE.getAbbreviation() : '-' ); // SchemaField Specific
flags.append( (false) ? FieldFlag.MULTI_VALUED.getAbbreviation() : '-' ); // SchemaField Specific
flags.append( (f != null && f.fieldType().storeTermVectors()) ? FieldFlag.TERM_VECTOR_STORED.getAbbreviation() : '-' );
flags.append( (f != null && f.fieldType().storeTermVectorOffsets()) ? FieldFlag.TERM_VECTOR_OFFSET.getAbbreviation() : '-' );
@ -244,6 +245,7 @@ public class LukeRequestHandler extends RequestHandlerBase
flags.append( (t != null && t.isTokenized()) ? FieldFlag.TOKENIZED.getAbbreviation() : '-' );
flags.append( (f != null && f.stored()) ? FieldFlag.STORED.getAbbreviation() : '-' );
flags.append( (f != null && f.hasDocValues()) ? FieldFlag.DOC_VALUES.getAbbreviation() : "-" );
flags.append( (f != null && f.isUninvertible()) ? FieldFlag.UNINVERTIBLE.getAbbreviation() : "-" );
flags.append( (f != null && f.multiValued()) ? FieldFlag.MULTI_VALUED.getAbbreviation() : '-' );
flags.append( (f != null && f.storeTermVector() ) ? FieldFlag.TERM_VECTOR_STORED.getAbbreviation() : '-' );
flags.append( (f != null && f.storeTermOffsets() ) ? FieldFlag.TERM_VECTOR_OFFSET.getAbbreviation() : '-' );

View File

@ -670,6 +670,13 @@ public class SimpleFacets {
method = field.multiValued() ? FacetMethod.FC : FacetMethod.FCS;
}
/* Unless isUninvertible() is true, we prohibit any use of UIF...
Here we just force FC(S) instead, and trust that the DocValues faceting logic will
do the right thing either way (with or w/o docvalues) */
if (FacetMethod.UIF == method && ! field.isUninvertible()) {
method = field.multiValued() ? FacetMethod.FC : FacetMethod.FCS;
}
/* ENUM can't deal with trie fields that index several terms per value */
if (method == FacetMethod.ENUM
&& TrieField.getMainValuePrefix(type) != null) {

View File

@ -67,6 +67,13 @@ public class CurrencyField extends CurrencyFieldType implements SchemaAware, Res
args.remove(PARAM_PRECISION_STEP);
}
// NOTE: because we're not using the PluginLoader to register these field types, they aren't "real"
// field types and never get Schema default properties (based on schema.xml's version attribute)
// so only the properties explicitly set here (or on the SchemaField's we create from them) are used.
//
// In theory we should fix this, but since this class is already deprecated, we'll leave it alone
// to minimize the risk of a back-compat break for existing users.
// Initialize field type for amount
fieldTypeAmountRaw = new TrieLongField();
fieldTypeAmountRaw.setTypeName(FIELD_TYPE_AMOUNT_RAW);
@ -91,6 +98,7 @@ public class CurrencyField extends CurrencyFieldType implements SchemaAware, Res
props.put("stored", "false");
props.put("multiValued", "false");
props.put("omitNorms", "true");
props.put("uninvertible", "true");
int p = SchemaField.calcProps(name, type, props);
schema.registerDynamicFields(SchemaField.create(name, type, p, null));
}

View File

@ -54,6 +54,7 @@ public abstract class FieldProperties {
protected final static int STORE_TERMPAYLOADS = 0b10000000000000000;
protected final static int USE_DOCVALUES_AS_STORED = 0b100000000000000000;
protected final static int LARGE_FIELD = 0b1000000000000000000;
protected final static int UNINVERTIBLE = 0b10000000000000000000;
static final String[] propertyNames = {
"indexed", "tokenized", "stored",
@ -61,7 +62,8 @@ public abstract class FieldProperties {
"termVectors", "termPositions", "termOffsets",
"multiValued",
"sortMissingFirst","sortMissingLast","required", "omitPositions",
"storeOffsetsWithPositions", "docValues", "termPayloads", "useDocValuesAsStored", "large"
"storeOffsetsWithPositions", "docValues", "termPayloads", "useDocValuesAsStored", "large",
"uninvertible"
};
static final Map<String,Integer> propertyMap = new HashMap<>();

View File

@ -168,7 +168,9 @@ public abstract class FieldType extends FieldProperties {
args.remove("compressThreshold");
}
if (schemaVersion >= 1.6f) properties |= USE_DOCVALUES_AS_STORED;
properties |= UNINVERTIBLE;
this.args = Collections.unmodifiableMap(args);
Map<String,String> initArgs = new HashMap<>(args);
initArgs.remove(CLASS_NAME); // consume the class arg
@ -456,12 +458,18 @@ public abstract class FieldType extends FieldProperties {
}
/**
* <p>
* If DocValues is not enabled for a field, but it's indexed, docvalues can be constructed
* on the fly (uninverted, aka fieldcache) on the first request to sort, facet, etc.
* This specifies the structure to use.
* </p>
* <p>
* This method will not be used if the field is (effectively) <code>uninvertible="false"</code>
* </p>
*
* @param sf field instance
* @return type to uninvert, or {@code null} (to disallow uninversion for the field)
* @see SchemaField#isUninvertible()
*/
public abstract UninvertingReader.Type getUninversionType(SchemaField sf);
@ -1009,6 +1017,7 @@ public abstract class FieldType extends FieldProperties {
namedPropertyValues.add(getPropertyName(STORE_OFFSETS), hasProperty(STORE_OFFSETS));
namedPropertyValues.add(getPropertyName(MULTIVALUED), hasProperty(MULTIVALUED));
namedPropertyValues.add(getPropertyName(LARGE_FIELD), hasProperty(LARGE_FIELD));
namedPropertyValues.add(getPropertyName(UNINVERTIBLE), hasProperty(UNINVERTIBLE));
if (hasProperty(SORT_MISSING_FIRST)) {
namedPropertyValues.add(getPropertyName(SORT_MISSING_FIRST), true);
} else if (hasProperty(SORT_MISSING_LAST)) {

View File

@ -358,7 +358,21 @@ public class IndexSchema {
if (sf == null) {
return null;
}
return sf.getType().getUninversionType(sf);
if (sf.isUninvertible()) {
return sf.getType().getUninversionType(sf);
}
// else...
// It would be nice to throw a helpful error here, with a useful message for the user,
// but unfortunately, although the UninvertingReader class javadocs claim that the uninversion
// process is lazy, that doesn't mean it's lazy as in "only when a caller attempts to use doc values"
//
// The *mapping* function is consulted on LeafReader init/wrap for every FieldInfos found w/o docValues.
//
// So if we throw an error here instead of returning null, the act of just opening a
// newSearcher will trigger that error for any field, even if no one ever attempts to uninvert it
return null;
};
}

View File

@ -88,6 +88,7 @@ public final class SchemaField extends FieldProperties implements IndexableField
public FieldType getType() { return type; }
public int getProperties() { return properties; }
public boolean isUninvertible() { return (properties & UNINVERTIBLE)!=0; }
public boolean indexed() { return (properties & INDEXED)!=0; }
public boolean stored() { return (properties & STORED)!=0; }
public boolean hasDocValues() { return (properties & DOC_VALUES) != 0; }
@ -171,18 +172,18 @@ public final class SchemaField extends FieldProperties implements IndexableField
+ getName() + " of type: " + this.type.getTypeName());
}
if (! hasDocValues() ) {
if ( ! ( indexed() && null != this.type.getUninversionType(this) ) ) {
if ( ! ( indexed() && isUninvertible() && null != this.type.getUninversionType(this) ) ) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"can not sort on a field w/o docValues unless it is indexed and supports Uninversion: "
"can not sort on a field w/o docValues unless it is indexed=true uninvertible=true and the type supports Uninversion: "
+ getName());
}
}
}
/**
* Sanity checks that the properties of this field type are plausible
* for a field that may be used to get a FieldCacheSource, throwing
* an appropriate exception (including the field name) if it is not.
* Sanity checks that the properties of this field type are plausible for a field
* that may be used to get a {@link org.apache.lucene.queries.function.valuesource.FieldCacheSource},
* throwing an appropriate exception (including the field name) if it is not.
* FieldType subclasses can choose to call this method in their
* getValueSource implementation
* @see FieldType#getValueSource
@ -194,9 +195,9 @@ public final class SchemaField extends FieldProperties implements IndexableField
+ getName());
}
if (! hasDocValues() ) {
if ( ! ( indexed() && null != this.type.getUninversionType(this) ) ) {
if ( ! ( indexed() && isUninvertible() && null != this.type.getUninversionType(this) ) ) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"can not use FieldCache on a field w/o docValues unless it is indexed and supports Uninversion: "
"can not use FieldCache on a field w/o docValues unless it is indexed uninvertible=true and the type supports Uninversion: "
+ getName());
}
}
@ -247,17 +248,17 @@ public final class SchemaField extends FieldProperties implements IndexableField
if (on(falseProps,INDEXED)) {
int pp = (INDEXED
| STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS | STORE_TERMPAYLOADS);
| STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS | STORE_TERMPAYLOADS | UNINVERTIBLE);
if (on(pp,trueProps)) {
throw new RuntimeException("SchemaField: " + name + " conflicting 'true' field options for non-indexed field:" + props);
}
p &= ~pp;
}
if (on(falseProps,INDEXED) && on(falseProps,DOC_VALUES)) {
if (on(falseProps,UNINVERTIBLE) && on(falseProps,DOC_VALUES)) {
int pp = (SORT_MISSING_FIRST | SORT_MISSING_LAST);
if (on(pp,trueProps)) {
throw new RuntimeException("SchemaField: " + name + " conflicting 'true' field options for non-indexed/non-docValues field:" + props);
throw new RuntimeException("SchemaField: " + name + " conflicting 'true' field options for non-docValues/non-uninvertible field:" + props);
}
p &= ~pp;
}
@ -341,6 +342,7 @@ public final class SchemaField extends FieldProperties implements IndexableField
properties.add(getPropertyName(STORE_OFFSETS), storeOffsetsWithPositions());
properties.add(getPropertyName(MULTIVALUED), multiValued());
properties.add(getPropertyName(LARGE_FIELD), isLarge());
properties.add(getPropertyName(UNINVERTIBLE), isUninvertible());
if (sortMissingFirst()) {
properties.add(getPropertyName(SORT_MISSING_FIRST), sortMissingFirst());
} else if (sortMissingLast()) {

View File

@ -64,8 +64,12 @@ public class SpatialPointVectorFieldType extends AbstractSpatialFieldType<PointV
}
precisionStep = ((TrieField)fieldType).getPrecisionStep();
//Just set these, delegate everything else to the field type
final int p = (INDEXED | TOKENIZED | OMIT_NORMS | OMIT_TF_POSITIONS);
// NOTE: the SchemaField constructor we're using ignores any properties of the fieldType
// so only the ones we're explicitly setting get used.
//
// In theory we should fix this, but since this class is already deprecated, we'll leave it alone
// to minimize the risk of a back-compat break for existing users.
final int p = (INDEXED | TOKENIZED | OMIT_NORMS | OMIT_TF_POSITIONS | UNINVERTIBLE);
List<SchemaField> newFields = new ArrayList<>();
for( SchemaField sf : schema.getFields().values() ) {
if( sf.getType() == this ) {

View File

@ -392,9 +392,19 @@ public class CollapsingQParserPlugin extends QParserPlugin {
* This is VERY fast at query time but slower to warm and causes insanity.
*/
public static LeafReader getTopFieldCacheReader(SolrIndexSearcher searcher, String collapseField) {
UninvertingReader.Type type = null;
final SchemaField f = searcher.getSchema().getFieldOrNull(collapseField);
assert null != f; // should already be enforced higher up
assert !f.multiValued(); // should already be enforced higher up
assert f.getType() instanceof StrField; // this method shouldn't be called otherwise
if (f.indexed() && f.isUninvertible()) {
type = UninvertingReader.Type.SORTED;
}
return UninvertingReader.wrap(
new ReaderWrapper(searcher.getSlowAtomicReader(), collapseField),
Collections.singletonMap(collapseField, UninvertingReader.Type.SORTED)::get);
Collections.singletonMap(collapseField, type)::get);
}
private static class ReaderWrapper extends FilterLeafReader {

View File

@ -24,6 +24,7 @@ import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.NumberType;
import org.apache.solr.schema.SchemaField;
// Any type of facet request that generates a variable number of buckets
// and the ability to sort by those generated buckets.
abstract class FacetRequestSorted extends FacetRequest {
@ -110,7 +111,7 @@ public class FacetField extends FacetRequestSorted {
if (fcontext.facetInfo != null) {
// refinement... we will end up either skipping the entire facet, or calculating only specific facet buckets
if (multiToken && !sf.hasDocValues() && method!=FacetMethod.DV) {
if (multiToken && !sf.hasDocValues() && method!=FacetMethod.DV && sf.isUninvertible()) {
// Match the access method from the first phase.
// It won't always matter, but does currently for an all-values bucket
return new FacetFieldProcessorByArrayUIF(fcontext, this, sf);
@ -118,7 +119,7 @@ public class FacetField extends FacetRequestSorted {
return new FacetFieldProcessorByArrayDV(fcontext, this, sf);
}
NumberType ntype = ft.getNumberType();
NumberType ntype = ft.getNumberType();
// ensure we can support the requested options for numeric faceting:
if (ntype != null) {
if (prefix != null) {
@ -163,7 +164,7 @@ public class FacetField extends FacetRequestSorted {
// multi-valued after this point
if (sf.hasDocValues() || method == FacetMethod.DV) {
if (sf.hasDocValues() || method == FacetMethod.DV || !sf.isUninvertible()) {
// single and multi-valued string docValues
return new FacetFieldProcessorByArrayDV(fcontext, this, sf);
}

View File

@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.facet;
import java.io.IOException;
@ -22,6 +21,7 @@ import java.io.IOException;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.solr.common.SolrException;
import org.apache.solr.schema.SchemaField;
/** {@link UnInvertedField} implementation of field faceting.
@ -32,6 +32,10 @@ class FacetFieldProcessorByArrayUIF extends FacetFieldProcessorByArray {
FacetFieldProcessorByArrayUIF(FacetContext fcontext, FacetField freq, SchemaField sf) {
super(fcontext, freq, sf);
if (! sf.isUninvertible()) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
getClass()+" can not be used on fields where uninvertible='false'");
}
}
@Override

View File

@ -0,0 +1,35 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<schema name="bad-schema-not-indexed-but-uninvertible" version="1.6">
<fieldType name="string" class="solr.StrField"/>
<field name="id" type="string" indexed="true" stored="true" multiValued="false" required="false"/>
<field name="signatureField" type="string" indexed="true" stored="false"/>
<!-- BEGIN BAD STUFF -->
<field name="bad_field" type="string" indexed="false" uninvertible="true" />
<!-- END BAD STUFF -->
<dynamicField name="*_sS" type="string" indexed="false" stored="true"/>
<uniqueKey>id</uniqueKey>
</schema>

View File

@ -50,6 +50,9 @@
<fieldType name="int_dvas_t" class="${solr.tests.IntegerFieldType}" docValues="${solr.tests.numeric.dv}" useDocValuesAsStored="true"/>
<fieldType name="int_dvas_f" class="${solr.tests.IntegerFieldType}" docValues="${solr.tests.numeric.dv}" useDocValuesAsStored="false"/>
<fieldType name="str_uninvert_f" class="solr.StrField" uninvertible="false"/>
<fieldType name="str_uninvert_t" class="solr.StrField" uninvertible="true"/>
<!-- all behavior is default -->
<field name="text" type="text"/>
@ -98,6 +101,11 @@
<dynamicField name="*_dyn_ft_intdvas_t" type="int_dvas_t"/>
<dynamicField name="*_dyn_ft_intdvas_f" type="int_dvas_f"/>
<field name="ft_uninvert_t" type="str_uninvert_t"/>
<field name="ft_uninvert_f" type="str_uninvert_f"/>
<dynamicField name="*_dyn_ft_uninvert_t" type="str_uninvert_t"/>
<dynamicField name="*_dyn_ft_uninvert_f" type="str_uninvert_f"/>
<!-- explicit props on field -->
<field name="multi_f" type="str" multiValued="false"/>
<field name="multi_t" type="str" multiValued="true"/>
@ -128,5 +136,10 @@
<field name="intdvas_f" type="int" useDocValuesAsStored="false"/>
<dynamicField name="*_dyn_intdvas_t" type="int" useDocValuesAsStored="true"/>
<dynamicField name="*_dyn_intdvas_f" type="int" useDocValuesAsStored="false"/>
<field name="uninvert_t" type="str" uninvertible="true"/>
<field name="uninvert_f" type="str" uninvertible="false"/>
<dynamicField name="*_dyn_uninvert_t" type="str" uninvertible="true"/>
<dynamicField name="*_dyn_uninvert_f" type="str" uninvertible="false"/>
</schema>

View File

@ -638,6 +638,8 @@
<field name="payloadDelimited" type="payloadDelimited"/>
<field name="sortabuse_not_uninvertible" type="string" indexed="true" multiValued="false" uninvertible="false" />
<!-- EnumType -->
<field name="severity" type="severityType" docValues="true" indexed="true" stored="true" multiValued="false"/>
@ -753,7 +755,13 @@
<dynamicField name="*_dd_dvo" multiValued="true" type="double" docValues="true" indexed="false" stored="false"
useDocValuesAsStored="true"/>
<!-- Indexed, but NOT uninvertible -->
<dynamicField name="*_s_not_uninvert" type="string" indexed="true" stored="false" docValues="false" uninvertible="false" />
<!-- docValues, but NOT uninvertible -->
<dynamicField name="*_s_not_uninvert_dv" type="string" indexed="true" stored="false" docValues="true" uninvertible="false" />
<!-- Only Stored numerics -->
<dynamicField name="*_i_os" type="int" indexed="false" stored="true" docValues="false"/>
<dynamicField name="*_l_os" type="long" indexed="false" stored="true" docValues="false"/>
@ -823,6 +831,10 @@
<copyField source="range_facet_f" dest="range_facet_d_dv"/>
<copyField source="bday" dest="range_facet_dt_dv"/>
<copyField source="trait_s" dest="trait_s_not_uninvert"/>
<copyField source="trait_s" dest="trait_s_not_uninvert_dv"/>
<copyField source="trait_s" dest="trait_s_not_indexed_sS"/>
<!-- dynamic destination -->
<copyField source="*_dynamic" dest="dynamic_*"/>
<copyField source="*_path" dest="*_ancestor"/>

View File

@ -455,6 +455,9 @@ valued. -->
<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
<dynamicField name="*_ws" type="text_ws" indexed="true" stored="true"/>
<!-- Indexed, but NOT uninvertible -->
<dynamicField name="*_s_not_uninvert" type="string" indexed="true" stored="false" docValues="false" uninvertible="false" />
<!-- for testing tfidf functions, see TestFunctionQuery.testTFIDFFunctions -->
<dynamicField name="*_tfidf" type="tfidf_text" indexed="true" stored="true" />
<fieldType name="tfidf_text" class="solr.TextField" positionIncrementGap="100">

View File

@ -292,6 +292,12 @@
<field name="signatureField" type="string" indexed="true" stored="false"/>
<dynamicField name="*_sS" type="string" indexed="false" stored="true"/>
<!-- Indexed, but NOT uninvertible -->
<field name="where_s_multi_not_uninvert" type="string" indexed="true" stored="false" docValues="false" uninvertible="false" multiValued="true" />
<field name="where_s_single_not_uninvert" type="string" indexed="true" stored="false" docValues="false" uninvertible="false" multiValued="false" />
<!-- docValues, but NOT uninvertible -->
<field name="where_s_multi_not_uninvert_dv" type="string" indexed="true" stored="false" docValues="true" uninvertible="false" multiValued="true" />
<field name="where_s_single_not_uninvert_dv" type="string" indexed="true" stored="false" docValues="true" uninvertible="false" multiValued="false" />
<!-- Field to use to determine and enforce document uniqueness.
Unless this field is marked with required="false", it will be a required field
@ -324,6 +330,12 @@
<!-- Create a string version of author for faceting -->
<copyField source="author" dest="author_s"/>
<copyField source="where_s" dest="where_s_multi_not_uninvert"/>
<copyField source="where_s" dest="where_s_multi_not_uninvert_dv"/>
<copyField source="where_s" dest="where_s_single_not_uninvert"/>
<copyField source="where_s" dest="where_s_single_not_uninvert_dv"/>
<copyField source="where_s" dest="where_s_not_indexed_sS"/>
<!-- Above, multiple source fields are copied to the [text] field.
Another way to map multiple source fields to the same

View File

@ -22,6 +22,7 @@ import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
@ -989,30 +990,32 @@ public class BasicFunctionalityTest extends SolrTestCaseJ4 {
public void testAbuseOfSort() {
assertU(adoc("id", "9999991",
"sortabuse_b", "true",
"sortabuse_not_uninvertible", "xxx",
"sortabuse_t", "zzz xxx ccc vvv bbb nnn aaa sss ddd fff ggg"));
assertU(adoc("id", "9999992",
"sortabuse_b", "true",
"sortabuse_not_uninvertible", "yyy",
"sortabuse_t", "zzz xxx ccc vvv bbb nnn qqq www eee rrr ttt"));
assertU(commit());
RuntimeException outerEx = expectThrows(RuntimeException.class, () -> {
ignoreException("can not sort on multivalued field: sortabuse_t");
assertQ("sort on something that shouldn't work",
req("q", "sortabuse_b:true",
"sort", "sortabuse_t asc"),
"*[count(//doc)=2]");
});
Throwable root = getRootCause(outerEx);
assertEquals("sort exception root cause",
SolrException.class, root.getClass());
SolrException e = (SolrException) root;
assertEquals("incorrect error type",
SolrException.ErrorCode.BAD_REQUEST,
SolrException.ErrorCode.getErrorCode(e.code()));
assertTrue("exception doesn't contain field name",
e.getMessage().contains("sortabuse_t"));
for (String f : Arrays.asList("sortabuse_not_uninvertible", "sortabuse_t")) {
RuntimeException outerEx = expectThrows(RuntimeException.class, () -> {
ignoreException("sortabuse");
assertQ("sort on something that shouldn't work",
req("q", "*:*",
"sort", f+ " asc"),
"*[count(//doc)=2]");
});
Throwable root = getRootCause(outerEx);
assertEquals("sort exception root cause",
SolrException.class, root.getClass());
SolrException e = (SolrException) root;
assertEquals("incorrect error type",
SolrException.ErrorCode.BAD_REQUEST,
SolrException.ErrorCode.getErrorCode(e.code()));
assertTrue("exception doesn't contain field name",
e.getMessage().contains(f));
}
}
// /** this doesn't work, but if it did, this is how we'd test it. */

View File

@ -33,6 +33,7 @@ import org.apache.solr.common.params.FacetParams.FacetRangeInclude;
import org.apache.solr.common.params.FacetParams.FacetRangeMethod;
import org.apache.solr.common.params.FacetParams.FacetRangeOther;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.NumberType;
@ -842,6 +843,89 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
);
}
/**
 * Checks how <code>facet.field</code> behaves on a field declared
 * <code>uninvertible="false"</code> (and without docValues):
 * <ul>
 *  <li>with the default method, or an explicit <code>fc</code>/<code>fcs</code>/<code>uif</code>
 *      method, it must return no buckets -- the same as a non-indexed field;</li>
 *  <li>with an explicit <code>facet.method=enum</code> it must return the same counts as its
 *      copyField source and as the equivalent docValues field.</li>
 * </ul>
 * Every parameter combination is asserted exactly once per <code>facet.mincount</code> value.
 */
public void testBehaviorEquivilenceOfUninvertibleFalse() throws Exception {
  // NOTE: mincount=0 affects method detection/coercion, so we include permutations of it
  final List<String> mincounts = Arrays.asList("0", "1");

  {
    // an "uninvertible=false" field is not facetable using the "default" method,
    // or any explicit method other than "enum".
    //
    // it should behave the same as any attempt (using any method) at faceting on
    // an "indexed=false docValues=false" field -- returning no buckets.

    final List<String> emptyFields = Arrays.asList("trait_s_not_uninvert", "trait_s_not_indexed_sS");
    final List<SolrParams> paramSets = new ArrayList<>();

    // the "default" method request carries no mincount, so it only needs to be added once per field
    for (String f : emptyFields) {
      paramSets.add(params("facet.field", "{!key=x}" + f));
    }
    for (String min : mincounts) {
      for (String f : emptyFields) {
        for (String method : Arrays.asList("fc", "fcs", "uif")) {
          paramSets.add(params("facet.field", "{!key=x}" + f,
                               "facet.mincount", min,
                               "facet.method", method));
        }
      }
      // enum is only expected to be empty for the non-indexed field
      paramSets.add(params("facet.field", "{!key=x}trait_s_not_indexed_sS",
                           "facet.mincount", min,
                           "facet.method", "enum"));
    }
    for (SolrParams p : paramSets) {
      // "empty" results should be the same regardless of mincount
      assertQ("expect no buckets when field is not-indexed or not-uninvertible",
              req(p
                  ,"rows","0"
                  ,"q", "id_i1:[42 TO 47]"
                  ,"fq", "id_i1:[42 TO 45]"
                  ,"facet", "true"
                  )
              ,"//*[@numFound='4']"
              ,"*[count(//lst[@name='x'])=1]"
              ,"*[count(//lst[@name='x']/int)=0]"
              );
    }
  }

  {
    // the only way to facet on an "uninvertible=false" field is to explicitly request facet.method=enum
    // in which case it should behave consistently with its copyField source & equivalent docValues field
    // (using any method for either of them)

    // none of these param sets depend on mincount (it is supplied per request below), so build
    // the list once instead of re-adding every entry on each mincount iteration
    final List<SolrParams> paramSets = new ArrayList<>();
    paramSets.add(params("facet.field", "{!key=x}trait_s_not_uninvert",
                         "facet.method", "enum"));
    for (String okField : Arrays.asList("trait_s", "trait_s_not_uninvert_dv")) {
      paramSets.add(params("facet.field", "{!key=x}" + okField));
      for (String method : Arrays.asList("enum","fc", "fcs", "uif")) {
        paramSets.add(params("facet.field", "{!key=x}" + okField,
                             "facet.method", method));
      }
    }
    for (String min : mincounts) {
      // with mincount=0 the zero-count 'Pig' bucket is also expected (4 buckets instead of 3)
      final boolean zeroBuckets = "0".equals(min);
      for (SolrParams p : paramSets) {
        assertQ("check counts for applied facet queries using filtering (fq)",
                req(p
                    ,"rows","0"
                    ,"q", "id_i1:[42 TO 47]"
                    ,"fq", "id_i1:[42 TO 45]"
                    ,"facet", "true"
                    ,"facet.mincount", min
                    )
                ,"//*[@numFound='4']"
                ,"*[count(//lst[@name='x'])=1]"
                ,"*[count(//lst[@name='x']/int)="+(zeroBuckets ? "4]" : "3]")
                ,"//lst[@name='x']/int[@name='Tool'][.='2']"
                ,"//lst[@name='x']/int[@name='Obnoxious'][.='1']"
                ,"//lst[@name='x']/int[@name='Chauvinist'][.='1']"
                ,"count(//lst[@name='x']/int[@name='Pig'][.='0'])=" + (zeroBuckets ? "1" : "0")
                );
      }
    }
  }
}
public static void indexDateFacets() {
final String i = "id";
final String f = "bday";

View File

@ -17,8 +17,7 @@
package org.apache.solr.request;
import static junit.framework.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.util.Arrays;
import org.apache.solr.request.SimpleFacets.FacetMethod;
import org.apache.solr.schema.BoolField;
@ -26,201 +25,214 @@ import org.apache.solr.schema.IntPointField;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.StrField;
import org.apache.solr.schema.TrieIntField;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;
public class TestFacetMethods {
public class TestFacetMethods extends LuceneTestCase {
// TODO - make these public in FieldProperties?
protected final static int MULTIVALUED = 0x00000200;
protected final static int DOC_VALUES = 0x00008000;
protected final static int UNINVERTIBLE = 0b10000000000000000000;
protected static boolean propsMatch( int x, int y ) {
return (x & y) != 0;
}
@Test
public void testNumericSingleValuedDV() {
SchemaField field = new SchemaField("field", new TrieIntField(), DOC_VALUES, null);
// default is FCS, can't use ENUM due to trie-field terms, FC rewrites to FCS for efficiency
assertEquals(SimpleFacets.FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, null, 0));
assertEquals(SimpleFacets.FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.ENUM, 0));
assertEquals(SimpleFacets.FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.FC, 0));
assertEquals(SimpleFacets.FacetMethod.UIF, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.UIF, 0));
assertEquals(SimpleFacets.FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.FCS, 0));
assertEquals(SimpleFacets.FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, null, 1));
assertEquals(SimpleFacets.FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.ENUM, 1));
assertEquals(SimpleFacets.FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.FC, 1));
assertEquals(SimpleFacets.FacetMethod.UIF, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.UIF, 1));
assertEquals(SimpleFacets.FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.FCS, 1));
// Exercise the field both with and without the UNINVERTIBLE property set.
for (int props : Arrays.asList(DOC_VALUES ^ UNINVERTIBLE,
                               DOC_VALUES)) {
  SchemaField field = new SchemaField("field", new TrieIntField(), props, null);
  // default is FCS, can't use ENUM due to trie-field terms, FC rewrites to FCS for efficiency
  for (int mincount : Arrays.asList(0, 1)) {
    // behavior should be independent of mincount
    assertEquals(FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, null, mincount));
    assertEquals(FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, FacetMethod.ENUM, mincount));
    assertEquals(FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, FacetMethod.FC, mincount));
    assertEquals(FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, FacetMethod.FCS, mincount));
    // UIF only allowed if field is UNINVERTIBLE
    // (pass the loop's mincount, not a literal 0, so both mincount values are actually checked)
    assertEquals(propsMatch(props, UNINVERTIBLE) ? FacetMethod.UIF : FacetMethod.FCS,
                 SimpleFacets.selectFacetMethod(field, FacetMethod.UIF, mincount));
  }
}
}
/**
 * Checks which facet method {@code SimpleFacets.selectFacetMethod} returns for a
 * multi-valued {@code TrieIntField} with docValues, for every requested method and
 * for mincount 0 and 1.
 * <p>
 * NOTE(review): this span appears to hold both the earlier assertions (a single
 * {@code field}, fully qualified {@code SimpleFacets.FacetMethod}) and the loop over
 * {@code props} that supersedes them; {@code field} is declared twice as a result and
 * the two halves disagree on the UIF expectation when UNINVERTIBLE is not set.
 * Confirm which half is current before relying on either.
 */
@Test
public void testNumericMultiValuedDV() {
SchemaField field = new SchemaField("field", new TrieIntField(), DOC_VALUES ^ MULTIVALUED, null);
// default is FC, can't use ENUM due to trie-field terms, can't use FCS because of multivalues
// default value is FC
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, null, 0));
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.ENUM, 0));
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.FCS, 0));
assertEquals(SimpleFacets.FacetMethod.UIF, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.UIF, 0));
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.FC, 0));
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, null, 1));
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.ENUM, 1));
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.FCS, 1));
assertEquals(SimpleFacets.FacetMethod.UIF, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.UIF, 1));
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.FC, 1));
// run the same checks with and without the UNINVERTIBLE property set
for (int props : Arrays.asList(DOC_VALUES ^ MULTIVALUED ^ UNINVERTIBLE,
DOC_VALUES ^ MULTIVALUED)) {
SchemaField field = new SchemaField("field", new TrieIntField(), props, null);
// default value is FC
for (int mincount : Arrays.asList(0, 1)) {
// behavior should be independent of mincount
assertEquals(FacetMethod.FC, SimpleFacets.selectFacetMethod(field, null, mincount));
assertEquals(FacetMethod.FC, SimpleFacets.selectFacetMethod(field, FacetMethod.ENUM, mincount));
assertEquals(FacetMethod.FC, SimpleFacets.selectFacetMethod(field, FacetMethod.FCS, mincount));
assertEquals(FacetMethod.FC, SimpleFacets.selectFacetMethod(field, FacetMethod.FC, mincount));
// UIF only allowed if field is UNINVERTIBLE
assertEquals(propsMatch(props, UNINVERTIBLE) ? FacetMethod.UIF : FacetMethod.FC,
SimpleFacets.selectFacetMethod(field, FacetMethod.UIF, mincount));
}
}
}
/**
 * Checks the facet method chosen for a single-valued {@code TrieIntField} without
 * docValues: FCS for every requested method except UIF, which the loop below only
 * expects when the field is UNINVERTIBLE and mincount is 1.
 * <p>
 * NOTE(review): the first group of assertions (single {@code field} with props 0) looks
 * like the superseded version of the {@code props} loop that follows; {@code field} is
 * declared twice and the two groups disagree on UIF at mincount 1 for props 0.
 * Confirm which group is current.
 */
@Test
public void testNumericSingleValuedNoDV() {
SchemaField field = new SchemaField("field", new TrieIntField(), 0, null);
// only works with FCS for mincount = 0, UIF for count > 0 is fine
assertEquals(SimpleFacets.FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, null, 0));
assertEquals(SimpleFacets.FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.ENUM, 0));
assertEquals(SimpleFacets.FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.FC, 0));
assertEquals(SimpleFacets.FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.UIF, 0));
assertEquals(SimpleFacets.FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.FCS, 0));
assertEquals(SimpleFacets.FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, null, 1));
assertEquals(SimpleFacets.FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.ENUM, 1));
assertEquals(SimpleFacets.FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.FC, 1));
assertEquals(SimpleFacets.FacetMethod.UIF, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.UIF, 1));
assertEquals(SimpleFacets.FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.FCS, 1));
// run the same checks with and without the UNINVERTIBLE property set
for (int props : Arrays.asList(0 ^ UNINVERTIBLE,
0)) {
SchemaField field = new SchemaField("field", new TrieIntField(), props, null);
// FCS is used by default for most requested methods other than UIF -- regardless of mincount
for (int mincount : Arrays.asList(0, 1)) {
assertEquals(FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, null, mincount));
assertEquals(FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, FacetMethod.ENUM, mincount));
assertEquals(FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, FacetMethod.FC, mincount));
assertEquals(FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, FacetMethod.FCS, mincount));
}
// UIF allowed only if UNINVERTIBLE *AND* mincount > 0
assertEquals(FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, FacetMethod.UIF, 0));
assertEquals(propsMatch(props, UNINVERTIBLE) ? FacetMethod.UIF : FacetMethod.FCS,
SimpleFacets.selectFacetMethod(field, FacetMethod.UIF, 1));
}
}
/**
 * Checks the facet method chosen for a multi-valued {@code TrieIntField} without
 * docValues: FC for every requested method except UIF, which the loop below only
 * expects when the field is UNINVERTIBLE and mincount is 1.
 * <p>
 * NOTE(review): the leading assertions on a single MULTIVALUED {@code field} appear to
 * be the older form of the {@code props} loop that follows; {@code field} is declared
 * twice and the two forms disagree on UIF at mincount 1 when UNINVERTIBLE is not set.
 * Confirm which form is current.
 */
@Test
public void testNumericMultiValuedNoDV() {
SchemaField field = new SchemaField("field", new TrieIntField(), MULTIVALUED, null);
// only works with FC for mincount = 0, UIF for count > 1 is fine
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, null, 0));
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.ENUM, 0));
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.FCS, 0));
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.UIF, 0));
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.FC, 0));
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, null, 1));
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.ENUM, 1));
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.FCS, 1));
assertEquals(SimpleFacets.FacetMethod.UIF, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.UIF, 1));
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.FC, 1));
// run the same checks with and without the UNINVERTIBLE property set
for (int props : Arrays.asList(MULTIVALUED ^ UNINVERTIBLE,
MULTIVALUED)) {
SchemaField field = new SchemaField("field", new TrieIntField(), props, null);
// FC is used by default for most requested methods other than UIF -- regardless of mincount
for (int mincount : Arrays.asList(0, 1)) {
assertEquals(FacetMethod.FC, SimpleFacets.selectFacetMethod(field, null, mincount));
assertEquals(FacetMethod.FC, SimpleFacets.selectFacetMethod(field, FacetMethod.ENUM, mincount));
assertEquals(FacetMethod.FC, SimpleFacets.selectFacetMethod(field, FacetMethod.FC, mincount));
assertEquals(FacetMethod.FC, SimpleFacets.selectFacetMethod(field, FacetMethod.FCS, mincount));
}
// UIF allowed only if UNINVERTIBLE *AND* mincount > 0
assertEquals(FacetMethod.FC, SimpleFacets.selectFacetMethod(field, FacetMethod.UIF, 0));
assertEquals(propsMatch(props, UNINVERTIBLE) ? FacetMethod.UIF : FacetMethod.FC,
SimpleFacets.selectFacetMethod(field, FacetMethod.UIF, 1));
}
}
/**
 * Checks the facet method chosen for a single-valued {@code StrField} with docValues:
 * FC by default, otherwise the requested method, with UIF handled specially.
 * <p>
 * NOTE(review): two method signatures appear back to back here
 * ({@code testTextSingleValuedDV} and {@code testStringSingleValuedDV}), which looks
 * like a rename whose old signature and old body were left in place; {@code field} is
 * also declared twice and the two bodies disagree on the UIF result when UNINVERTIBLE
 * is not set. Confirm which version is current.
 */
@Test
public void testTextSingleValuedDV() {
SchemaField field = new SchemaField("field", new StrField(), DOC_VALUES, null);
// default is FC, otherwise just uses the passed-in method
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, null, 0));
assertEquals(SimpleFacets.FacetMethod.ENUM, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.ENUM, 0));
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.FC, 0));
assertEquals(SimpleFacets.FacetMethod.UIF, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.UIF, 0));
assertEquals(SimpleFacets.FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.FCS, 0));
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, null, 1));
assertEquals(SimpleFacets.FacetMethod.ENUM, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.ENUM, 1));
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.FC, 1));
assertEquals(SimpleFacets.FacetMethod.UIF, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.UIF, 1));
assertEquals(SimpleFacets.FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.FCS, 1));
public void testStringSingleValuedDV() {
// run the same checks with and without the UNINVERTIBLE property set
for (int props : Arrays.asList(DOC_VALUES ^ UNINVERTIBLE,
DOC_VALUES)) {
SchemaField field = new SchemaField("field", new StrField(), props, null);
// default is FC, otherwise just uses the passed-in method as is unless UIF...
for (int mincount : Arrays.asList(0, 1)) {
// behavior should be independent of mincount
assertEquals(FacetMethod.FC, SimpleFacets.selectFacetMethod(field, null, mincount));
assertEquals(FacetMethod.ENUM, SimpleFacets.selectFacetMethod(field, FacetMethod.ENUM, mincount));
assertEquals(FacetMethod.FC, SimpleFacets.selectFacetMethod(field, FacetMethod.FC, mincount));
assertEquals(FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, FacetMethod.FCS, mincount));
// UIF only allowed if field is UNINVERTIBLE
assertEquals(propsMatch(props, UNINVERTIBLE) ? FacetMethod.UIF : FacetMethod.FCS,
SimpleFacets.selectFacetMethod(field, FacetMethod.UIF, mincount));
}
}
}
/**
 * Checks the facet method chosen for a multi-valued {@code StrField} with docValues:
 * FC by default and in place of FCS, ENUM when requested, with UIF handled specially.
 * <p>
 * NOTE(review): {@code testTextMultiValuedDV} and {@code testStringMultiValuedDV} are
 * both declared in this span, which looks like a rename with the old signature and old
 * body still present; {@code field} is declared twice and the two bodies disagree on
 * the UIF result when UNINVERTIBLE is not set. Confirm which version is current.
 */
@Test
public void testTextMultiValuedDV() {
SchemaField field = new SchemaField("field", new StrField(), DOC_VALUES ^ MULTIVALUED, null);
// default is FC, can't use FCS because of multivalues
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, null, 0));
assertEquals(SimpleFacets.FacetMethod.ENUM, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.ENUM, 0));
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.FCS, 0));
assertEquals(SimpleFacets.FacetMethod.UIF, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.UIF, 0));
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.FC, 0));
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, null, 1));
assertEquals(SimpleFacets.FacetMethod.ENUM, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.ENUM, 1));
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.FCS, 1));
assertEquals(SimpleFacets.FacetMethod.UIF, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.UIF, 1));
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.FC, 1));
public void testStringMultiValuedDV() {
// run the same checks with and without the UNINVERTIBLE property set
for (int props : Arrays.asList(MULTIVALUED ^ DOC_VALUES ^ UNINVERTIBLE,
MULTIVALUED ^ DOC_VALUES)) {
SchemaField field = new SchemaField("field", new StrField(), props, null);
// default is FC, can't use FCS because of multivalues...
for (int mincount : Arrays.asList(0, 1)) {
// behavior should be independent of mincount
assertEquals(FacetMethod.FC, SimpleFacets.selectFacetMethod(field, null, mincount));
assertEquals(FacetMethod.ENUM, SimpleFacets.selectFacetMethod(field, FacetMethod.ENUM, mincount));
assertEquals(FacetMethod.FC, SimpleFacets.selectFacetMethod(field, FacetMethod.FC, mincount));
assertEquals(FacetMethod.FC, SimpleFacets.selectFacetMethod(field, FacetMethod.FCS, mincount));
// UIF only allowed if field is UNINVERTIBLE
assertEquals(propsMatch(props, UNINVERTIBLE) ? FacetMethod.UIF : FacetMethod.FC,
SimpleFacets.selectFacetMethod(field, FacetMethod.UIF, mincount));
}
}
}
/**
 * Checks the facet method chosen for a single-valued {@code StrField} without
 * docValues: FC by default, otherwise the requested method, with UIF only expected by
 * the loop below when the field is UNINVERTIBLE and mincount is 1.
 * <p>
 * NOTE(review): both {@code testTextSingleValuedNoDV} and
 * {@code testStringSingleValuedNoDV} are declared in this span, which looks like a
 * rename with the old signature and old body still present; {@code field} is declared
 * twice and the two bodies disagree on UIF at mincount 1 for props 0. Confirm which
 * version is current.
 */
@Test
public void testTextSingleValuedNoDV() {
SchemaField field = new SchemaField("field", new StrField(), 0, null);
// default is FC, UIF rewrites to FCS for mincount = 0
// TODO should it rewrite to FC instead?
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, null, 0));
assertEquals(SimpleFacets.FacetMethod.ENUM, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.ENUM, 0));
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.FC, 0));
assertEquals(SimpleFacets.FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.UIF, 0));
assertEquals(SimpleFacets.FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.FCS, 0));
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, null, 1));
assertEquals(SimpleFacets.FacetMethod.ENUM, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.ENUM, 1));
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.FC, 1));
assertEquals(SimpleFacets.FacetMethod.UIF, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.UIF, 1));
assertEquals(SimpleFacets.FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.FCS, 1));
public void testStringSingleValuedNoDV() {
// run the same checks with and without the UNINVERTIBLE property set
for (int props : Arrays.asList(0 ^ UNINVERTIBLE,
0)) {
SchemaField field = new SchemaField("field", new StrField(), props, null);
// default is FC, otherwise just uses the passed-in method as is unless UIF...
for (int mincount : Arrays.asList(0, 1)) {
// behavior should be independent of mincount
assertEquals(FacetMethod.FC, SimpleFacets.selectFacetMethod(field, null, mincount));
assertEquals(FacetMethod.ENUM, SimpleFacets.selectFacetMethod(field, FacetMethod.ENUM, mincount));
assertEquals(FacetMethod.FC, SimpleFacets.selectFacetMethod(field, FacetMethod.FC, mincount));
assertEquals(FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, FacetMethod.FCS, mincount));
}
// UIF allowed only if UNINVERTIBLE *AND* mincount > 0
assertEquals(FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, FacetMethod.UIF, 0));
assertEquals(propsMatch(props, UNINVERTIBLE) ? FacetMethod.UIF : FacetMethod.FCS,
SimpleFacets.selectFacetMethod(field, FacetMethod.UIF, 1));
}
}
/**
 * Checks the facet method chosen for a multi-valued {@code StrField} without
 * docValues: FC by default and in place of FCS, ENUM when requested, with UIF only
 * expected by the loop below when the field is UNINVERTIBLE and mincount is 1.
 * <p>
 * NOTE(review): both {@code testTextMultiValuedNoDV} and
 * {@code testStringMultiValuedNoDV} are declared in this span, which looks like a
 * rename with the old signature and old body still present; {@code field} is declared
 * twice and the two bodies disagree on UIF at mincount 1 when UNINVERTIBLE is not set.
 * Confirm which version is current.
 */
@Test
public void testTextMultiValuedNoDV() {
SchemaField field = new SchemaField("field", new StrField(), MULTIVALUED, null);
// default is FC, can't use FCS for multivalued fields, UIF rewrites to FC for mincount = 0
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, null, 0));
assertEquals(SimpleFacets.FacetMethod.ENUM, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.ENUM, 0));
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.FCS, 0));
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.UIF, 0));
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.FC, 0));
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, null, 1));
assertEquals(SimpleFacets.FacetMethod.ENUM, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.ENUM, 1));
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.FCS, 1));
assertEquals(SimpleFacets.FacetMethod.UIF, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.UIF, 1));
assertEquals(SimpleFacets.FacetMethod.FC, SimpleFacets.selectFacetMethod(field, SimpleFacets.FacetMethod.FC, 1));
public void testStringMultiValuedNoDV() {
// run the same checks with and without the UNINVERTIBLE property set
for (int props : Arrays.asList(MULTIVALUED ^ UNINVERTIBLE,
MULTIVALUED)) {
SchemaField field = new SchemaField("field", new StrField(), props, null);
// default is FC, can't use FCS because of multivalues...
for (int mincount : Arrays.asList(0, 1)) {
// behavior should be independent of mincount
assertEquals(FacetMethod.FC, SimpleFacets.selectFacetMethod(field, null, mincount));
assertEquals(FacetMethod.ENUM, SimpleFacets.selectFacetMethod(field, FacetMethod.ENUM, mincount));
assertEquals(FacetMethod.FC, SimpleFacets.selectFacetMethod(field, FacetMethod.FC, mincount));
assertEquals(FacetMethod.FC, SimpleFacets.selectFacetMethod(field, FacetMethod.FCS, mincount));
}
// UIF allowed only if UNINVERTIBLE *AND* mincount > 0
assertEquals(FacetMethod.FC, SimpleFacets.selectFacetMethod(field, FacetMethod.UIF, 0));
assertEquals(propsMatch(props, UNINVERTIBLE) ? FacetMethod.UIF : FacetMethod.FC,
SimpleFacets.selectFacetMethod(field, FacetMethod.UIF, 1));
}
}
/**
 * Checks that a {@code BoolField} with no explicitly requested facet method resolves to
 * ENUM for mincount 0 and 1, with and without the UNINVERTIBLE property.
 * <p>
 * NOTE(review): the first {@code field} declaration and its two assertions look like
 * the superseded form of the {@code props} loop below; {@code field} is declared twice
 * as a result. Confirm which form is current.
 */
@Test
public void testBooleanDefaults() {
// BoolField defaults to ENUM
SchemaField field = new SchemaField("field", new BoolField(), 0, null);
assertEquals(SimpleFacets.FacetMethod.ENUM, SimpleFacets.selectFacetMethod(field, null, 0));
assertEquals(SimpleFacets.FacetMethod.ENUM, SimpleFacets.selectFacetMethod(field, null, 1));
// run the same checks with and without the UNINVERTIBLE property set
for (int props : Arrays.asList(0 ^ UNINVERTIBLE,
0)) {
SchemaField field = new SchemaField("field", new BoolField(), props, null);
assertEquals(SimpleFacets.FacetMethod.ENUM, SimpleFacets.selectFacetMethod(field, null, 0));
assertEquals(SimpleFacets.FacetMethod.ENUM, SimpleFacets.selectFacetMethod(field, null, 1));
}
}
/**
 * Checks that {@code IntPointField} fields always resolve to FCS, whatever facet method
 * is requested; the loop below covers every combination of MULTIVALUED, DOC_VALUES and
 * UNINVERTIBLE, for mincount 0 and 1, and includes UIF among the requested methods.
 * <p>
 * NOTE(review): the assertions on the "foo" and "fooMV" fields ahead of the loop look
 * like the superseded form of this test; {@code field} is declared again inside the
 * loop as a result. Confirm which form is current.
 */
@Test
public void testPointFields() {
// Methods other than FCS are not currently supported for PointFields
SchemaField field = new SchemaField("foo", new IntPointField());
assertEquals(SimpleFacets.FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, null, 0));
assertEquals(SimpleFacets.FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, FacetMethod.ENUM, 0));
assertEquals(SimpleFacets.FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, FacetMethod.FC, 0));
assertEquals(SimpleFacets.FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, FacetMethod.FCS, 0));
field = new SchemaField("fooMV", new IntPointField(), 0x00000200, "0"); //MultiValued
assertTrue(field.multiValued());
assertEquals(SimpleFacets.FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, null, 0));
assertEquals(SimpleFacets.FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, FacetMethod.ENUM, 0));
assertEquals(SimpleFacets.FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, FacetMethod.FC, 0));
assertEquals(SimpleFacets.FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, FacetMethod.FCS, 0));
// every combination of the three properties, including none of them
for (int props : Arrays.asList(MULTIVALUED ^ DOC_VALUES ^ UNINVERTIBLE,
MULTIVALUED ^ DOC_VALUES,
MULTIVALUED ^ UNINVERTIBLE,
UNINVERTIBLE,
MULTIVALUED,
DOC_VALUES,
0)) {
SchemaField field = new SchemaField("foo", new IntPointField(), props, null);
for (int mincount : Arrays.asList(0, 1)) {
assertEquals(FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, null, mincount));
assertEquals(FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, FacetMethod.ENUM, mincount));
assertEquals(FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, FacetMethod.FC, mincount));
assertEquals(FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, FacetMethod.FCS, mincount));
assertEquals(FacetMethod.FCS, SimpleFacets.selectFacetMethod(field, FacetMethod.UIF, mincount));
}
}
}
}

View File

@ -390,13 +390,15 @@ public class TestBulkSchemaAPI extends RestTestBase {
" 'name':'a2',\n" +
" 'type': 'string',\n" +
" 'stored':true,\n" +
" 'indexed':true\n" +
" 'indexed':true,\n" +
" 'uninvertible':true,\n" +
" },\n" +
" 'add-dynamic-field' : {\n" +
" 'name' :'*_lol',\n" +
" 'type':'string',\n" +
" 'stored':true,\n" +
" 'indexed':true\n" +
" 'indexed':true,\n" +
" 'uninvertible':false,\n" +
" },\n" +
" 'add-copy-field' : {\n" +
" 'source' :'a1',\n" +
@ -470,6 +472,7 @@ public class TestBulkSchemaAPI extends RestTestBase {
" 'add-field-type' : {" +
" 'name' : 'myWhitespaceTxtField',\n" +
" 'class':'solr.TextField',\n" +
" 'uninvertible':false,\n" +
" 'analyzer' : {'class' : 'org.apache.lucene.analysis.core.WhitespaceAnalyzer'}\n" +
" },\n"+
" 'add-field' : {\n" +
@ -532,6 +535,7 @@ public class TestBulkSchemaAPI extends RestTestBase {
assertEquals("string", m.get("type"));
assertEquals(Boolean.TRUE, m.get("stored"));
assertEquals(Boolean.TRUE, m.get("indexed"));
assertEquals(Boolean.TRUE, m.get("uninvertible"));
m = getObj(harness,"*_lol", "dynamicFields");
assertNotNull("field *_lol not created", m);
@ -539,6 +543,7 @@ public class TestBulkSchemaAPI extends RestTestBase {
assertEquals("string", m.get("type"));
assertEquals(Boolean.TRUE, m.get("stored"));
assertEquals(Boolean.TRUE, m.get("indexed"));
assertEquals(Boolean.FALSE, m.get("uninvertible"));
l = getSourceCopyFields(harness, "a1");
s = new HashSet();
@ -579,11 +584,13 @@ public class TestBulkSchemaAPI extends RestTestBase {
m = getObj(harness, "myWhitespaceTxtField", "fieldTypes");
assertNotNull(m);
assertEquals(Boolean.FALSE, m.get("uninvertible"));
assertNull(m.get("similarity")); // unspecified, expect default
m = getObj(harness, "a5", "fields");
assertNotNull("field a5 not created", m);
assertEquals("myWhitespaceTxtField", m.get("type"));
assertNull(m.get("uninvertible")); // inherited, but API shouldn't return w/o explicit showDefaults
assertFieldSimilarity("a5", BM25Similarity.class); // unspecified, expect default
m = getObj(harness, "wdf_nocase", "fields");

View File

@ -23,11 +23,12 @@ public class TestFieldResource extends SolrRestletTestBase {
public void testGetField() throws Exception {
assertQ("/schema/fields/test_postv?indent=on&wt=xml&showDefaults=true",
"count(/response/lst[@name='field']) = 1",
"count(/response/lst[@name='field']/*) = 18",
"count(/response/lst[@name='field']/*) = 19",
"/response/lst[@name='field']/str[@name='name'] = 'test_postv'",
"/response/lst[@name='field']/str[@name='type'] = 'text'",
"/response/lst[@name='field']/bool[@name='indexed'] = 'true'",
"/response/lst[@name='field']/bool[@name='stored'] = 'true'",
"/response/lst[@name='field']/bool[@name='uninvertible'] = 'true'",
"/response/lst[@name='field']/bool[@name='docValues'] = 'false'",
"/response/lst[@name='field']/bool[@name='termVectors'] = 'true'",
"/response/lst[@name='field']/bool[@name='termPositions'] = 'true'",
@ -59,6 +60,7 @@ public class TestFieldResource extends SolrRestletTestBase {
"/field/type=='text'",
"/field/indexed==true",
"/field/stored==true",
"/field/uninvertible==true",
"/field/docValues==false",
"/field/termVectors==true",
"/field/termPositions==true",

View File

@ -26,12 +26,13 @@ public class TestFieldTypeResource extends SolrRestletTestBase {
final boolean expectedDocValues = Boolean.getBoolean(NUMERIC_DOCVALUES_SYSPROP);
assertQ("/schema/fieldtypes/float?wt=xml&showDefaults=true",
"count(/response/lst[@name='fieldType']) = 1",
"count(/response/lst[@name='fieldType']/*) = 17",
"count(/response/lst[@name='fieldType']/*) = 18",
"/response/lst[@name='fieldType']/str[@name='name'] = 'float'",
"/response/lst[@name='fieldType']/str[@name='class'] = '"+expectedFloatClass+"'",
"/response/lst[@name='fieldType']/str[@name='precisionStep'] ='0'",
"/response/lst[@name='fieldType']/bool[@name='indexed'] = 'true'",
"/response/lst[@name='fieldType']/bool[@name='stored'] = 'true'",
"/response/lst[@name='fieldType']/bool[@name='uninvertible'] = 'true'",
"/response/lst[@name='fieldType']/bool[@name='docValues'] = '"+expectedDocValues+"'",
"/response/lst[@name='fieldType']/bool[@name='termVectors'] = 'false'",
"/response/lst[@name='fieldType']/bool[@name='termPositions'] = 'false'",
@ -63,6 +64,7 @@ public class TestFieldTypeResource extends SolrRestletTestBase {
"/fieldType/precisionStep=='0'",
"/fieldType/indexed==true",
"/fieldType/stored==true",
"/fieldType/uninvertible==true",
"/fieldType/docValues=="+expectedDocValues,
"/fieldType/termVectors==false",
"/fieldType/termPositions==false",

View File

@ -29,6 +29,7 @@ public class BadIndexSchemaTest extends AbstractBadConfigTestBase {
doTest("bad-schema-not-indexed-but-norms.xml", "bad_field");
doTest("bad-schema-not-indexed-but-tf.xml", "bad_field");
doTest("bad-schema-not-indexed-but-pos.xml", "bad_field");
doTest("bad-schema-not-indexed-but-uninvertible.xml", "bad_field");
doTest("bad-schema-omit-tf-but-not-pos.xml", "bad_field");
}

View File

@ -64,6 +64,11 @@ public class SchemaVersionSpecificBehaviorTest extends SolrTestCaseJ4 {
assertEquals(f + " field's type has wrong useDocValuesAsStored for ver=" + ver,
( v < 1.6F ? false : true),
field.useDocValuesAsStored());
// uninvertible defaults to true (for now)
assertEquals(f + " field's type has wrong uninvertable for ver=" + ver,
true,
field.isUninvertible());
}
// regardless of version, explicit multiValued values on field or type

View File

@ -809,6 +809,15 @@ public class TestCollapseQParserPlugin extends SolrTestCaseJ4 {
params.add("q", "*:*");
params.add("fq", "{!collapse field="+group+" "+optional_min_or_max+"}");
assertQ(req(params), "*[count(//doc)=0]");
// if a field is uninvertible=false, it should behave the same as a field that is indexed=false
// this is currently ok on fields that don't exist on any docs in the index
for (String f : Arrays.asList("not_indexed_sS", "indexed_s_not_uninvert")) {
for (String hint : Arrays.asList("", " hint=top_fc")) {
assertQ(req(params("q", "*:*", "fq", "{!collapse field="+f+hint+"}"))
, "*[count(//doc)=0]");
}
}
}
public void testNoDocsHaveGroupField() throws Exception {
@ -918,7 +927,8 @@ public class TestCollapseQParserPlugin extends SolrTestCaseJ4 {
@Test
public void testForNotSupportedCases() {
String[] doc = {"id","3", "term_s", "YYYY", "test_ii", "5000", "test_l", "100", "test_f", "200"};
String[] doc = {"id","3", "term_s", "YYYY", "test_ii", "5000", "test_l", "100", "test_f", "200",
"not_indexed_sS", "zzz", "indexed_s_not_uninvert", "zzz"};
assertU(adoc(doc));
assertU(commit());
@ -930,6 +940,22 @@ public class TestCollapseQParserPlugin extends SolrTestCaseJ4 {
assertQEx("Should Fail with Bad Request", "org.apache.solr.search.SyntaxError: undefined field: \"bleh\"",
req("q","*:*", "fq","{!collapse field=bleh}"), SolrException.ErrorCode.BAD_REQUEST);
// if a field is uninvertible=false, it should behave the same as a field that is indexed=false ...
for (String f : Arrays.asList("not_indexed_sS", "indexed_s_not_uninvert")) {
{ // this currently propagates up the low level DocValues error in the common case...
Exception e = expectThrows(RuntimeException.class, IllegalStateException.class,
() -> h.query(req(params("q", "*:*",
"fq", "{!collapse field="+f+"}"))));
assertTrue("unexpected Message: " + e.getMessage(),
e.getMessage().contains("Re-index with correct docvalues type"));
}
{ // ... but in the case of hint=top_fc a bare NPE gets propagated up (SOLR-12979)...
expectThrows(RuntimeException.class, NullPointerException.class,
() -> h.query(req(params("q", "*:*",
"fq", "{!collapse field="+f+" hint=top_fc}"))));
}
}
}
@Test

View File

@ -37,7 +37,9 @@ import org.apache.solr.SolrTestCaseHS;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.macro.MacroExpander;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
@ -212,6 +214,75 @@ public class TestJsonFacets extends SolrTestCaseHS {
client.commit();
}
/**
 * Checks that terms faceting on {@code uninvertible=false} fields mirrors the behavior
 * of {@code indexed=false} fields: unsupported without docValues, working with
 * docValues, and still possible through {@code method:enum} with an index sort.
 */
public void testBehaviorEquivilenceOfUninvertibleFalse() throws Exception {
  indexSimple(Client.localClient());

  // Whatever facet method the test class selected as its default, faceting on an
  // "uninvertible=false docValues=false" field is not supported.
  //
  // It has to act the same as any attempt (with any method) to facet on an
  // "indexed=false docValues=false" field...
  final String[] noDocValuesFields = {
    "where_s_not_indexed_sS",
    "where_s_multi_not_uninvert",
    "where_s_single_not_uninvert"
  };
  for (String fieldName : noDocValuesFields) {
    final String termsFacet = "{x: {type:terms, field:'" + fieldName + "'}}";
    final SolrQueryRequest facetReq =
        req("rows", "0", "q", "num_i:[* TO 2]", "json.facet", termsFacet);

    // DVHASH is (currently) the odd one out:
    //
    // multi valued fields ignore it, but on single valued fields it explicitly inspects
    // the *FieldInfos* of the reader to decide whether the DocValues type is acceptable.
    //
    // So, unlike most other method:xxx choices, it fails hard when used on a field
    // for which no docs have been indexed (yet).
    final boolean dvHashOnSingleValued =
        FacetField.FacetMethod.DEFAULT_METHOD == FacetField.FacetMethod.DVHASH
        && !fieldName.contains("multi");

    if (!dvHashOnSingleValued) {
      // The usual outcome: the request succeeds but yields no buckets...
      assertJQ(facetReq,
               "response/numFound==3",
               "facets/count==3",
               "facets/x=={buckets:[]}");
      continue;
    }
    expectThrows(SolrException.class, () -> {
      assertJQ(facetReq);
    });
  }

  // Whatever facet method the test class selected as its default, faceting on an
  // "uninvertible=false docValues=true" field must work,
  //
  // and must give the same answer as its copyField source...
  final String[] docValuesFields = {
    "where_s",
    "where_s_multi_not_uninvert_dv",
    "where_s_single_not_uninvert_dv"
  };
  for (String fieldName : docValuesFields) {
    final String termsFacet = "{x: {type:terms, field:'" + fieldName + "'}}";
    assertJQ(req("rows", "0", "q", "num_i:[* TO 2]", "json.facet", termsFacet),
             "response/numFound==3",
             "facets/count==3",
             "facets/x=={buckets:[ {val:NY, count:2} , {val:NJ, count:1} ]}");
  }

  // Faceting on an "uninvertible=false docValues=false" field must remain possible
  // with method:enum and sort:index,
  //
  // again giving the same answer as its copyField source...
  final String[] enumFacetFields = {
    "where_s",
    "where_s_multi_not_uninvert",
    "where_s_single_not_uninvert"
  };
  for (String fieldName : enumFacetFields) {
    final String enumFacet =
        "{x: {type:terms, sort:'index asc', method:enum, field:'" + fieldName + "'}}";
    assertJQ(req("rows", "0", "q", "num_i:[* TO 2]", "json.facet", enumFacet),
             "response/numFound==3",
             "facets/count==3",
             "facets/x=={buckets:[ {val:NJ, count:1} , {val:NY, count:2} ]}");
  }
}
/**
* whitebox sanity checks that a shard request range facet that returns "between" or "after"
* will cause the correct "actual_end" to be returned

View File

@ -56,6 +56,7 @@ Fields can have many of the same properties as field types. Properties from the
|docValues |If true, the value of the field will be put in a column-oriented <<docvalues.adoc#docvalues,DocValues>> structure. |true or false |false
|sortMissingFirst sortMissingLast |Control the placement of documents when a sort field is not present. |true or false |false
|multiValued |If true, indicates that a single document might contain multiple values for this field type. |true or false |false
|uninvertible |If true, indicates that an `indexed="true" docValues="false"` field can be "un-inverted" at query time to build up a large in-memory data structure to serve in place of <<docvalues.adoc#docvalues,DocValues>>. *Defaults to true for historical reasons, but users are strongly encouraged to set this to `false` for stability and use `docValues="true"` as needed.* |true or false |true
|omitNorms |If true, omits the norms associated with this field (this disables length normalization for the field, and saves some memory). *Defaults to true for all primitive (non-analyzed) field types, such as int, float, date, bool, and string.* Only full-text fields need norms. |true or false |*
|omitTermFreqAndPositions |If true, omits term frequency, positions, and payloads from postings for this field. This can be a performance boost for fields that don't require that information. It also reduces the storage space required for the index. Queries that rely on position that are issued on a field with this option will silently fail to find documents. *This property defaults to true for all field types that are not text fields.* |true or false |*
|omitPositions |Similar to `omitTermFreqAndPositions` but preserves term frequency information. |true or false |*

View File

@ -130,6 +130,7 @@ The default values for each property depend on the underlying `FieldType` class,
|docValues |If true, the value of the field will be put in a column-oriented <<docvalues.adoc#docvalues,DocValues>> structure. |true or false |false
|sortMissingFirst sortMissingLast |Control the placement of documents when a sort field is not present. |true or false |false
|multiValued |If true, indicates that a single document might contain multiple values for this field type. |true or false |false
|uninvertible |If true, indicates that an `indexed="true" docValues="false"` field can be "un-inverted" at query time to build up a large in-memory data structure to serve in place of <<docvalues.adoc#docvalues,DocValues>>. *Defaults to true for historical reasons, but users are strongly encouraged to set this to `false` for stability and use `docValues="true"` as needed.* |true or false |true
|omitNorms |If true, omits the norms associated with this field (this disables length normalization for the field, and saves some memory). *Defaults to true for all primitive (non-analyzed) field types, such as int, float, date, bool, and string.* Only full-text fields need norms. |true or false |*
|omitTermFreqAndPositions |If true, omits term frequency, positions, and payloads from postings for this field. This can be a performance boost for fields that don't require that information. It also reduces the storage space required for the index. Queries that rely on position that are issued on a field with this option will silently fail to find documents. *This property defaults to true for all field types that are not text fields.* |true or false |*
|omitPositions |Similar to `omitTermFreqAndPositions` but preserves term frequency information. |true or false |*

View File

@ -24,6 +24,7 @@ public enum FieldFlag {
TOKENIZED('T', "Tokenized"),
STORED('S', "Stored"),
DOC_VALUES('D', "DocValues"),
UNINVERTIBLE('U', "UnInvertible"),
MULTI_VALUED('M', "Multivalued"),
TERM_VECTOR_STORED('V', "TermVector Stored"),
TERM_VECTOR_OFFSET('o', "Store Offset With TermVector"),

View File

@ -150,7 +150,8 @@ solrAdminApp.controller('SchemaController',
$scope.newField = {
stored: "true",
indexed: "true"
indexed: "true",
uninvertible: "true"
}
delete $scope.addErrors;
}

View File

@ -49,6 +49,13 @@ limitations under the License.
indexed
</label>
</p>
<p class="clearfix">
<label class="checkbox" for="add_uninvertible">
<input type="checkbox" ng-model="newField.uninvertible" id="add_uninvertible" title="Field should be uninvertible, it is generally recommended to use docValues instead." ng-true-value="'true'" ng-false-value="'false'">
uninvertible
</label>
</p>
<p class="clearfix">
<label class="checkbox" for="add_docValues">