mirror of https://github.com/apache/lucene.git
SOLR-11916: new SortableTextField which supports analysis/searching just like TextField, but also sorting/faceting just like StrField
This commit is contained in:
parent
b0b963c68e
commit
95122e1448
|
@ -149,6 +149,10 @@ New Features
|
|||
|
||||
* SOLR-11890: Add multiKmeans Stream Evaluator (Joel Bernstein)
|
||||
|
||||
* SOLR-11916: new SortableTextField which supports analysis/searching just like TextField, but also
|
||||
sorting/faceting just like StrField. By default uses only the first 1024 chars of the original
|
||||
input string values, but this is configurable. (hossman)
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -0,0 +1,215 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.schema;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.document.SortedDocValuesField;
|
||||
import org.apache.lucene.document.SortedSetDocValuesField;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* <code>SortableTextField</code> is a specialized form of {@link TextField} that supports
|
||||
* Sorting and ValueSource functions, using <code>docValues</code> built from the first
|
||||
* <code>maxCharsForDocValues</code> characters of the original (pre-analyzed) String values of this field.
|
||||
* </p>
|
||||
* <p>
|
||||
* The implicit default value for <code>maxCharsForDocValues</code> is <code>1024</code>. If a field
|
||||
* type instance is configured with <code>maxCharsForDocValues &lt;= 0</code> this overrides the default
|
||||
* with an effective value of "no limit" ({@link Integer#MAX_VALUE}).
|
||||
* </p>
|
||||
* <p>
|
||||
* Instances of this FieldType implicitly default to <code>docValues="true"</code> unless explicitly
|
||||
* configured with <code>docValues="false"</code>.
|
||||
* </p>
|
||||
* <p>
|
||||
* Just like {@link StrField}, instances of this field that are <code>multiValued="true"</code> support
|
||||
* the <code>field(name,min|max)</code> function, and implicitly sort on <code>min|max</code> depending
|
||||
* on the <code>asc|desc</code> direction selector.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* <b>NOTE:</b> Unlike most other FieldTypes, this class defaults to
|
||||
* <code>useDocValuesAsStored="false"</code>. If an instance of this type (or a field that uses this type)
|
||||
* overrides this behavior to set <code>useDocValuesAsStored="true"</code> then instead of truncating the
|
||||
* original string value based on the effective value of <code>maxCharsForDocValues</code>, this class
|
||||
* will reject any documents w/a field value longer than that limit -- causing the document update to fail.
|
||||
* This behavior exists to prevent situations that could result in a search client receiving only a truncated
|
||||
* version of the original field value in place of a <code>stored</code> value.
|
||||
* </p>
|
||||
*/
|
||||
public class SortableTextField extends TextField {
|
||||
|
||||
public static final int DEFAULT_MAX_CHARS_FOR_DOC_VALUES = 1024;
|
||||
|
||||
private int maxCharsForDocValues = DEFAULT_MAX_CHARS_FOR_DOC_VALUES;
|
||||
|
||||
protected void init(IndexSchema schema, Map<String,String> args) {
|
||||
{
|
||||
final String maxS = args.remove("maxCharsForDocValues");
|
||||
if (maxS != null) {
|
||||
maxCharsForDocValues = Integer.parseInt(maxS);
|
||||
if (maxCharsForDocValues <= 0) {
|
||||
maxCharsForDocValues = Integer.MAX_VALUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// by the time our init() is called, super.setArgs has already removed & procesesd any explicit
|
||||
// "docValues=foo" or useDocValuesAsStored=bar args...
|
||||
// - If the user explicitly said docValues=false, we want to respect that and not change it.
|
||||
// - if the user didn't explicit specify anything, then we want to implicitly *default* docValues=true
|
||||
// - The inverse is true for useDocValuesAsStored=true:
|
||||
// - if explict, then respect it; else implicitly default to useDocValuesAsStored=false
|
||||
// ...lucky for us, setArgs preserved info about explicitly set true|false properties...
|
||||
if (! on(falseProperties, DOC_VALUES)) {
|
||||
properties |= DOC_VALUES;
|
||||
}
|
||||
if (! on(trueProperties, USE_DOCVALUES_AS_STORED)) {
|
||||
properties &= ~USE_DOCVALUES_AS_STORED;
|
||||
}
|
||||
|
||||
super.init(schema, args);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<IndexableField> createFields(SchemaField field, Object value) {
|
||||
IndexableField f = createField( field, value);
|
||||
if (! field.hasDocValues()) {
|
||||
return Collections.singletonList(f);
|
||||
}
|
||||
final String origString = value.toString();
|
||||
final int origLegth = origString.length();
|
||||
final boolean truncate = maxCharsForDocValues < origLegth;
|
||||
if (field.useDocValuesAsStored() && truncate) {
|
||||
// if the user has explicitly configured useDocValuesAsStored, we need a special
|
||||
// check to fail docs where the values are too long -- we don't want to silently
|
||||
// accept and then have search queries returning partial values
|
||||
throw new SolrException
|
||||
(SolrException.ErrorCode.BAD_REQUEST,
|
||||
"Can not use field " + field.getName() + " with values longer then maxCharsForDocValues=" +
|
||||
maxCharsForDocValues + " when useDocValuesAsStored=true (length=" + origLegth + ")");
|
||||
}
|
||||
final BytesRef bytes = new BytesRef(truncate ? origString.subSequence(0, maxCharsForDocValues) : origString);
|
||||
|
||||
final IndexableField docval = field.multiValued()
|
||||
? new SortedSetDocValuesField(field.getName(), bytes)
|
||||
: new SortedDocValuesField(field.getName(), bytes);
|
||||
|
||||
if (null == f) {
|
||||
return Collections.singletonList(docval);
|
||||
}
|
||||
return Arrays.asList(f, docval);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
* this field type supports DocValues, this method is always a No-Op
|
||||
*/
|
||||
@Override
|
||||
protected void checkSupportsDocValues() {
|
||||
// No-Op
|
||||
}
|
||||
|
||||
@Override
|
||||
public SortField getSortField(SchemaField field, boolean reverse) {
|
||||
if (! field.hasDocValues()) {
|
||||
// type defaults to docValues=true, so error msg from perspective that
|
||||
// either type or field must have docValues="false"
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||
"Can not sort on this type of field when docValues=\"false\", field: " + field.getName());
|
||||
}
|
||||
|
||||
// NOTE: we explicitly bypass super.getSortField so that our getDefaultMultiValueSelectorForSort
|
||||
// is used and we don't get the historic Uninversion behavior of TextField.
|
||||
return getStringSort(field, reverse);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValueSource getValueSource(SchemaField field, QParser parser) {
|
||||
if (! field.hasDocValues()) {
|
||||
// type defaults to docValues=true, so error msg from perspective that
|
||||
// either type or field must have docValues="false"
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||
"Can not use ValueSource on this type of field when docValues=\"false\", field: " + field.getName());
|
||||
}
|
||||
return super.getValueSource(field, parser);
|
||||
}
|
||||
|
||||
@Override
|
||||
public MultiValueSelector getDefaultMultiValueSelectorForSort(SchemaField field, boolean reverse) {
|
||||
return reverse ? MultiValueSelector.MAX : MultiValueSelector.MIN;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValueSource getSingleValueSource(MultiValueSelector choice, SchemaField field, QParser parser) {
|
||||
// trivial base case
|
||||
if (!field.multiValued()) {
|
||||
// single value matches any selector
|
||||
return getValueSource(field, parser);
|
||||
}
|
||||
|
||||
// See LUCENE-6709
|
||||
if (! field.hasDocValues()) {
|
||||
// type defaults to docValues=true, so error msg from perspective that
|
||||
// either type or field must have docValues="false"
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||
"Can not select '" + choice.toString() + "' value from multivalued field ("+
|
||||
field.getName() +") when docValues=\"false\", field: " + field.getName());
|
||||
}
|
||||
SortedSetSelector.Type selectorType = choice.getSortedSetSelectorType();
|
||||
if (null == selectorType) {
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||
choice.toString() + " is not a supported option for picking a single value"
|
||||
+ " from the multivalued field: " + field.getName() +
|
||||
" (type: " + this.getTypeName() + ")");
|
||||
}
|
||||
|
||||
return new SortedSetFieldSource(field.getName(), selectorType);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
* this field type is not uninvertable, this method always returns null
|
||||
*/
|
||||
@Override
|
||||
public Type getUninversionType(SchemaField sf) {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
* This implementation always returns false.
|
||||
*/
|
||||
@Override
|
||||
public boolean multiValuedFieldCache() {
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
|
@ -118,8 +118,13 @@ public class TextField extends FieldType {
|
|||
@Override
|
||||
public SortField getSortField(SchemaField field, boolean reverse) {
|
||||
/* :TODO: maybe warn if isTokenized(), but doesn't use LimitTokenCountFilter in its chain? */
|
||||
return getSortedSetSortField(field, SortedSetSelector.Type.MIN, reverse,
|
||||
SortField.STRING_FIRST, SortField.STRING_LAST);
|
||||
return getSortedSetSortField(field,
|
||||
// historical behavior based on how the early versions of the FieldCache
|
||||
// would deal with multiple indexed terms in a single valued field...
|
||||
//
|
||||
// Always use the 'min' value from the (Uninverted) "pseudo doc values"
|
||||
SortedSetSelector.Type.MIN,
|
||||
reverse, SortField.STRING_FIRST, SortField.STRING_LAST);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -0,0 +1,149 @@
|
|||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<schema name="minimal" version="1.6">
|
||||
<uniqueKey>id</uniqueKey>
|
||||
|
||||
<field name="id" type="str" indexed="true" docValues="true" stored="true" />
|
||||
<field name="id_i" type="int" indexed="true" docValues="true" stored="true" />
|
||||
|
||||
<!-- NOTE: docValues="true" should be implicit for SortableTextField -->
|
||||
|
||||
<field name="whitespace_stxt" type="whitespace_stxt" indexed="true" stored="true" />
|
||||
<field name="whitespace_m_stxt" type="whitespace_stxt" indexed="true" stored="true" multiValued="true" />
|
||||
<!-- explicit docValues="false" in this version... -->
|
||||
<field name="whitespace_nodv_stxt" type="whitespace_stxt" indexed="true" docValues="false" stored="true" />
|
||||
<!-- only docValues in this version, no index or stored... -->
|
||||
<field name="whitespace_nois_stxt" type="whitespace_stxt" indexed="false" docValues="true" stored="false" />
|
||||
|
||||
<field name="whitespace_max3_stxt" type="whitespace_max3_stxt" />
|
||||
<field name="whitespace_max6_stxt" type="whitespace_max6_stxt" />
|
||||
<field name="whitespace_max0_stxt" type="whitespace_max0_stxt" />
|
||||
<field name="whitespace_maxNeg_stxt" type="whitespace_maxNeg_stxt" />
|
||||
|
||||
<field name="whitespace_f_stxt" type="whitespace_f_stxt" indexed="true" docValues="true" stored="true" />
|
||||
<field name="whitespace_l_stxt" type="whitespace_l_stxt" indexed="true" docValues="true" stored="true" />
|
||||
|
||||
<field name="keyword_stxt" type="keyword_stxt" indexed="true" stored="true" />
|
||||
<!-- explicit docValues="true" in this field version... -->
|
||||
<field name="keyword_dv_stxt" type="keyword_stxt" indexed="true" docValues="true" stored="true" />
|
||||
|
||||
<!-- for behavioral equivalency testing -->
|
||||
<field name="whitespace_plain_txt" type="whitespace_plain_txt" />
|
||||
<field name="whitespace_plain_str" type="str" />
|
||||
<field name="keyword_s_dv" type="str" indexed="false" docValues="true" stored="true" />
|
||||
<field name="keyword_s" type="str" indexed="true" docValues="false" stored="true" />
|
||||
|
||||
<!-- . -->
|
||||
|
||||
<copyField source="whitespace_stxt" dest="whitespace_m_stxt"/>
|
||||
<copyField source="whitespace_stxt" dest="whitespace_nodv_stxt"/>
|
||||
<copyField source="whitespace_stxt" dest="whitespace_nois_stxt"/>
|
||||
<copyField source="whitespace_stxt" dest="whitespace_max3_stxt"/>
|
||||
<copyField source="whitespace_stxt" dest="whitespace_max6_stxt"/>
|
||||
<copyField source="whitespace_stxt" dest="whitespace_max0_stxt"/>
|
||||
<copyField source="whitespace_stxt" dest="whitespace_maxNeg_stxt"/>
|
||||
<copyField source="whitespace_stxt" dest="whitespace_plain_txt"/>
|
||||
<copyField source="whitespace_stxt" dest="whitespace_plain_str"/>
|
||||
|
||||
<copyField source="whitespace_f_stxt" dest="whitespace_l_stxt"/>
|
||||
|
||||
<copyField source="keyword_stxt" dest="keyword_dv_stxt"/>
|
||||
<copyField source="keyword_stxt" dest="keyword_s"/>
|
||||
<copyField source="keyword_stxt" dest="keyword_s_dv"/>
|
||||
|
||||
<copyField source="id" dest="id_i"/>
|
||||
|
||||
<!-- . -->
|
||||
|
||||
<!-- NOTE: explicitly not specifying docValues=true on these, it should be implicit default -->
|
||||
<fieldType name="whitespace_stxt" class="solr.SortableTextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="whitespace_max3_stxt" class="solr.SortableTextField" maxCharsForDocValues="3">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="whitespace_max6_stxt" class="solr.SortableTextField" maxCharsForDocValues="6">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="whitespace_max0_stxt" class="solr.SortableTextField" maxCharsForDocValues="0">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="whitespace_maxNeg_stxt" class="solr.SortableTextField" maxCharsForDocValues="-42">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="whitespace_l_stxt" class="solr.SortableTextField" sortMissingLast="true">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="whitespace_f_stxt" class="solr.SortableTextField" sortMissingFirst="true">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<fieldType name="keyword_stxt" class="solr.SortableTextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.KeywordTokenizerFactory" />
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- some special edge cases for testing how useDocValuesAsStored (on type or field)
|
||||
interacts with maxCharsForDocValues
|
||||
NOTE: 2 types and 6 fields ending in '_usedvs': either '_has_usedvs' or '_negates_usedvs'
|
||||
-->
|
||||
<!-- max6 -->
|
||||
<field name="max6_field_has_usedvs" type="whitespace_max6_stxt" stored="false" useDocValuesAsStored="true" />
|
||||
<field name="max6_type_has_usedvs" type="max6_type_has_usedvs" />
|
||||
<field name="max6_field_negates_usedvs" type="max6_type_has_usedvs" useDocValuesAsStored="false" />
|
||||
<fieldType name="max6_type_has_usedvs" class="solr.SortableTextField" stored="false"
|
||||
maxCharsForDocValues="6" useDocValuesAsStored="true">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory" />
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<!-- max0 -->
|
||||
<field name="max0_field_has_usedvs" type="whitespace_max0_stxt" stored="false" useDocValuesAsStored="true" />
|
||||
<field name="max0_type_has_usedvs" type="max0_type_has_usedvs" />
|
||||
<field name="max0_field_negates_usedvs" type="max0_type_has_usedvs" useDocValuesAsStored="false" />
|
||||
<fieldType name="max0_type_has_usedvs" class="solr.SortableTextField" stored="false"
|
||||
maxCharsForDocValues="0" useDocValuesAsStored="true">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory" />
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
|
||||
<fieldType name="str" class="solr.StrField"/>
|
||||
<fieldType name="whitespace_plain_txt" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory" />
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="int" class="${solr.tests.IntegerFieldType}"/>
|
||||
</schema>
|
|
@ -519,6 +519,20 @@ valued. -->
|
|||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- See TestMinMaxOnMultiValuedField -->
|
||||
<field name="val_stxt_s_dv" type="whitespace_stxt" multiValued="true"/>
|
||||
<field name="val_stxt_missf_s_dv" type="whitespace_stxt" multiValued="true" sortMissingFirst="true"/>
|
||||
<field name="val_stxt_missl_s_dv" type="whitespace_stxt" multiValued="true" sortMissingLast="true"/>
|
||||
<field name="val_stxt_s_nodv" type="whitespace_stxt" multiValued="true" docValues="false" />
|
||||
<!-- NOTE: explicitly not specifying docValues=true, it should be implicit default -->
|
||||
<fieldType name="whitespace_stxt" class="solr.SortableTextField" indexed="true" stored="true">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
|
||||
|
||||
<!-- Field to use to determine and enforce document uniqueness.
|
||||
Unless this field is marked with required="false", it will be a required field
|
||||
-->
|
||||
|
|
|
@ -24,6 +24,7 @@ import org.apache.lucene.search.similarities.BM25Similarity;
|
|||
import org.apache.lucene.misc.SweetSpotSimilarity;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.core.CoreContainer;
|
||||
import org.apache.solr.schema.SimilarityFactory;
|
||||
|
@ -84,7 +85,6 @@ public class TestBulkSchemaAPI extends RestTestBase {
|
|||
jetty.stop();
|
||||
jetty = null;
|
||||
}
|
||||
client = null;
|
||||
if (restTestHarness != null) {
|
||||
restTestHarness.close();
|
||||
}
|
||||
|
@ -840,6 +840,60 @@ public class TestBulkSchemaAPI extends RestTestBase {
|
|||
map = (Map)ObjectBuilder.getVal(new JSONParser(new StringReader(response)));
|
||||
assertNull(map.get("error"));
|
||||
}
|
||||
public void testSortableTextFieldWithAnalyzer() throws Exception {
|
||||
String fieldTypeName = "sort_text_type";
|
||||
String fieldName = "sort_text";
|
||||
String payload = "{\n" +
|
||||
" 'add-field-type' : {" +
|
||||
" 'name' : '" + fieldTypeName + "',\n" +
|
||||
" 'stored':true,\n" +
|
||||
" 'indexed':true\n" +
|
||||
" 'maxCharsForDocValues':6\n" +
|
||||
" 'class':'solr.SortableTextField',\n" +
|
||||
" 'analyzer' : {'tokenizer':{'class':'solr.WhitespaceTokenizerFactory'}},\n" +
|
||||
" },\n"+
|
||||
" 'add-field' : {\n" +
|
||||
" 'name':'" + fieldName + "',\n" +
|
||||
" 'type': '"+fieldTypeName+"',\n" +
|
||||
" }\n" +
|
||||
"}\n";
|
||||
|
||||
String response = restTestHarness.post("/schema", json(payload));
|
||||
|
||||
Map map = (Map) ObjectBuilder.getVal(new JSONParser(new StringReader(response)));
|
||||
assertNull(response, map.get("errors"));
|
||||
|
||||
Map fields = getObj(restTestHarness, fieldName, "fields");
|
||||
assertNotNull("field " + fieldName + " not created", fields);
|
||||
|
||||
assertEquals(0,
|
||||
getSolrClient().add(Arrays.asList(sdoc("id","1",fieldName,"xxx aaa"),
|
||||
sdoc("id","2",fieldName,"xxx bbb aaa"),
|
||||
sdoc("id","3",fieldName,"xxx bbb zzz"))).getStatus());
|
||||
|
||||
assertEquals(0, getSolrClient().commit().getStatus());
|
||||
{
|
||||
SolrDocumentList docs = getSolrClient().query
|
||||
(params("q",fieldName+":xxx","sort", fieldName + " asc, id desc")).getResults();
|
||||
|
||||
assertEquals(3L, docs.getNumFound());
|
||||
assertEquals(3L, docs.size());
|
||||
assertEquals("1", docs.get(0).getFieldValue("id"));
|
||||
assertEquals("3", docs.get(1).getFieldValue("id"));
|
||||
assertEquals("2", docs.get(2).getFieldValue("id"));
|
||||
}
|
||||
{
|
||||
SolrDocumentList docs = getSolrClient().query
|
||||
(params("q",fieldName+":xxx", "sort", fieldName + " desc, id asc")).getResults();
|
||||
|
||||
assertEquals(3L, docs.getNumFound());
|
||||
assertEquals(3L, docs.size());
|
||||
assertEquals("2", docs.get(0).getFieldValue("id"));
|
||||
assertEquals("3", docs.get(1).getFieldValue("id"));
|
||||
assertEquals("1", docs.get(2).getFieldValue("id"));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public void testSimilarityParser() throws Exception {
|
||||
RestTestHarness harness = restTestHarness;
|
||||
|
|
|
@ -0,0 +1,562 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.schema;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.SortedDocValuesField;
|
||||
import org.apache.lucene.document.SortedSetDocValuesField;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.client.solrj.SolrClient;
|
||||
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.util.RefCounted;
|
||||
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
import static org.hamcrest.CoreMatchers.instanceOf;
|
||||
|
||||
public class TestSortableTextField extends SolrTestCaseJ4 {
|
||||
|
||||
protected static final String BIG_CONST
|
||||
= StringUtils.repeat("x", SortableTextField.DEFAULT_MAX_CHARS_FOR_DOC_VALUES);
|
||||
|
||||
@BeforeClass
|
||||
public static void create() throws Exception {
|
||||
initCore("solrconfig-minimal.xml","schema-sorting-text.xml");
|
||||
|
||||
// sanity check our fields & types...
|
||||
|
||||
// these should all use docValues (either explicitly or implicitly)...
|
||||
for (String n : Arrays.asList("keyword_stxt",
|
||||
"whitespace_stxt", "whitespace_f_stxt", "whitespace_l_stxt")) {
|
||||
|
||||
FieldType ft = h.getCore().getLatestSchema().getFieldTypeByName(n);
|
||||
assertEquals("type " + ft.getTypeName() + " should have docvalues - schema got changed?",
|
||||
true, ft.getNamedPropertyValues(true).get("docValues")) ;
|
||||
}
|
||||
for (String n : Arrays.asList("keyword_stxt", "keyword_dv_stxt",
|
||||
"whitespace_stxt", "whitespace_nois_stxt",
|
||||
"whitespace_f_stxt", "whitespace_l_stxt")) {
|
||||
|
||||
SchemaField sf = h.getCore().getLatestSchema().getField(n);
|
||||
assertTrue("field " + sf.getName() + " should have docvalues - schema got changed?",
|
||||
sf.hasDocValues()) ;
|
||||
}
|
||||
|
||||
{ // this field should *NOT* have docValues .. should behave like a plain old TextField
|
||||
SchemaField sf = h.getCore().getLatestSchema().getField("whitespace_nodv_stxt");
|
||||
assertFalse("field " + sf.getName() + " should not have docvalues - schema got changed?",
|
||||
sf.hasDocValues()) ;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Before
|
||||
public void cleanup() throws Exception {
|
||||
clearIndex();
|
||||
}
|
||||
|
||||
public void testSimple() throws Exception {
|
||||
assertU(adoc("id","1", "whitespace_stxt", "how now brown cow ?", "whitespace_f_stxt", "aaa bbb"));
|
||||
assertU(adoc("id","2", "whitespace_stxt", "how now brown dog ?", "whitespace_f_stxt", "bbb aaa"));
|
||||
assertU(adoc("id","3", "whitespace_stxt", "how now brown cat ?", "whitespace_f_stxt", "xxx yyy"));
|
||||
assertU(adoc("id","4", "whitespace_stxt", "dog and cat" /* no val for whitespace_f_stxt */));
|
||||
|
||||
assertU(commit());
|
||||
|
||||
// search & sort
|
||||
// NOTE: even if the field is indexed=false, should still be able to sort on it
|
||||
for (String sortf : Arrays.asList("whitespace_stxt", "whitespace_nois_stxt", "whitespace_plain_str")) {
|
||||
assertQ(req("q", "whitespace_stxt:cat", "sort", sortf + " asc")
|
||||
, "//*[@numFound='2']"
|
||||
, "//result/doc[1]/str[@name='id'][.=4]"
|
||||
, "//result/doc[2]/str[@name='id'][.=3]"
|
||||
);
|
||||
assertQ(req("q", "whitespace_stxt:cat", "sort", sortf + " desc")
|
||||
, "//*[@numFound='2']"
|
||||
, "//result/doc[1]/str[@name='id'][.=3]"
|
||||
, "//result/doc[2]/str[@name='id'][.=4]"
|
||||
);
|
||||
assertQ(req("q", "whitespace_stxt:brown", "sort", sortf + " asc")
|
||||
, "//*[@numFound='3']"
|
||||
, "//result/doc[1]/str[@name='id'][.=3]"
|
||||
, "//result/doc[2]/str[@name='id'][.=1]"
|
||||
, "//result/doc[3]/str[@name='id'][.=2]"
|
||||
);
|
||||
assertQ(req("q", "whitespace_stxt:brown", "sort", sortf + " desc")
|
||||
, "//*[@numFound='3']"
|
||||
, "//result/doc[1]/str[@name='id'][.=2]"
|
||||
, "//result/doc[2]/str[@name='id'][.=1]"
|
||||
, "//result/doc[3]/str[@name='id'][.=3]"
|
||||
);
|
||||
|
||||
// we should still be able to search if docValues="false" (but sort on a diff field)
|
||||
assertQ(req("q","whitespace_nodv_stxt:cat", "sort", sortf + " asc")
|
||||
, "//*[@numFound='2']"
|
||||
, "//result/doc[1]/str[@name='id'][.=4]"
|
||||
, "//result/doc[2]/str[@name='id'][.=3]"
|
||||
);
|
||||
}
|
||||
|
||||
// attempting to sort on docValues="false" field should give an error...
|
||||
assertQEx("attempting to sort on docValues=false field should give an error",
|
||||
"when docValues=\"false\"",
|
||||
req("q","*:*", "sort", "whitespace_nodv_stxt asc"),
|
||||
ErrorCode.BAD_REQUEST);
|
||||
|
||||
// sortMissing - whitespace_f_stxt copyField to whitespace_l_stxt
|
||||
assertQ(req("q","*:*", "sort", "whitespace_f_stxt asc")
|
||||
, "//*[@numFound='4']"
|
||||
, "//result/doc[1]/str[@name='id'][.=4]"
|
||||
, "//result/doc[2]/str[@name='id'][.=1]"
|
||||
, "//result/doc[3]/str[@name='id'][.=2]"
|
||||
, "//result/doc[4]/str[@name='id'][.=3]"
|
||||
);
|
||||
assertQ(req("q","*:*", "sort", "whitespace_f_stxt desc")
|
||||
, "//*[@numFound='4']"
|
||||
, "//result/doc[1]/str[@name='id'][.=4]"
|
||||
, "//result/doc[2]/str[@name='id'][.=3]"
|
||||
, "//result/doc[3]/str[@name='id'][.=2]"
|
||||
, "//result/doc[4]/str[@name='id'][.=1]"
|
||||
);
|
||||
assertQ(req("q","*:*", "sort", "whitespace_l_stxt asc")
|
||||
, "//*[@numFound='4']"
|
||||
, "//result/doc[1]/str[@name='id'][.=1]"
|
||||
, "//result/doc[2]/str[@name='id'][.=2]"
|
||||
, "//result/doc[3]/str[@name='id'][.=3]"
|
||||
, "//result/doc[4]/str[@name='id'][.=4]"
|
||||
);
|
||||
assertQ(req("q","*:*", "sort", "whitespace_l_stxt desc")
|
||||
, "//*[@numFound='4']"
|
||||
, "//result/doc[1]/str[@name='id'][.=3]"
|
||||
, "//result/doc[2]/str[@name='id'][.=2]"
|
||||
, "//result/doc[3]/str[@name='id'][.=1]"
|
||||
, "//result/doc[4]/str[@name='id'][.=4]"
|
||||
);
|
||||
}
|
||||
|
||||
public void testSimpleSearchAndFacets() throws Exception {
|
||||
assertU(adoc("id","1", "whitespace_stxt", "how now brown cow ?"));
|
||||
assertU(adoc("id","2", "whitespace_stxt", "how now brown cow ?"));
|
||||
assertU(adoc("id","3", "whitespace_stxt", "holy cow !"));
|
||||
assertU(adoc("id","4", "whitespace_stxt", "dog and cat"));
|
||||
|
||||
assertU(commit());
|
||||
|
||||
// NOTE: even if the field is indexed=false, should still be able to facet on it
|
||||
for (String facet : Arrays.asList("whitespace_stxt", "whitespace_nois_stxt",
|
||||
"whitespace_m_stxt", "whitespace_plain_str")) {
|
||||
for (String search : Arrays.asList("whitespace_stxt", "whitespace_nodv_stxt",
|
||||
"whitespace_m_stxt", "whitespace_plain_txt")) {
|
||||
// facet.field
|
||||
final String fpre = "//lst[@name='facet_fields']/lst[@name='"+facet+"']/";
|
||||
assertQ(req("q", search + ":cow", "rows", "0",
|
||||
"facet.field", facet, "facet", "true")
|
||||
, "//*[@numFound='3']"
|
||||
, fpre + "int[@name='how now brown cow ?'][.=2]"
|
||||
, fpre + "int[@name='holy cow !'][.=1]"
|
||||
, fpre + "int[@name='dog and cat'][.=0]"
|
||||
);
|
||||
|
||||
// json facet
|
||||
final String jpre = "//lst[@name='facets']/lst[@name='x']/arr[@name='buckets']/";
|
||||
assertQ(req("q", search + ":cow", "rows", "0",
|
||||
"json.facet", "{x:{ type: terms, field:'" + facet + "', mincount:0 }}")
|
||||
, "//*[@numFound='3']"
|
||||
, jpre + "lst[str[@name='val'][.='how now brown cow ?']][int[@name='count'][.=2]]"
|
||||
, jpre + "lst[str[@name='val'][.='holy cow !']][int[@name='count'][.=1]]"
|
||||
, jpre + "lst[str[@name='val'][.='dog and cat']][int[@name='count'][.=0]]"
|
||||
);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void testWhiteboxIndexReader() throws Exception {
|
||||
assertU(adoc("id","1",
|
||||
"whitespace_stxt", "how now brown cow ?",
|
||||
"whitespace_m_stxt", "xxx",
|
||||
"whitespace_m_stxt", "yyy",
|
||||
"whitespace_f_stxt", "aaa bbb",
|
||||
"keyword_stxt", "Blarggghhh!"));
|
||||
assertU(commit());
|
||||
|
||||
final RefCounted<SolrIndexSearcher> searcher = h.getCore().getNewestSearcher(false);
|
||||
try {
|
||||
final LeafReader r = searcher.get().getSlowAtomicReader();
|
||||
|
||||
// common cases...
|
||||
for (String field : Arrays.asList("keyword_stxt", "keyword_dv_stxt",
|
||||
"whitespace_stxt", "whitespace_f_stxt", "whitespace_l_stxt")) {
|
||||
assertNotNull("FieldInfos: " + field, r.getFieldInfos().fieldInfo(field));
|
||||
assertEquals("DocValuesType: " + field,
|
||||
DocValuesType.SORTED, r.getFieldInfos().fieldInfo(field).getDocValuesType());
|
||||
assertNotNull("DocValues: " + field, r.getSortedDocValues(field));
|
||||
assertNotNull("Terms: " + field, r.terms(field));
|
||||
|
||||
}
|
||||
|
||||
// special cases...
|
||||
assertNotNull(r.getFieldInfos().fieldInfo("whitespace_nodv_stxt"));
|
||||
assertEquals(DocValuesType.NONE,
|
||||
r.getFieldInfos().fieldInfo("whitespace_nodv_stxt").getDocValuesType());
|
||||
assertNull(r.getSortedDocValues("whitespace_nodv_stxt"));
|
||||
assertNotNull(r.terms("whitespace_nodv_stxt"));
|
||||
//
|
||||
assertNotNull(r.getFieldInfos().fieldInfo("whitespace_nois_stxt"));
|
||||
assertEquals(DocValuesType.SORTED,
|
||||
r.getFieldInfos().fieldInfo("whitespace_nois_stxt").getDocValuesType());
|
||||
assertNotNull(r.getSortedDocValues("whitespace_nois_stxt"));
|
||||
assertNull(r.terms("whitespace_nois_stxt"));
|
||||
//
|
||||
assertNotNull(r.getFieldInfos().fieldInfo("whitespace_m_stxt"));
|
||||
assertEquals(DocValuesType.SORTED_SET,
|
||||
r.getFieldInfos().fieldInfo("whitespace_m_stxt").getDocValuesType());
|
||||
assertNotNull(r.getSortedSetDocValues("whitespace_m_stxt"));
|
||||
assertNotNull(r.terms("whitespace_m_stxt"));
|
||||
|
||||
} finally {
|
||||
if (null != searcher) {
|
||||
searcher.decref();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testWhiteboxCreateFields() throws Exception {
|
||||
List<IndexableField> values = null;
|
||||
|
||||
// common case...
|
||||
for (String field : Arrays.asList("keyword_stxt", "keyword_dv_stxt",
|
||||
"whitespace_stxt", "whitespace_f_stxt", "whitespace_l_stxt")) {
|
||||
values = createIndexableFields(field);
|
||||
assertEquals(field, 2, values.size());
|
||||
assertThat(field, values.get(0), instanceOf(Field.class));
|
||||
assertThat(field, values.get(1), instanceOf(SortedDocValuesField.class));
|
||||
}
|
||||
|
||||
// special cases...
|
||||
values = createIndexableFields("whitespace_nois_stxt");
|
||||
assertEquals(1, values.size());
|
||||
assertThat(values.get(0), instanceOf(SortedDocValuesField.class));
|
||||
//
|
||||
values = createIndexableFields("whitespace_nodv_stxt");
|
||||
assertEquals(1, values.size());
|
||||
assertThat(values.get(0), instanceOf(Field.class));
|
||||
//
|
||||
values = createIndexableFields("whitespace_m_stxt");
|
||||
assertEquals(2, values.size());
|
||||
assertThat(values.get(0), instanceOf(Field.class));
|
||||
assertThat(values.get(1), instanceOf(SortedSetDocValuesField.class));
|
||||
}
|
||||
private List<IndexableField> createIndexableFields(String fieldName) {
|
||||
SchemaField sf = h.getCore().getLatestSchema().getField(fieldName);
|
||||
return sf.getType().createFields(sf, "dummy value");
|
||||
}
|
||||
|
||||
public void testMaxCharsSort() throws Exception {
|
||||
assertU(adoc("id","1", "whitespace_stxt", "aaa bbb ccc ddd"));
|
||||
assertU(adoc("id","2", "whitespace_stxt", "aaa bbb xxx yyy"));
|
||||
assertU(adoc("id","3", "whitespace_stxt", "aaa bbb ccc xxx"));
|
||||
assertU(adoc("id","4", "whitespace_stxt", "aaa"));
|
||||
assertU(commit());
|
||||
|
||||
// all terms should be searchable in all fields, even if the docvalues are limited
|
||||
for (String searchF : Arrays.asList("whitespace_stxt", "whitespace_plain_txt",
|
||||
"whitespace_max3_stxt", "whitespace_max6_stxt",
|
||||
"whitespace_max0_stxt", "whitespace_maxNeg_stxt")) {
|
||||
// maxChars of 0 or neg should be equivilent to no max at all
|
||||
for (String sortF : Arrays.asList("whitespace_stxt", "whitespace_plain_str",
|
||||
"whitespace_max0_stxt", "whitespace_maxNeg_stxt")) {
|
||||
|
||||
assertQ(req("q", searchF + ":ccc", "sort", sortF + " desc, id asc")
|
||||
, "//*[@numFound='2']"
|
||||
, "//result/doc[1]/str[@name='id'][.=3]"
|
||||
, "//result/doc[2]/str[@name='id'][.=1]"
|
||||
);
|
||||
|
||||
assertQ(req("q", searchF + ":ccc", "sort", sortF + " asc, id desc")
|
||||
, "//*[@numFound='2']"
|
||||
, "//result/doc[1]/str[@name='id'][.=1]"
|
||||
, "//result/doc[2]/str[@name='id'][.=3]"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// sorting on a maxChars limited fields should force tie breaker
|
||||
for (String dir : Arrays.asList("asc", "desc")) {
|
||||
// for max3, dir shouldn't matter - should always tie..
|
||||
assertQ(req("q", "*:*", "sort", "whitespace_max3_stxt "+dir+", id desc") // max3, id desc
|
||||
, "//*[@numFound='4']"
|
||||
, "//result/doc[1]/str[@name='id'][.=4]"
|
||||
, "//result/doc[2]/str[@name='id'][.=3]"
|
||||
, "//result/doc[3]/str[@name='id'][.=2]"
|
||||
, "//result/doc[4]/str[@name='id'][.=1]"
|
||||
);
|
||||
assertQ(req("q", "*:*", "sort", "whitespace_max3_stxt "+dir+", id asc") // max3, id desc
|
||||
, "//*[@numFound='4']"
|
||||
, "//result/doc[1]/str[@name='id'][.=1]"
|
||||
, "//result/doc[2]/str[@name='id'][.=2]"
|
||||
, "//result/doc[3]/str[@name='id'][.=3]"
|
||||
, "//result/doc[4]/str[@name='id'][.=4]"
|
||||
);
|
||||
}
|
||||
assertQ(req("q", "*:*", "sort", "whitespace_max6_stxt asc, id desc") // max6 asc, id desc
|
||||
, "//*[@numFound='4']"
|
||||
, "//result/doc[1]/str[@name='id'][.=4]" // no tiebreaker needed
|
||||
, "//result/doc[2]/str[@name='id'][.=3]"
|
||||
, "//result/doc[3]/str[@name='id'][.=2]"
|
||||
, "//result/doc[4]/str[@name='id'][.=1]"
|
||||
);
|
||||
assertQ(req("q", "*:*", "sort", "whitespace_max6_stxt asc, id asc") // max6 asc, id desc
|
||||
, "//*[@numFound='4']"
|
||||
, "//result/doc[1]/str[@name='id'][.=4]" // no tiebreaker needed
|
||||
, "//result/doc[2]/str[@name='id'][.=1]"
|
||||
, "//result/doc[3]/str[@name='id'][.=2]"
|
||||
, "//result/doc[4]/str[@name='id'][.=3]"
|
||||
);
|
||||
assertQ(req("q", "*:*", "sort", "whitespace_max6_stxt desc, id desc") // max6 desc, id desc
|
||||
, "//*[@numFound='4']"
|
||||
, "//result/doc[1]/str[@name='id'][.=3]"
|
||||
, "//result/doc[2]/str[@name='id'][.=2]"
|
||||
, "//result/doc[3]/str[@name='id'][.=1]"
|
||||
, "//result/doc[4]/str[@name='id'][.=4]" // no tiebreaker needed
|
||||
);
|
||||
assertQ(req("q", "*:*", "sort", "whitespace_max6_stxt desc, id asc") // max6 desc, id desc
|
||||
, "//*[@numFound='4']"
|
||||
, "//result/doc[1]/str[@name='id'][.=1]"
|
||||
, "//result/doc[2]/str[@name='id'][.=2]"
|
||||
, "//result/doc[3]/str[@name='id'][.=3]"
|
||||
, "//result/doc[4]/str[@name='id'][.=4]" // no tiebreaker needed
|
||||
);
|
||||
|
||||
// sanity check that the default max is working....
|
||||
assertU(adoc("id","5", "whitespace_stxt", BIG_CONST + " aaa zzz"));
|
||||
assertU(adoc("id","6", "whitespace_stxt", BIG_CONST + " bbb zzz "));
|
||||
assertU(commit());
|
||||
// for these fields, the tie breaker should be the only thing that matters, regardless of direction...
|
||||
for (String sortF : Arrays.asList("whitespace_stxt", "whitespace_nois_stxt")) {
|
||||
for (String dir : Arrays.asList("asc", "desc")) {
|
||||
assertQ(req("q", "whitespace_stxt:zzz", "sort", sortF + " " + dir + ", id asc")
|
||||
, "//*[@numFound='2']"
|
||||
, "//result/doc[1]/str[@name='id'][.=5]"
|
||||
, "//result/doc[2]/str[@name='id'][.=6]"
|
||||
);
|
||||
assertQ(req("q", "whitespace_stxt:zzz", "sort", sortF + " " + dir + ", id desc")
|
||||
, "//*[@numFound='2']"
|
||||
, "//result/doc[1]/str[@name='id'][.=6]"
|
||||
, "//result/doc[2]/str[@name='id'][.=5]"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* test how various permutations of useDocValuesAsStored and maxCharsForDocValues interact
|
||||
*/
|
||||
public void testUseDocValuesAsStored() throws Exception {
|
||||
ignoreException("when useDocValuesAsStored=true \\(length=");
|
||||
|
||||
// first things first...
|
||||
// unlike most field types, SortableTextField should default to useDocValuesAsStored==false
|
||||
// (check a handful that should have the default behavior)
|
||||
for (String n : Arrays.asList("keyword_stxt", "whitespace_max0_stxt", "whitespace_max6_stxt")) {
|
||||
{
|
||||
FieldType ft = h.getCore().getLatestSchema().getFieldTypeByName(n);
|
||||
assertEquals("type " + ft.getTypeName() + " should not default to useDocValuesAsStored",
|
||||
false, ft.useDocValuesAsStored()) ;
|
||||
}
|
||||
{
|
||||
SchemaField sf = h.getCore().getLatestSchema().getField(n);
|
||||
assertEquals("field " + sf.getName() + " should not default to useDocValuesAsStored",
|
||||
false, sf.useDocValuesAsStored()) ;
|
||||
}
|
||||
}
|
||||
|
||||
// but it should be possible to set useDocValuesAsStored=true explicitly on types...
|
||||
int num_types_found = 0;
|
||||
for (Map.Entry<String,FieldType> entry : h.getCore().getLatestSchema().getFieldTypes().entrySet()) {
|
||||
if (entry.getKey().endsWith("_has_usedvs")) {
|
||||
num_types_found++;
|
||||
FieldType ft = entry.getValue();
|
||||
assertEquals("type " + ft.getTypeName() + " has unexpected useDocValuesAsStored value",
|
||||
true, ft.useDocValuesAsStored()) ;
|
||||
}
|
||||
}
|
||||
assertEquals("sanity check: wrong number of *_has_usedvs types found -- schema changed?",
|
||||
2, num_types_found);
|
||||
|
||||
|
||||
// ...and it should be possible to set/override useDocValuesAsStored=true on fields...
|
||||
int num_fields_found = 0;
|
||||
List<String> xpaths = new ArrayList<>(42);
|
||||
for (Map.Entry<String,SchemaField> entry : h.getCore().getLatestSchema().getFields().entrySet()) {
|
||||
if (entry.getKey().endsWith("_usedvs")) {
|
||||
num_fields_found++;
|
||||
final SchemaField sf = entry.getValue();
|
||||
final String name = sf.getName();
|
||||
|
||||
// some sanity check before we move on with the rest of our testing...
|
||||
assertFalse("schema change? field should not be stored=true: " + name, sf.stored());
|
||||
final boolean usedvs = name.endsWith("_has_usedvs");
|
||||
assertTrue("schema change broke assumptions: field must be '*_has_usedvs' or '*_negates_usedvs': " +
|
||||
name, usedvs ^ name.endsWith("_negates_usedvs"));
|
||||
final boolean max6 = name.startsWith("max6_");
|
||||
assertTrue("schema change broke assumptions: field must be 'max6_*' or 'max0_*': " +
|
||||
name, max6 ^ name.startsWith("max0_"));
|
||||
|
||||
assertEquals("Unexpected useDocValuesAsStored value for field: " + name,
|
||||
usedvs, sf.useDocValuesAsStored()) ;
|
||||
|
||||
final String docid = ""+num_fields_found;
|
||||
if (usedvs && max6) {
|
||||
// if useDocValuesAsStored==true and maxCharsForDocValues=N then longer values should fail
|
||||
|
||||
final String doc = adoc("id", docid, name, "apple pear orange");
|
||||
SolrException ex = expectThrows(SolrException.class, () -> { assertU(doc); });
|
||||
for (String expect : Arrays.asList("field " + name,
|
||||
"length=17",
|
||||
"useDocValuesAsStored=true",
|
||||
"maxCharsForDocValues=6")) {
|
||||
assertTrue("exception must mention " + expect + ": " + ex.getMessage(),
|
||||
ex.getMessage().contains(expect));
|
||||
}
|
||||
} else {
|
||||
// otherwise (useDocValuesAsStored==false *OR* maxCharsForDocValues=0) any value
|
||||
// should be fine when adding a doc and we should be able to search for it later...
|
||||
final String val = docid + " apple pear orange " + BIG_CONST;
|
||||
assertU(adoc("id", docid, name, val));
|
||||
String doc_xpath = "//result/doc[str[@name='id'][.='"+docid+"']]";
|
||||
|
||||
if (usedvs) {
|
||||
// ...and if it *does* usedvs, then we should defnitely see our value when searching...
|
||||
doc_xpath = doc_xpath + "[str[@name='"+name+"'][.='"+val+"']]";
|
||||
} else {
|
||||
// ...but if not, then we should definitely not see any value for our field...
|
||||
doc_xpath = doc_xpath + "[not(str[@name='"+name+"'])]";
|
||||
}
|
||||
xpaths.add(doc_xpath);
|
||||
}
|
||||
}
|
||||
}
|
||||
assertEquals("sanity check: wrong number of *_usedvs fields found -- schema changed?",
|
||||
6, num_fields_found);
|
||||
|
||||
// check all our expected docs can be found (with the expected values)
|
||||
assertU(commit());
|
||||
xpaths.add("//*[@numFound='"+xpaths.size()+"']");
|
||||
assertQ(req("q", "*:*", "fl", "*"), xpaths.toArray(new String[xpaths.size()]));
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* tests that a SortableTextField using KeywordTokenzier (w/docValues) behaves exactly the same as
|
||||
* StrFields that it's copied to for quering and sorting
|
||||
*/
|
||||
public void testRandomStrEquivilentBehavior() throws Exception {
|
||||
final List<String> test_fields = Arrays.asList("keyword_stxt", "keyword_dv_stxt",
|
||||
"keyword_s_dv", "keyword_s");
|
||||
// we use embedded client instead of assertQ: we want to compare the responses from multiple requests
|
||||
final SolrClient client = new EmbeddedSolrServer(h.getCore());
|
||||
|
||||
final int numDocs = atLeast(100);
|
||||
final int magicIdx = TestUtil.nextInt(random(), 1, numDocs);
|
||||
String magic = null;
|
||||
for (int i = 1; i <= numDocs; i++) {
|
||||
|
||||
// ideally we'd test all "realistic" unicode string, but EmbeddedSolrServer uses XML request writer
|
||||
// and has no option to change this so ctrl-characters break the request
|
||||
final String val = TestUtil.randomSimpleString(random(), 100);
|
||||
if (i == magicIdx) {
|
||||
magic = val;
|
||||
}
|
||||
assertEquals(0, client.add(sdoc("id", ""+i, "keyword_stxt", val)).getStatus());
|
||||
|
||||
}
|
||||
assertNotNull(magic);
|
||||
|
||||
assertEquals(0, client.commit().getStatus());
|
||||
|
||||
// query for magic term should match same doc regardless of field (reminder: keyword tokenizer)
|
||||
// (we need the filter in the unlikely event that magic value with randomly picked twice)
|
||||
for (String f : test_fields) {
|
||||
|
||||
final SolrDocumentList results = client.query(params("q", "{!field f="+f+" v=$v}",
|
||||
"v", magic,
|
||||
"fq", "id:" + magicIdx )).getResults();
|
||||
assertEquals(f + ": Query ("+magic+") filtered by id: " + magicIdx + " ==> " + results,
|
||||
1L, results.getNumFound());
|
||||
final SolrDocument doc = results.get(0);
|
||||
assertEquals(f + ": Query ("+magic+") filtered by id: " + magicIdx + " ==> " + doc,
|
||||
""+magicIdx, doc.getFieldValue("id"));
|
||||
assertEquals(f + ": Query ("+magic+") filtered by id: " + magicIdx + " ==> " + doc,
|
||||
magic, doc.getFieldValue(f));
|
||||
}
|
||||
|
||||
// do some random id range queries using all 3 fields for sorting. results should be identical
|
||||
final int numQ = atLeast(10);
|
||||
for (int i = 0; i < numQ; i++) {
|
||||
final int hi = TestUtil.nextInt(random(), 1, numDocs-1);
|
||||
final int lo = TestUtil.nextInt(random(), 1, hi);
|
||||
final boolean fwd = random().nextBoolean();
|
||||
|
||||
SolrDocumentList previous = null;
|
||||
String prevField = null;
|
||||
for (String f : test_fields) {
|
||||
final SolrDocumentList results = client.query(params("q","id_i:["+lo+" TO "+hi+"]",
|
||||
"sort", f + (fwd ? " asc" : " desc") +
|
||||
// secondary on id for determinism
|
||||
", id asc")
|
||||
).getResults();
|
||||
assertEquals(results.toString(), (1L + hi - lo), results.getNumFound());
|
||||
if (null != previous) {
|
||||
assertEquals(prevField + " vs " + f,
|
||||
previous.getNumFound(), results.getNumFound());
|
||||
for (int d = 0; d < results.size(); d++) {
|
||||
assertEquals(prevField + " vs " + f + ": " + d,
|
||||
previous.get(d).getFieldValue("id"),
|
||||
results.get(d).getFieldValue("id"));
|
||||
assertEquals(prevField + " vs " + f + ": " + d,
|
||||
previous.get(d).getFieldValue(prevField),
|
||||
results.get(d).getFieldValue(f));
|
||||
|
||||
}
|
||||
}
|
||||
previous = results;
|
||||
prevField = f;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
|
@ -59,6 +59,11 @@ public class TestMinMaxOnMultiValuedField extends SolrTestCaseJ4 {
|
|||
"date_missf_", "date_missl_",
|
||||
"enum_missf_", "enum_missl_",
|
||||
"bool_missf_", "bool_missl_" }, new String [] {"_dv"});
|
||||
checkFields(new String[] {"stxt_", // no expectation on missing first/last
|
||||
"stxt_missf_", "stxt_missl_" }, new String [] { "_dv"});
|
||||
checkFields(new String [] { "stxt_" }, // no expectation on missing first/last
|
||||
new String [] { "_nodv", "_dv" });
|
||||
checkFields(new String [] { "stxt_missf_", "stxt_missl_" }, new String [] { "_dv"});
|
||||
|
||||
}
|
||||
|
||||
|
@ -71,8 +76,9 @@ public class TestMinMaxOnMultiValuedField extends SolrTestCaseJ4 {
|
|||
SchemaField sf = schema.getField(f);
|
||||
assertTrue(f + " is not multivalued", sf.multiValued());
|
||||
assertEquals(f + " doesn't have expected docValues status",
|
||||
f.contains("dv") || f.endsWith("_p")
|
||||
|| Boolean.getBoolean(NUMERIC_DOCVALUES_SYSPROP), sf.hasDocValues());
|
||||
((f.contains("dv") || f.endsWith("_p") || Boolean.getBoolean(NUMERIC_DOCVALUES_SYSPROP))
|
||||
&& !f.contains("nodv")),
|
||||
sf.hasDocValues());
|
||||
assertEquals(f + " doesn't have expected index status",
|
||||
! f.contains("ni"), sf.indexed());
|
||||
|
||||
|
@ -178,19 +184,27 @@ public class TestMinMaxOnMultiValuedField extends SolrTestCaseJ4 {
|
|||
}
|
||||
|
||||
public void testBasicStrings() {
|
||||
checkBasicStrings("val_strs_dv");
|
||||
}
|
||||
public void testBasicSortableText() {
|
||||
checkBasicStrings("val_stxt_s_dv");
|
||||
checkBasicStrings("val_stxt_missf_s_dv");
|
||||
checkBasicStrings("val_stxt_missl_s_dv");
|
||||
}
|
||||
private void checkBasicStrings(final String field) {
|
||||
assertU(adoc(sdoc("id", "1",
|
||||
"val_strs_dv", "dog",
|
||||
"val_strs_dv", "xyz",
|
||||
"val_strs_dv", "cat")));
|
||||
assertU(adoc(sdoc("id", "2"))); // 2 has no val_strs_dv values
|
||||
field, "dog",
|
||||
field, "xyz",
|
||||
field, "cat")));
|
||||
assertU(adoc(sdoc("id", "2"))); // 2 has no values in tested field
|
||||
assertU(commit());
|
||||
|
||||
// id=1: has values
|
||||
assertQ(req("q","id:1"
|
||||
,"fl","exists_min_str:exists(field(val_strs_dv,min))"
|
||||
,"fl","exists_max_str:exists(field(val_strs_dv,max))"
|
||||
,"fl","min_str:field(val_strs_dv,min)"
|
||||
,"fl","max_str:field(val_strs_dv,max)"
|
||||
,"fl","exists_min_str:exists(field("+field+",min))"
|
||||
,"fl","exists_max_str:exists(field("+field+",max))"
|
||||
,"fl","min_str:field("+field+",min)"
|
||||
,"fl","max_str:field("+field+",max)"
|
||||
|
||||
)
|
||||
,"//*[@numFound='1']"
|
||||
|
@ -201,10 +215,10 @@ public class TestMinMaxOnMultiValuedField extends SolrTestCaseJ4 {
|
|||
);
|
||||
// id=2: no values
|
||||
assertQ(req("q","id:2"
|
||||
,"fl","exists_min_str:exists(field(val_strs_dv,min))"
|
||||
,"fl","exists_max_str:exists(field(val_strs_dv,max))"
|
||||
,"fl","min_str:field(val_strs_dv,min)"
|
||||
,"fl","max_str:field(val_strs_dv,max)"
|
||||
,"fl","exists_min_str:exists(field("+field+",min))"
|
||||
,"fl","exists_max_str:exists(field("+field+",max))"
|
||||
,"fl","min_str:field("+field+",min)"
|
||||
,"fl","max_str:field("+field+",max)"
|
||||
|
||||
)
|
||||
,"//*[@numFound='1']"
|
||||
|
@ -219,6 +233,10 @@ public class TestMinMaxOnMultiValuedField extends SolrTestCaseJ4 {
|
|||
testExpectedSortOrdering("val_strs_dv", false,
|
||||
null, "a", "cat", "dog", "wako", "xyz", "zzzzz");
|
||||
}
|
||||
public void testExpectedSortOrderingSortableText() {
|
||||
testExpectedSortOrdering("val_stxt_s_dv", false,
|
||||
null, "a", "cat", "dog", "wako", "xyz", "zzzzz");
|
||||
}
|
||||
|
||||
public void testExpectedSortMissingOrderings() {
|
||||
|
||||
|
@ -226,7 +244,9 @@ public class TestMinMaxOnMultiValuedField extends SolrTestCaseJ4 {
|
|||
// (in this simple test) we aren't using a secondary sort, so there is no way to disambiguate
|
||||
// docs that have those values from docs that have those *effective* sort values
|
||||
|
||||
testSortMissingMinMax("val_str", "a", "aaaaaa", "xxxxx", "zzzzzzzzzzzzzzzzzzz");
|
||||
testSortMissingMinMax("val_str", "a", "aaaaaa", "xxxxx", "zzzzzzzzzzzzzzzzzzz");
|
||||
testSortMissingMinMax("val_stxt", "a", "aaaaaa", "xxxxx", "zzzzzzzzzzzzzzzzzzz");
|
||||
|
||||
testSortMissingMinMax("val_int",
|
||||
Integer.MIN_VALUE+1L, -9999, 0, 99999, Integer.MAX_VALUE-1L);
|
||||
testSortMissingMinMax("val_long",
|
||||
|
@ -382,6 +402,15 @@ public class TestMinMaxOnMultiValuedField extends SolrTestCaseJ4 {
|
|||
"docValues",
|
||||
req("q","*:*", "fl", "field(cat,'max')"),
|
||||
SolrException.ErrorCode.BAD_REQUEST);
|
||||
assertQEx("no error mentioning field name when asking for max on a non-dv sortable text field",
|
||||
"val_stxt_s_nodv",
|
||||
req("q","*:*", "fl", "field(val_stxt_s_nodv,'max')"),
|
||||
SolrException.ErrorCode.BAD_REQUEST);
|
||||
assertQEx("no error mentioning 'docValues' when asking for max on a non-dv sortable field",
|
||||
"docValues",
|
||||
req("q","*:*", "fl", "field(val_stxt_s_nodv,'max')"),
|
||||
SolrException.ErrorCode.BAD_REQUEST);
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -20,7 +20,7 @@ An analyzer examines the text of fields and generates a token stream.
|
|||
|
||||
Analyzers are specified as a child of the `<fieldType>` element in the `schema.xml` configuration file (in the same `conf/` directory as `solrconfig.xml`).
|
||||
|
||||
In normal usage, only fields of type `solr.TextField` will specify an analyzer. The simplest way to configure an analyzer is with a single `<analyzer>` element whose class attribute is a fully qualified Java class name. The named class must derive from `org.apache.lucene.analysis.Analyzer`. For example:
|
||||
In normal usage, only fields of type `solr.TextField` or `solr.SortableTextField` will specify an analyzer. The simplest way to configure an analyzer is with a single `<analyzer>` element whose class attribute is a fully qualified Java class name. The named class must derive from `org.apache.lucene.analysis.Analyzer`. For example:
|
||||
|
||||
[source,xml]
|
||||
----
|
||||
|
|
|
@ -37,10 +37,11 @@ Solr can sort query responses according to:
|
|||
* Document scores
|
||||
* <<function-queries.adoc#sort-by-function,Function results>>
|
||||
* The value of any primitive field (numerics, string, boolean, dates, etc...) which has `docValues="true"` (or `multiValued="false"` and `indexed="true"` in which case the indexed terms will be used to build DocValue like structures on the fly at runtime)
|
||||
* A TextField that uses an analyzer (such as the KeywordTokenizer) that produces only a single term.
|
||||
* A SortableTextField which implicitly uses `docValues="true"` by default to allow sorting on the original input string regardless of the analyzers used for searching.
|
||||
* A single-valued TextField that uses an analyzer (such as the KeywordTokenizer) that produces only a single term per document. TextField does not support docValues="true", but a DocValue like structure will be built on the fly at runtime.
|
||||
** *NOTE:* If you want to be able to sort on a field whose contents you want to tokenize to facilitate searching, <<copying-fields.adoc#copying-fields,use a `copyField` directive>> in the Schema to clone the field. Then search on the field and sort on its clone.
|
||||
|
||||
In the case of primative fields that are `multiValued="true"` the representantive value used for each doc when sorting depends on the sort direction: The minimum value in each document is used for ascending (`asc`) sorting, while the maximal value in each document is used for descending (`desc`) sorting. This default behavior is equivilent to explicitly sorting using the 2 argument `<<function-queries.adoc#field-function,field()>>` function: `sort=field(name,min) asc` and `sort=field(name,max) desc`
|
||||
In the case of primitive fields, or SortableTextFields, that are `multiValued="true"` the representative value used for each doc when sorting depends on the sort direction: The minimum value in each document is used for ascending (`asc`) sorting, while the maximal value in each document is used for descending (`desc`) sorting. This default behavior is equivalent to explicitly sorting using the 2 argument `<<function-queries.adoc#field-function,field()>>` function: `sort=field(name,min) asc` and `sort=field(name,max) desc`
|
||||
|
||||
The table below explains how Solr responds to various settings of the `sort` parameter.
|
||||
|
||||
|
|
|
@ -69,6 +69,8 @@ Configuration and usage of PreAnalyzedField is documented in the section <<work
|
|||
|
||||
|StrField |String (UTF-8 encoded string or Unicode). Strings are intended for small fields and are _not_ tokenized or analyzed in any way. They have a hard limit of slightly less than 32K.
|
||||
|
||||
|SortableTextField |A specialized version of TextField that allows (and defaults to) `docValues="true"` for sorting on the first 1024 characters of the original string prior to analysis -- the number of characters used for sorting can be overridden with the `maxCharsForDocValues` attribute.
|
||||
|
||||
|TextField |Text, usually multiple words or tokens.
|
||||
|
||||
|TrieDateField |*Deprecated*. Use DatePointField instead.
|
||||
|
|
Loading…
Reference in New Issue