SOLR-11916: new SortableTextField which supports analysis/searching just like TextField, but also sorting/faceting just like StrField

This commit is contained in:
Chris Hostetter 2018-02-01 10:40:29 -07:00
parent b0b963c68e
commit 95122e1448
11 changed files with 1057 additions and 22 deletions

View File

@ -149,6 +149,10 @@ New Features
* SOLR-11890: Add multiKmeans Stream Evaluator (Joel Bernstein) * SOLR-11890: Add multiKmeans Stream Evaluator (Joel Bernstein)
* SOLR-11916: new SortableTextField which supports analysis/searching just like TextField, but also
sorting/faceting just like StrField. By default uses only the first 1024 chars of the original
input string values, but this is configurable. (hossman)
Bug Fixes Bug Fixes
---------------------- ----------------------

View File

@ -0,0 +1,215 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource;
import org.apache.lucene.search.*;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.SolrException;
import org.apache.solr.search.QParser;
import org.apache.solr.uninverting.UninvertingReader.Type;
/**
* <p>
* <code>SortableTextField</code> is a specialized form of {@link TextField} that supports
* Sorting and ValueSource functions, using <code>docValues</code> built from the first
* <code>maxCharsForDocValues</code> characters of the original (pre-analyzed) String values of this field.
* </p>
* <p>
* The implicit default value for <code>maxCharsForDocValues</code> is <code>1024</code>. If a field
* type instance is configured with <code>maxCharsForDocValues &lt;= 0</code> this overrides the default
* with an effective value of "no limit" ({@link Integer#MAX_VALUE}).
* </p>
* <p>
* Instances of this FieldType implicitly default to <code>docValues="true"</code> unless explicitly
* configured with <code>docValues="false"</code>.
* </p>
* <p>
* Just like {@link StrField}, instances of this field that are <code>multiValued="true"</code> support
* the <code>field(name,min|max)</code> function, and implicitly sort on <code>min|max</code> depending
* on the <code>asc|desc</code> direction selector.
* </p>
*
* <p>
* <b>NOTE:</b> Unlike most other FieldTypes, this class defaults to
* <code>useDocValuesAsStored="false"</code>. If an instance of this type (or a field that uses this type)
* overrides this behavior to set <code>useDocValuesAsStored="true"</code> then instead of truncating the
* original string value based on the effective value of <code>maxCharsForDocValues</code>, this class
* will reject any documents w/a field value longer than that limit -- causing the document update to fail.
* This behavior exists to prevent situations that could result in a search client receiving only a truncated
* version of the original field value in place of a <code>stored</code> value.
* </p>
*/
public class SortableTextField extends TextField {
public static final int DEFAULT_MAX_CHARS_FOR_DOC_VALUES = 1024;
private int maxCharsForDocValues = DEFAULT_MAX_CHARS_FOR_DOC_VALUES;
protected void init(IndexSchema schema, Map<String,String> args) {
{
final String maxS = args.remove("maxCharsForDocValues");
if (maxS != null) {
maxCharsForDocValues = Integer.parseInt(maxS);
if (maxCharsForDocValues <= 0) {
maxCharsForDocValues = Integer.MAX_VALUE;
}
}
}
// by the time our init() is called, super.setArgs has already removed & procesesd any explicit
// "docValues=foo" or useDocValuesAsStored=bar args...
// - If the user explicitly said docValues=false, we want to respect that and not change it.
// - if the user didn't explicit specify anything, then we want to implicitly *default* docValues=true
// - The inverse is true for useDocValuesAsStored=true:
// - if explict, then respect it; else implicitly default to useDocValuesAsStored=false
// ...lucky for us, setArgs preserved info about explicitly set true|false properties...
if (! on(falseProperties, DOC_VALUES)) {
properties |= DOC_VALUES;
}
if (! on(trueProperties, USE_DOCVALUES_AS_STORED)) {
properties &= ~USE_DOCVALUES_AS_STORED;
}
super.init(schema, args);
}
@Override
public List<IndexableField> createFields(SchemaField field, Object value) {
IndexableField f = createField( field, value);
if (! field.hasDocValues()) {
return Collections.singletonList(f);
}
final String origString = value.toString();
final int origLegth = origString.length();
final boolean truncate = maxCharsForDocValues < origLegth;
if (field.useDocValuesAsStored() && truncate) {
// if the user has explicitly configured useDocValuesAsStored, we need a special
// check to fail docs where the values are too long -- we don't want to silently
// accept and then have search queries returning partial values
throw new SolrException
(SolrException.ErrorCode.BAD_REQUEST,
"Can not use field " + field.getName() + " with values longer then maxCharsForDocValues=" +
maxCharsForDocValues + " when useDocValuesAsStored=true (length=" + origLegth + ")");
}
final BytesRef bytes = new BytesRef(truncate ? origString.subSequence(0, maxCharsForDocValues) : origString);
final IndexableField docval = field.multiValued()
? new SortedSetDocValuesField(field.getName(), bytes)
: new SortedDocValuesField(field.getName(), bytes);
if (null == f) {
return Collections.singletonList(docval);
}
return Arrays.asList(f, docval);
}
/**
* {@inheritDoc}
* this field type supports DocValues, this method is always a No-Op
*/
@Override
protected void checkSupportsDocValues() {
// No-Op
}
@Override
public SortField getSortField(SchemaField field, boolean reverse) {
if (! field.hasDocValues()) {
// type defaults to docValues=true, so error msg from perspective that
// either type or field must have docValues="false"
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"Can not sort on this type of field when docValues=\"false\", field: " + field.getName());
}
// NOTE: we explicitly bypass super.getSortField so that our getDefaultMultiValueSelectorForSort
// is used and we don't get the historic Uninversion behavior of TextField.
return getStringSort(field, reverse);
}
@Override
public ValueSource getValueSource(SchemaField field, QParser parser) {
if (! field.hasDocValues()) {
// type defaults to docValues=true, so error msg from perspective that
// either type or field must have docValues="false"
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"Can not use ValueSource on this type of field when docValues=\"false\", field: " + field.getName());
}
return super.getValueSource(field, parser);
}
@Override
public MultiValueSelector getDefaultMultiValueSelectorForSort(SchemaField field, boolean reverse) {
return reverse ? MultiValueSelector.MAX : MultiValueSelector.MIN;
}
@Override
public ValueSource getSingleValueSource(MultiValueSelector choice, SchemaField field, QParser parser) {
// trivial base case
if (!field.multiValued()) {
// single value matches any selector
return getValueSource(field, parser);
}
// See LUCENE-6709
if (! field.hasDocValues()) {
// type defaults to docValues=true, so error msg from perspective that
// either type or field must have docValues="false"
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"Can not select '" + choice.toString() + "' value from multivalued field ("+
field.getName() +") when docValues=\"false\", field: " + field.getName());
}
SortedSetSelector.Type selectorType = choice.getSortedSetSelectorType();
if (null == selectorType) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
choice.toString() + " is not a supported option for picking a single value"
+ " from the multivalued field: " + field.getName() +
" (type: " + this.getTypeName() + ")");
}
return new SortedSetFieldSource(field.getName(), selectorType);
}
/**
* {@inheritDoc}
* this field type is not uninvertable, this method always returns null
*/
@Override
public Type getUninversionType(SchemaField sf) {
return null;
}
/**
* {@inheritDoc}
* This implementation always returns false.
*/
@Override
public boolean multiValuedFieldCache() {
return false;
}
}

View File

@ -118,8 +118,13 @@ public class TextField extends FieldType {
@Override @Override
public SortField getSortField(SchemaField field, boolean reverse) { public SortField getSortField(SchemaField field, boolean reverse) {
/* :TODO: maybe warn if isTokenized(), but doesn't use LimitTokenCountFilter in its chain? */ /* :TODO: maybe warn if isTokenized(), but doesn't use LimitTokenCountFilter in its chain? */
return getSortedSetSortField(field, SortedSetSelector.Type.MIN, reverse, return getSortedSetSortField(field,
SortField.STRING_FIRST, SortField.STRING_LAST); // historical behavior based on how the early versions of the FieldCache
// would deal with multiple indexed terms in a single valued field...
//
// Always use the 'min' value from the (Uninverted) "pseudo doc values"
SortedSetSelector.Type.MIN,
reverse, SortField.STRING_FIRST, SortField.STRING_LAST);
} }
@Override @Override

View File

@ -0,0 +1,149 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<schema name="minimal" version="1.6">
<uniqueKey>id</uniqueKey>
<field name="id" type="str" indexed="true" docValues="true" stored="true" />
<field name="id_i" type="int" indexed="true" docValues="true" stored="true" />
<!-- NOTE: docValues="true" should be implicit for SortableTextField -->
<field name="whitespace_stxt" type="whitespace_stxt" indexed="true" stored="true" />
<field name="whitespace_m_stxt" type="whitespace_stxt" indexed="true" stored="true" multiValued="true" />
<!-- explicit docValues="false" in this version... -->
<field name="whitespace_nodv_stxt" type="whitespace_stxt" indexed="true" docValues="false" stored="true" />
<!-- only docValues in this version, no index or stored... -->
<field name="whitespace_nois_stxt" type="whitespace_stxt" indexed="false" docValues="true" stored="false" />
<field name="whitespace_max3_stxt" type="whitespace_max3_stxt" />
<field name="whitespace_max6_stxt" type="whitespace_max6_stxt" />
<field name="whitespace_max0_stxt" type="whitespace_max0_stxt" />
<field name="whitespace_maxNeg_stxt" type="whitespace_maxNeg_stxt" />
<field name="whitespace_f_stxt" type="whitespace_f_stxt" indexed="true" docValues="true" stored="true" />
<field name="whitespace_l_stxt" type="whitespace_l_stxt" indexed="true" docValues="true" stored="true" />
<field name="keyword_stxt" type="keyword_stxt" indexed="true" stored="true" />
<!-- explicit docValues="true" in this field version... -->
<field name="keyword_dv_stxt" type="keyword_stxt" indexed="true" docValues="true" stored="true" />
<!-- for behavioral equivalency testing -->
<field name="whitespace_plain_txt" type="whitespace_plain_txt" />
<field name="whitespace_plain_str" type="str" />
<field name="keyword_s_dv" type="str" indexed="false" docValues="true" stored="true" />
<field name="keyword_s" type="str" indexed="true" docValues="false" stored="true" />
<!-- . -->
<copyField source="whitespace_stxt" dest="whitespace_m_stxt"/>
<copyField source="whitespace_stxt" dest="whitespace_nodv_stxt"/>
<copyField source="whitespace_stxt" dest="whitespace_nois_stxt"/>
<copyField source="whitespace_stxt" dest="whitespace_max3_stxt"/>
<copyField source="whitespace_stxt" dest="whitespace_max6_stxt"/>
<copyField source="whitespace_stxt" dest="whitespace_max0_stxt"/>
<copyField source="whitespace_stxt" dest="whitespace_maxNeg_stxt"/>
<copyField source="whitespace_stxt" dest="whitespace_plain_txt"/>
<copyField source="whitespace_stxt" dest="whitespace_plain_str"/>
<copyField source="whitespace_f_stxt" dest="whitespace_l_stxt"/>
<copyField source="keyword_stxt" dest="keyword_dv_stxt"/>
<copyField source="keyword_stxt" dest="keyword_s"/>
<copyField source="keyword_stxt" dest="keyword_s_dv"/>
<copyField source="id" dest="id_i"/>
<!-- . -->
<!-- NOTE: explicitly not specifying docValues=true on these, it should be implicit default -->
<fieldType name="whitespace_stxt" class="solr.SortableTextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
</analyzer>
</fieldType>
<fieldType name="whitespace_max3_stxt" class="solr.SortableTextField" maxCharsForDocValues="3">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
</analyzer>
</fieldType>
<fieldType name="whitespace_max6_stxt" class="solr.SortableTextField" maxCharsForDocValues="6">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
</analyzer>
</fieldType>
<fieldType name="whitespace_max0_stxt" class="solr.SortableTextField" maxCharsForDocValues="0">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
</analyzer>
</fieldType>
<fieldType name="whitespace_maxNeg_stxt" class="solr.SortableTextField" maxCharsForDocValues="-42">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
</analyzer>
</fieldType>
<fieldType name="whitespace_l_stxt" class="solr.SortableTextField" sortMissingLast="true">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
</analyzer>
</fieldType>
<fieldType name="whitespace_f_stxt" class="solr.SortableTextField" sortMissingFirst="true">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
</analyzer>
</fieldType>
<fieldType name="keyword_stxt" class="solr.SortableTextField">
<analyzer>
<tokenizer class="solr.KeywordTokenizerFactory" />
</analyzer>
</fieldType>
<!-- some special edge cases for testing how useDocValuesAsStored (on type or field)
interacts with maxCharsForDocValues
NOTE: 2 types and 6 fields ending in '_usedvs': either '_has_usedvs' or '_negates_usedvs'
-->
<!-- max6 -->
<field name="max6_field_has_usedvs" type="whitespace_max6_stxt" stored="false" useDocValuesAsStored="true" />
<field name="max6_type_has_usedvs" type="max6_type_has_usedvs" />
<field name="max6_field_negates_usedvs" type="max6_type_has_usedvs" useDocValuesAsStored="false" />
<fieldType name="max6_type_has_usedvs" class="solr.SortableTextField" stored="false"
maxCharsForDocValues="6" useDocValuesAsStored="true">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory" />
</analyzer>
</fieldType>
<!-- max0 -->
<field name="max0_field_has_usedvs" type="whitespace_max0_stxt" stored="false" useDocValuesAsStored="true" />
<field name="max0_type_has_usedvs" type="max0_type_has_usedvs" />
<field name="max0_field_negates_usedvs" type="max0_type_has_usedvs" useDocValuesAsStored="false" />
<fieldType name="max0_type_has_usedvs" class="solr.SortableTextField" stored="false"
maxCharsForDocValues="0" useDocValuesAsStored="true">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory" />
</analyzer>
</fieldType>
<fieldType name="str" class="solr.StrField"/>
<fieldType name="whitespace_plain_txt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory" />
</analyzer>
</fieldType>
<fieldType name="int" class="${solr.tests.IntegerFieldType}"/>
</schema>

View File

@ -519,6 +519,20 @@ valued. -->
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- See TestMinMaxOnMultiValuedField -->
<field name="val_stxt_s_dv" type="whitespace_stxt" multiValued="true"/>
<field name="val_stxt_missf_s_dv" type="whitespace_stxt" multiValued="true" sortMissingFirst="true"/>
<field name="val_stxt_missl_s_dv" type="whitespace_stxt" multiValued="true" sortMissingLast="true"/>
<field name="val_stxt_s_nodv" type="whitespace_stxt" multiValued="true" docValues="false" />
<!-- NOTE: explicitly not specifying docValues=true, it should be implicit default -->
<fieldType name="whitespace_stxt" class="solr.SortableTextField" indexed="true" stored="true">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
</analyzer>
</fieldType>
<!-- Field to use to determine and enforce document uniqueness. <!-- Field to use to determine and enforce document uniqueness.
Unless this field is marked with required="false", it will be a required field Unless this field is marked with required="false", it will be a required field
--> -->

View File

@ -24,6 +24,7 @@ import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.misc.SweetSpotSimilarity; import org.apache.lucene.misc.SweetSpotSimilarity;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.core.CoreContainer; import org.apache.solr.core.CoreContainer;
import org.apache.solr.schema.SimilarityFactory; import org.apache.solr.schema.SimilarityFactory;
@ -84,7 +85,6 @@ public class TestBulkSchemaAPI extends RestTestBase {
jetty.stop(); jetty.stop();
jetty = null; jetty = null;
} }
client = null;
if (restTestHarness != null) { if (restTestHarness != null) {
restTestHarness.close(); restTestHarness.close();
} }
@ -840,7 +840,61 @@ public class TestBulkSchemaAPI extends RestTestBase {
map = (Map)ObjectBuilder.getVal(new JSONParser(new StringReader(response))); map = (Map)ObjectBuilder.getVal(new JSONParser(new StringReader(response)));
assertNull(map.get("error")); assertNull(map.get("error"));
} }
/**
 * Adds a <code>solr.SortableTextField</code> type (with <code>maxCharsForDocValues=6</code>) and a
 * field using it through the schema API, then indexes three docs and checks sorting on that field.
 * Docs 2 and 3 share their first six chars ("xxx bb"), so their relative order is expected to
 * come from the <code>id</code> tie breaker in both sort directions.
 */
public void testSortableTextFieldWithAnalyzer() throws Exception {
  String fieldTypeName = "sort_text_type";
  String fieldName = "sort_text";
  // every member is comma separated so the request does not depend on parser leniency
  // for missing commas
  String payload = "{\n" +
      " 'add-field-type' : {" +
      " 'name' : '" + fieldTypeName + "',\n" +
      " 'stored':true,\n" +
      " 'indexed':true,\n" +
      " 'maxCharsForDocValues':6,\n" +
      " 'class':'solr.SortableTextField',\n" +
      " 'analyzer' : {'tokenizer':{'class':'solr.WhitespaceTokenizerFactory'}},\n" +
      " },\n"+
      " 'add-field' : {\n" +
      " 'name':'" + fieldName + "',\n" +
      " 'type': '"+fieldTypeName+"',\n" +
      " }\n" +
      "}\n";

  String response = restTestHarness.post("/schema", json(payload));

  Map map = (Map) ObjectBuilder.getVal(new JSONParser(new StringReader(response)));
  assertNull(response, map.get("errors"));

  Map fields = getObj(restTestHarness, fieldName, "fields");
  assertNotNull("field " + fieldName + " not created", fields);

  assertEquals(0,
               getSolrClient().add(Arrays.asList(sdoc("id","1",fieldName,"xxx aaa"),
                                                 sdoc("id","2",fieldName,"xxx bbb aaa"),
                                                 sdoc("id","3",fieldName,"xxx bbb zzz"))).getStatus());

  assertEquals(0, getSolrClient().commit().getStatus());
  {
    // ascending: doc 1 first, then the tied docs ordered by id desc
    SolrDocumentList docs = getSolrClient().query
      (params("q",fieldName+":xxx","sort", fieldName + " asc, id desc")).getResults();

    assertEquals(3L, docs.getNumFound());
    assertEquals(3L, docs.size());
    assertEquals("1", docs.get(0).getFieldValue("id"));
    assertEquals("3", docs.get(1).getFieldValue("id"));
    assertEquals("2", docs.get(2).getFieldValue("id"));
  }
  {
    // descending: the tied docs first (ordered by id asc), then doc 1
    SolrDocumentList docs = getSolrClient().query
      (params("q",fieldName+":xxx", "sort", fieldName + " desc, id asc")).getResults();

    assertEquals(3L, docs.getNumFound());
    assertEquals(3L, docs.size());
    assertEquals("2", docs.get(0).getFieldValue("id"));
    assertEquals("3", docs.get(1).getFieldValue("id"));
    assertEquals("1", docs.get(2).getFieldValue("id"));
  }
}
public void testSimilarityParser() throws Exception { public void testSimilarityParser() throws Exception {
RestTestHarness harness = restTestHarness; RestTestHarness harness = restTestHarness;

View File

@ -0,0 +1,562 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RefCounted;
import org.junit.Before;
import org.junit.BeforeClass;
import static org.hamcrest.CoreMatchers.instanceOf;
public class TestSortableTextField extends SolrTestCaseJ4 {
protected static final String BIG_CONST
= StringUtils.repeat("x", SortableTextField.DEFAULT_MAX_CHARS_FOR_DOC_VALUES);
/**
 * Starts the test core and sanity checks that the schema still declares docValues the way
 * the tests in this class expect.
 */
@BeforeClass
public static void create() throws Exception {
  initCore("solrconfig-minimal.xml","schema-sorting-text.xml");

  final IndexSchema schema = h.getCore().getLatestSchema();

  // field types expected to report docValues (explicitly or implicitly)
  final String[] typesWithDocValues = {
    "keyword_stxt", "whitespace_stxt", "whitespace_f_stxt", "whitespace_l_stxt"
  };
  for (String typeName : typesWithDocValues) {
    final FieldType type = schema.getFieldTypeByName(typeName);
    final Object docValuesProp = type.getNamedPropertyValues(true).get("docValues");
    assertEquals("type " + type.getTypeName() + " should have docvalues - schema got changed?",
                 true, docValuesProp);
  }

  // fields expected to have docValues
  final String[] fieldsWithDocValues = {
    "keyword_stxt", "keyword_dv_stxt",
    "whitespace_stxt", "whitespace_nois_stxt",
    "whitespace_f_stxt", "whitespace_l_stxt"
  };
  for (String fieldName : fieldsWithDocValues) {
    final SchemaField schemaField = schema.getField(fieldName);
    assertTrue("field " + schemaField.getName() + " should have docvalues - schema got changed?",
               schemaField.hasDocValues());
  }

  // the one field that must *NOT* have docValues .. should behave like a plain old TextField
  final SchemaField noDocValues = schema.getField("whitespace_nodv_stxt");
  assertFalse("field " + noDocValues.getName() + " should not have docvalues - schema got changed?",
              noDocValues.hasDocValues());
}
/** Empties the index before each test method (delegates to <code>clearIndex()</code>). */
@Before
public void cleanup() throws Exception {
  clearIndex();
}
/**
 * Indexes four small docs and checks searching plus sorting: sorting by several fields,
 * the error raised when sorting on a <code>docValues="false"</code> field, and the
 * sortMissingFirst / sortMissingLast fields (doc 4 has no value for those).
 */
public void testSimple() throws Exception {
  assertU(adoc("id","1", "whitespace_stxt", "how now brown cow ?", "whitespace_f_stxt", "aaa bbb"));
  assertU(adoc("id","2", "whitespace_stxt", "how now brown dog ?", "whitespace_f_stxt", "bbb aaa"));
  assertU(adoc("id","3", "whitespace_stxt", "how now brown cat ?", "whitespace_f_stxt", "xxx yyy"));
  assertU(adoc("id","4", "whitespace_stxt", "dog and cat" /* no val for whitespace_f_stxt */));
  assertU(commit());

  // search & sort
  // NOTE: even if the field is indexed=false, should still be able to sort on it
  final String[] sortFields = {"whitespace_stxt", "whitespace_nois_stxt", "whitespace_plain_str"};
  for (String sortField : sortFields) {
    final String ascending = sortField + " asc";
    final String descending = sortField + " desc";

    assertQ(req("q", "whitespace_stxt:cat", "sort", ascending),
            "//*[@numFound='2']",
            "//result/doc[1]/str[@name='id'][.=4]",
            "//result/doc[2]/str[@name='id'][.=3]");

    assertQ(req("q", "whitespace_stxt:cat", "sort", descending),
            "//*[@numFound='2']",
            "//result/doc[1]/str[@name='id'][.=3]",
            "//result/doc[2]/str[@name='id'][.=4]");

    assertQ(req("q", "whitespace_stxt:brown", "sort", ascending),
            "//*[@numFound='3']",
            "//result/doc[1]/str[@name='id'][.=3]",
            "//result/doc[2]/str[@name='id'][.=1]",
            "//result/doc[3]/str[@name='id'][.=2]");

    assertQ(req("q", "whitespace_stxt:brown", "sort", descending),
            "//*[@numFound='3']",
            "//result/doc[1]/str[@name='id'][.=2]",
            "//result/doc[2]/str[@name='id'][.=1]",
            "//result/doc[3]/str[@name='id'][.=3]");

    // searching the docValues="false" field still works, as long as the sort uses another field
    assertQ(req("q","whitespace_nodv_stxt:cat", "sort", ascending),
            "//*[@numFound='2']",
            "//result/doc[1]/str[@name='id'][.=4]",
            "//result/doc[2]/str[@name='id'][.=3]");
  }

  // sorting directly on the docValues="false" field must be rejected
  assertQEx("attempting to sort on docValues=false field should give an error",
            "when docValues=\"false\"",
            req("q","*:*", "sort", "whitespace_nodv_stxt asc"),
            ErrorCode.BAD_REQUEST);

  // sortMissing - whitespace_f_stxt copyField to whitespace_l_stxt
  final String allDocs = "*:*";

  // sortMissingFirst: doc 4 (no value) leads in both directions
  assertQ(req("q", allDocs, "sort", "whitespace_f_stxt asc"),
          "//*[@numFound='4']",
          "//result/doc[1]/str[@name='id'][.=4]",
          "//result/doc[2]/str[@name='id'][.=1]",
          "//result/doc[3]/str[@name='id'][.=2]",
          "//result/doc[4]/str[@name='id'][.=3]");
  assertQ(req("q", allDocs, "sort", "whitespace_f_stxt desc"),
          "//*[@numFound='4']",
          "//result/doc[1]/str[@name='id'][.=4]",
          "//result/doc[2]/str[@name='id'][.=3]",
          "//result/doc[3]/str[@name='id'][.=2]",
          "//result/doc[4]/str[@name='id'][.=1]");

  // sortMissingLast: doc 4 (no value) trails in both directions
  assertQ(req("q", allDocs, "sort", "whitespace_l_stxt asc"),
          "//*[@numFound='4']",
          "//result/doc[1]/str[@name='id'][.=1]",
          "//result/doc[2]/str[@name='id'][.=2]",
          "//result/doc[3]/str[@name='id'][.=3]",
          "//result/doc[4]/str[@name='id'][.=4]");
  assertQ(req("q", allDocs, "sort", "whitespace_l_stxt desc"),
          "//*[@numFound='4']",
          "//result/doc[1]/str[@name='id'][.=3]",
          "//result/doc[2]/str[@name='id'][.=2]",
          "//result/doc[3]/str[@name='id'][.=1]",
          "//result/doc[4]/str[@name='id'][.=4]");
}
/**
 * Checks that every combination of "field to search" and "field to facet on" gives the same
 * counts, using both <code>facet.field</code> and JSON terms facets. The facet values are the
 * complete original strings, not individual tokens.
 */
public void testSimpleSearchAndFacets() throws Exception {
  assertU(adoc("id","1", "whitespace_stxt", "how now brown cow ?"));
  assertU(adoc("id","2", "whitespace_stxt", "how now brown cow ?"));
  assertU(adoc("id","3", "whitespace_stxt", "holy cow !"));
  assertU(adoc("id","4", "whitespace_stxt", "dog and cat"));
  assertU(commit());

  // NOTE: even if the field is indexed=false, should still be able to facet on it
  final String[] facetFields = {
    "whitespace_stxt", "whitespace_nois_stxt", "whitespace_m_stxt", "whitespace_plain_str"
  };
  final String[] searchFields = {
    "whitespace_stxt", "whitespace_nodv_stxt", "whitespace_m_stxt", "whitespace_plain_txt"
  };
  // xpath prefix for the json facet buckets; does not depend on either field
  final String jsonBuckets = "//lst[@name='facets']/lst[@name='x']/arr[@name='buckets']/";

  for (String facetField : facetFields) {
    // xpath prefix for the facet.field counts of the current facet field
    final String fieldCounts = "//lst[@name='facet_fields']/lst[@name='"+facetField+"']/";

    for (String searchField : searchFields) {
      final String query = searchField + ":cow";

      // facet.field
      assertQ(req("q", query, "rows", "0",
                  "facet.field", facetField, "facet", "true"),
              "//*[@numFound='3']",
              fieldCounts + "int[@name='how now brown cow ?'][.=2]",
              fieldCounts + "int[@name='holy cow !'][.=1]",
              fieldCounts + "int[@name='dog and cat'][.=0]");

      // json facet
      assertQ(req("q", query, "rows", "0",
                  "json.facet", "{x:{ type: terms, field:'" + facetField + "', mincount:0 }}"),
              "//*[@numFound='3']",
              jsonBuckets + "lst[str[@name='val'][.='how now brown cow ?']][int[@name='count'][.=2]]",
              jsonBuckets + "lst[str[@name='val'][.='holy cow !']][int[@name='count'][.=1]]",
              jsonBuckets + "lst[str[@name='val'][.='dog and cat']][int[@name='count'][.=0]]");
    }
  }
}
/**
 * Indexes one document and inspects the resulting index directly: which fields have docValues
 * (and of what type) and which fields have indexed terms.
 */
public void testWhiteboxIndexReader() throws Exception {
  assertU(adoc("id","1",
               "whitespace_stxt", "how now brown cow ?",
               "whitespace_m_stxt", "xxx",
               "whitespace_m_stxt", "yyy",
               "whitespace_f_stxt", "aaa bbb",
               "keyword_stxt", "Blarggghhh!"));
  assertU(commit());

  final RefCounted<SolrIndexSearcher> searcher = h.getCore().getNewestSearcher(false);
  try {
    final LeafReader r = searcher.get().getSlowAtomicReader();

    // common cases: single valued, indexed, with docValues
    final String[] commonFields = {
      "keyword_stxt", "keyword_dv_stxt",
      "whitespace_stxt", "whitespace_f_stxt", "whitespace_l_stxt"
    };
    for (String name : commonFields) {
      assertNotNull("FieldInfos: " + name, r.getFieldInfos().fieldInfo(name));
      assertEquals("DocValuesType: " + name,
                   DocValuesType.SORTED, r.getFieldInfos().fieldInfo(name).getDocValuesType());
      assertNotNull("DocValues: " + name, r.getSortedDocValues(name));
      assertNotNull("Terms: " + name, r.terms(name));
    }

    // special case: docValues="false" -> terms but no docValues
    final String noDocValues = "whitespace_nodv_stxt";
    assertNotNull(r.getFieldInfos().fieldInfo(noDocValues));
    assertEquals(DocValuesType.NONE,
                 r.getFieldInfos().fieldInfo(noDocValues).getDocValuesType());
    assertNull(r.getSortedDocValues(noDocValues));
    assertNotNull(r.terms(noDocValues));

    // special case: indexed="false" -> docValues but no terms
    final String notIndexed = "whitespace_nois_stxt";
    assertNotNull(r.getFieldInfos().fieldInfo(notIndexed));
    assertEquals(DocValuesType.SORTED,
                 r.getFieldInfos().fieldInfo(notIndexed).getDocValuesType());
    assertNotNull(r.getSortedDocValues(notIndexed));
    assertNull(r.terms(notIndexed));

    // special case: multiValued -> SORTED_SET docValues
    final String multiValued = "whitespace_m_stxt";
    assertNotNull(r.getFieldInfos().fieldInfo(multiValued));
    assertEquals(DocValuesType.SORTED_SET,
                 r.getFieldInfos().fieldInfo(multiValued).getDocValuesType());
    assertNotNull(r.getSortedSetDocValues(multiValued));
    assertNotNull(r.terms(multiValued));
  } finally {
    // release the searcher reference obtained above
    if (searcher != null) {
      searcher.decref();
    }
  }
}
/**
 * Calls <code>createFields</code> directly (through {@link #createIndexableFields}) and checks
 * how many fields come back for each schema field, and of which classes.
 */
public void testWhiteboxCreateFields() throws Exception {
  // common case: an indexed/stored Field followed by single valued docValues
  final String[] commonFields = {
    "keyword_stxt", "keyword_dv_stxt",
    "whitespace_stxt", "whitespace_f_stxt", "whitespace_l_stxt"
  };
  for (String name : commonFields) {
    final List<IndexableField> created = createIndexableFields(name);
    assertEquals(name, 2, created.size());
    assertThat(name, created.get(0), instanceOf(Field.class));
    assertThat(name, created.get(1), instanceOf(SortedDocValuesField.class));
  }

  // special case: not indexed or stored -> only the docValues field
  final List<IndexableField> docValuesOnly = createIndexableFields("whitespace_nois_stxt");
  assertEquals(1, docValuesOnly.size());
  assertThat(docValuesOnly.get(0), instanceOf(SortedDocValuesField.class));

  // special case: docValues="false" -> only the regular Field
  final List<IndexableField> withoutDocValues = createIndexableFields("whitespace_nodv_stxt");
  assertEquals(1, withoutDocValues.size());
  assertThat(withoutDocValues.get(0), instanceOf(Field.class));

  // special case: multiValued -> the docValues field is the "set" flavor
  final List<IndexableField> multiValued = createIndexableFields("whitespace_m_stxt");
  assertEquals(2, multiValued.size());
  assertThat(multiValued.get(0), instanceOf(Field.class));
  assertThat(multiValued.get(1), instanceOf(SortedSetDocValuesField.class));
}
/**
 * Looks up <code>fieldName</code> in the latest schema and asks its type to create the
 * fields for a fixed placeholder value.
 */
private List<IndexableField> createIndexableFields(String fieldName) {
  final SchemaField schemaField = h.getCore().getLatestSchema().getField(fieldName);
  final FieldType type = schemaField.getType();
  return type.createFields(schemaField, "dummy value");
}
/**
 * Checks searching and sorting across fields whose names indicate different limits on
 * the number of chars used for docValues, including the default limit.
 */
public void testMaxCharsSort() throws Exception {
  final String[] inputs = { "aaa bbb ccc ddd", "aaa bbb xxx yyy", "aaa bbb ccc xxx", "aaa" };
  for (int i = 0; i < inputs.length; i++) {
    assertU(adoc("id", String.valueOf(i + 1), "whitespace_stxt", inputs[i]));
  }
  assertU(commit());

  // expected results for the "ccc" queries (only docs 1 and 3 match)
  final String[] threeThenOne = {
    "//*[@numFound='2']",
    "//result/doc[1]/str[@name='id'][.=3]",
    "//result/doc[2]/str[@name='id'][.=1]"
  };
  final String[] oneThenThree = {
    "//*[@numFound='2']",
    "//result/doc[1]/str[@name='id'][.=1]",
    "//result/doc[2]/str[@name='id'][.=3]"
  };

  // every term must be searchable in every field, even when the docValues are limited
  for (String searchF : Arrays.asList("whitespace_stxt", "whitespace_plain_txt",
                                      "whitespace_max3_stxt", "whitespace_max6_stxt",
                                      "whitespace_max0_stxt", "whitespace_maxNeg_stxt")) {
    // a maxChars of 0 or a negative value should be equivalent to having no max at all
    for (String sortF : Arrays.asList("whitespace_stxt", "whitespace_plain_str",
                                      "whitespace_max0_stxt", "whitespace_maxNeg_stxt")) {
      assertQ(req("q", searchF + ":ccc", "sort", sortF + " desc, id asc"), threeThenOne);
      assertQ(req("q", searchF + ":ccc", "sort", sortF + " asc, id desc"), oneThenThree);
    }
  }

  // expected orderings for the match-all queries over the 4 docs
  final String[] fourDownToOne = {
    "//*[@numFound='4']",
    "//result/doc[1]/str[@name='id'][.=4]",
    "//result/doc[2]/str[@name='id'][.=3]",
    "//result/doc[3]/str[@name='id'][.=2]",
    "//result/doc[4]/str[@name='id'][.=1]"
  };
  final String[] oneUpToFour = {
    "//*[@numFound='4']",
    "//result/doc[1]/str[@name='id'][.=1]",
    "//result/doc[2]/str[@name='id'][.=2]",
    "//result/doc[3]/str[@name='id'][.=3]",
    "//result/doc[4]/str[@name='id'][.=4]"
  };
  final String[] fourThenOneUpToThree = {
    "//*[@numFound='4']",
    "//result/doc[1]/str[@name='id'][.=4]",
    "//result/doc[2]/str[@name='id'][.=1]",
    "//result/doc[3]/str[@name='id'][.=2]",
    "//result/doc[4]/str[@name='id'][.=3]"
  };
  final String[] threeDownToOneThenFour = {
    "//*[@numFound='4']",
    "//result/doc[1]/str[@name='id'][.=3]",
    "//result/doc[2]/str[@name='id'][.=2]",
    "//result/doc[3]/str[@name='id'][.=1]",
    "//result/doc[4]/str[@name='id'][.=4]"
  };

  // sorting on a maxChars limited field should fall through to the tie breaker:
  // with max3 every doc ties, so the primary direction must not matter
  for (String direction : Arrays.asList("asc", "desc")) {
    // max3 (either direction), id desc
    assertQ(req("q", "*:*", "sort", "whitespace_max3_stxt "+direction+", id desc"), fourDownToOne);
    // max3 (either direction), id asc
    assertQ(req("q", "*:*", "sort", "whitespace_max3_stxt "+direction+", id asc"), oneUpToFour);
  }

  // with max6, doc 4 never needs the tie breaker: it is always first (asc) or last (desc),
  // while docs 1-3 are ordered purely by the secondary sort on id

  // max6 asc, id desc
  assertQ(req("q", "*:*", "sort", "whitespace_max6_stxt asc, id desc"), fourDownToOne);
  // max6 asc, id asc
  assertQ(req("q", "*:*", "sort", "whitespace_max6_stxt asc, id asc"), fourThenOneUpToThree);
  // max6 desc, id desc
  assertQ(req("q", "*:*", "sort", "whitespace_max6_stxt desc, id desc"), threeDownToOneThenFour);
  // max6 desc, id asc
  assertQ(req("q", "*:*", "sort", "whitespace_max6_stxt desc, id asc"), oneUpToFour);

  // sanity check that the default max is working....
  assertU(adoc("id","5", "whitespace_stxt", BIG_CONST + " aaa zzz"));
  assertU(adoc("id","6", "whitespace_stxt", BIG_CONST + " bbb zzz "));
  assertU(commit());

  final String[] fiveThenSix = {
    "//*[@numFound='2']",
    "//result/doc[1]/str[@name='id'][.=5]",
    "//result/doc[2]/str[@name='id'][.=6]"
  };
  final String[] sixThenFive = {
    "//*[@numFound='2']",
    "//result/doc[1]/str[@name='id'][.=6]",
    "//result/doc[2]/str[@name='id'][.=5]"
  };

  // for these fields only the tie breaker should matter, whatever the primary direction is
  for (String sortF : Arrays.asList("whitespace_stxt", "whitespace_nois_stxt")) {
    for (String direction : Arrays.asList("asc", "desc")) {
      assertQ(req("q", "whitespace_stxt:zzz", "sort", sortF + " " + direction + ", id asc"), fiveThenSix);
      assertQ(req("q", "whitespace_stxt:zzz", "sort", sortF + " " + direction + ", id desc"), sixThenFive);
    }
  }
}
/**
 * Exercises the various combinations of <code>useDocValuesAsStored</code> and
 * <code>maxCharsForDocValues</code> found on the <code>*_usedvs</code> types and fields
 * of the schema.
 */
public void testUseDocValuesAsStored() throws Exception {
  ignoreException("when useDocValuesAsStored=true \\(length=");

  // Unlike most field types, SortableTextField should default to useDocValuesAsStored==false.
  // Spot check a handful of names that are expected to have that default behavior,
  // looking each one up both as a type and as a field.
  for (String name : Arrays.asList("keyword_stxt", "whitespace_max0_stxt", "whitespace_max6_stxt")) {
    final FieldType type = h.getCore().getLatestSchema().getFieldTypeByName(name);
    assertEquals("type " + type.getTypeName() + " should not default to useDocValuesAsStored",
                 false, type.useDocValuesAsStored());

    final SchemaField field = h.getCore().getLatestSchema().getField(name);
    assertEquals("field " + field.getName() + " should not default to useDocValuesAsStored",
                 false, field.useDocValuesAsStored());
  }

  // ...but an explicit useDocValuesAsStored=true on a type must be honored...
  int typesSeen = 0;
  for (Map.Entry<String,FieldType> typeEntry : h.getCore().getLatestSchema().getFieldTypes().entrySet()) {
    if (! typeEntry.getKey().endsWith("_has_usedvs")) {
      continue;
    }
    typesSeen++;
    final FieldType type = typeEntry.getValue();
    assertEquals("type " + type.getTypeName() + " has unexpected useDocValuesAsStored value",
                 true, type.useDocValuesAsStored());
  }
  assertEquals("sanity check: wrong number of *_has_usedvs types found -- schema changed?",
               2, typesSeen);

  // ...and fields must be able to set/override useDocValuesAsStored as well
  int fieldsSeen = 0;
  final List<String> xpaths = new ArrayList<>(42);
  for (Map.Entry<String,SchemaField> fieldEntry : h.getCore().getLatestSchema().getFields().entrySet()) {
    if (! fieldEntry.getKey().endsWith("_usedvs")) {
      continue;
    }
    fieldsSeen++;
    final SchemaField field = fieldEntry.getValue();
    final String name = field.getName();

    // sanity check the naming assumptions the rest of this loop depends on
    assertFalse("schema change? field should not be stored=true: " + name, field.stored());
    final boolean usedvs = name.endsWith("_has_usedvs");
    assertTrue("schema change broke assumptions: field must be '*_has_usedvs' or '*_negates_usedvs': " +
               name, usedvs ^ name.endsWith("_negates_usedvs"));
    final boolean max6 = name.startsWith("max6_");
    assertTrue("schema change broke assumptions: field must be 'max6_*' or 'max0_*': " +
               name, max6 ^ name.startsWith("max0_"));
    assertEquals("Unexpected useDocValuesAsStored value for field: " + name,
                 usedvs, field.useDocValuesAsStored());

    final String docid = String.valueOf(fieldsSeen);

    if (usedvs && max6) {
      // useDocValuesAsStored==true combined with maxCharsForDocValues=N: longer values must fail
      final String doc = adoc("id", docid, name, "apple pear orange");
      final SolrException ex = expectThrows(SolrException.class, () -> assertU(doc));
      for (String fragment : Arrays.asList("field " + name,
                                           "length=17",
                                           "useDocValuesAsStored=true",
                                           "maxCharsForDocValues=6")) {
        assertTrue("exception must mention " + fragment + ": " + ex.getMessage(),
                   ex.getMessage().contains(fragment));
      }
      continue;
    }

    // otherwise (useDocValuesAsStored==false *OR* maxCharsForDocValues=0) any value should be
    // accepted when adding a doc, and the doc should be findable later...
    final String val = docid + " apple pear orange " + BIG_CONST;
    assertU(adoc("id", docid, name, val));
    final String docPrefix = "//result/doc[str[@name='id'][.='"+docid+"']]";
    final String fieldCheck = usedvs
      // ...with our value definitely returned when the field *does* usedvs...
      ? "[str[@name='"+name+"'][.='"+val+"']]"
      // ...and definitely no value at all returned for our field when it does not
      : "[not(str[@name='"+name+"'])]";
    xpaths.add(docPrefix + fieldCheck);
  }
  assertEquals("sanity check: wrong number of *_usedvs fields found -- schema changed?",
               6, fieldsSeen);

  // check that all the expected docs can be found (with the expected values)
  assertU(commit());
  final int expectedDocs = xpaths.size();
  xpaths.add("//*[@numFound='"+expectedDocs+"']");
  assertQ(req("q", "*:*", "fl", "*"), xpaths.toArray(new String[xpaths.size()]));
}
/**
 * Tests that a SortableTextField using KeywordTokenizer (w/docValues) behaves exactly the
 * same as the StrFields it is copied to, for both querying and sorting.
 */
public void testRandomStrEquivilentBehavior() throws Exception {
  final List<String> fieldNames = Arrays.asList("keyword_stxt", "keyword_dv_stxt",
                                                "keyword_s_dv", "keyword_s");
  // an embedded client is used instead of assertQ because the responses from
  // multiple requests have to be compared against each other
  // NOTE(review): the client is never closed in this method; presumably closing it would
  // affect the shared test core -- confirm before wrapping it in try-with-resources
  final SolrClient client = new EmbeddedSolrServer(h.getCore());

  final int numDocs = atLeast(100);
  final int magicIdx = TestUtil.nextInt(random(), 1, numDocs);
  String magic = null;
  for (int id = 1; id <= numDocs; id++) {
    // ideally all "realistic" unicode strings would be tested, but EmbeddedSolrServer uses the
    // XML request writer (with no option to change that) so ctrl-characters break the request
    final String value = TestUtil.randomSimpleString(random(), 100);
    if (magicIdx == id) {
      magic = value;
    }
    assertEquals(0, client.add(sdoc("id", ""+id, "keyword_stxt", value)).getStatus());
  }
  assertNotNull(magic);
  assertEquals(0, client.commit().getStatus());

  // a query for the magic term should match the same doc whatever the field
  // (reminder: keyword tokenizer). The id filter is needed for the unlikely event
  // that the magic value is randomly picked twice.
  for (String field : fieldNames) {
    final SolrDocumentList hits = client.query(params("q", "{!field f="+field+" v=$v}",
                                                      "v", magic,
                                                      "fq", "id:" + magicIdx)).getResults();
    final String context = field + ": Query ("+magic+") filtered by id: " + magicIdx + " ==> ";
    assertEquals(context + hits, 1L, hits.getNumFound());
    final SolrDocument hit = hits.get(0);
    assertEquals(context + hit, ""+magicIdx, hit.getFieldValue("id"));
    assertEquals(context + hit, magic, hit.getFieldValue(field));
  }

  // run some random id range queries, sorting on each of the fields in turn:
  // the results should be identical from one field to the next
  final int numQueries = atLeast(10);
  for (int q = 0; q < numQueries; q++) {
    final int hi = TestUtil.nextInt(random(), 1, numDocs-1);
    final int lo = TestUtil.nextInt(random(), 1, hi);
    final boolean fwd = random().nextBoolean();
    final String direction = fwd ? " asc" : " desc";

    SolrDocumentList prevResults = null;
    String prevField = null;
    for (String field : fieldNames) {
      // the secondary sort on id is there for determinism
      final String sort = field + direction + ", id asc";
      final SolrDocumentList results
        = client.query(params("q","id_i:["+lo+" TO "+hi+"]", "sort", sort)).getResults();
      assertEquals(results.toString(), (1L + hi - lo), results.getNumFound());

      if (prevResults != null) {
        final String label = prevField + " vs " + field;
        assertEquals(label, prevResults.getNumFound(), results.getNumFound());
        for (int pos = 0; pos < results.size(); pos++) {
          final SolrDocument before = prevResults.get(pos);
          final SolrDocument current = results.get(pos);
          assertEquals(label + ": " + pos,
                       before.getFieldValue("id"),
                       current.getFieldValue("id"));
          assertEquals(label + ": " + pos,
                       before.getFieldValue(prevField),
                       current.getFieldValue(field));
        }
      }
      prevResults = results;
      prevField = field;
    }
  }
}
}

View File

@ -59,6 +59,11 @@ public class TestMinMaxOnMultiValuedField extends SolrTestCaseJ4 {
"date_missf_", "date_missl_", "date_missf_", "date_missl_",
"enum_missf_", "enum_missl_", "enum_missf_", "enum_missl_",
"bool_missf_", "bool_missl_" }, new String [] {"_dv"}); "bool_missf_", "bool_missl_" }, new String [] {"_dv"});
checkFields(new String[] {"stxt_", // no expectation on missing first/last
"stxt_missf_", "stxt_missl_" }, new String [] { "_dv"});
checkFields(new String [] { "stxt_" }, // no expectation on missing first/last
new String [] { "_nodv", "_dv" });
checkFields(new String [] { "stxt_missf_", "stxt_missl_" }, new String [] { "_dv"});
} }
@ -71,8 +76,9 @@ public class TestMinMaxOnMultiValuedField extends SolrTestCaseJ4 {
SchemaField sf = schema.getField(f); SchemaField sf = schema.getField(f);
assertTrue(f + " is not multivalued", sf.multiValued()); assertTrue(f + " is not multivalued", sf.multiValued());
assertEquals(f + " doesn't have expected docValues status", assertEquals(f + " doesn't have expected docValues status",
f.contains("dv") || f.endsWith("_p") ((f.contains("dv") || f.endsWith("_p") || Boolean.getBoolean(NUMERIC_DOCVALUES_SYSPROP))
|| Boolean.getBoolean(NUMERIC_DOCVALUES_SYSPROP), sf.hasDocValues()); && !f.contains("nodv")),
sf.hasDocValues());
assertEquals(f + " doesn't have expected index status", assertEquals(f + " doesn't have expected index status",
! f.contains("ni"), sf.indexed()); ! f.contains("ni"), sf.indexed());
@ -178,19 +184,27 @@ public class TestMinMaxOnMultiValuedField extends SolrTestCaseJ4 {
} }
public void testBasicStrings() { public void testBasicStrings() {
checkBasicStrings("val_strs_dv");
}
public void testBasicSortableText() {
checkBasicStrings("val_stxt_s_dv");
checkBasicStrings("val_stxt_missf_s_dv");
checkBasicStrings("val_stxt_missl_s_dv");
}
private void checkBasicStrings(final String field) {
assertU(adoc(sdoc("id", "1", assertU(adoc(sdoc("id", "1",
"val_strs_dv", "dog", field, "dog",
"val_strs_dv", "xyz", field, "xyz",
"val_strs_dv", "cat"))); field, "cat")));
assertU(adoc(sdoc("id", "2"))); // 2 has no val_strs_dv values assertU(adoc(sdoc("id", "2"))); // 2 has no values in tested field
assertU(commit()); assertU(commit());
// id=1: has values // id=1: has values
assertQ(req("q","id:1" assertQ(req("q","id:1"
,"fl","exists_min_str:exists(field(val_strs_dv,min))" ,"fl","exists_min_str:exists(field("+field+",min))"
,"fl","exists_max_str:exists(field(val_strs_dv,max))" ,"fl","exists_max_str:exists(field("+field+",max))"
,"fl","min_str:field(val_strs_dv,min)" ,"fl","min_str:field("+field+",min)"
,"fl","max_str:field(val_strs_dv,max)" ,"fl","max_str:field("+field+",max)"
) )
,"//*[@numFound='1']" ,"//*[@numFound='1']"
@ -201,10 +215,10 @@ public class TestMinMaxOnMultiValuedField extends SolrTestCaseJ4 {
); );
// id=2: no values // id=2: no values
assertQ(req("q","id:2" assertQ(req("q","id:2"
,"fl","exists_min_str:exists(field(val_strs_dv,min))" ,"fl","exists_min_str:exists(field("+field+",min))"
,"fl","exists_max_str:exists(field(val_strs_dv,max))" ,"fl","exists_max_str:exists(field("+field+",max))"
,"fl","min_str:field(val_strs_dv,min)" ,"fl","min_str:field("+field+",min)"
,"fl","max_str:field(val_strs_dv,max)" ,"fl","max_str:field("+field+",max)"
) )
,"//*[@numFound='1']" ,"//*[@numFound='1']"
@ -219,6 +233,10 @@ public class TestMinMaxOnMultiValuedField extends SolrTestCaseJ4 {
testExpectedSortOrdering("val_strs_dv", false, testExpectedSortOrdering("val_strs_dv", false,
null, "a", "cat", "dog", "wako", "xyz", "zzzzz"); null, "a", "cat", "dog", "wako", "xyz", "zzzzz");
} }
public void testExpectedSortOrderingSortableText() {
testExpectedSortOrdering("val_stxt_s_dv", false,
null, "a", "cat", "dog", "wako", "xyz", "zzzzz");
}
public void testExpectedSortMissingOrderings() { public void testExpectedSortMissingOrderings() {
@ -226,7 +244,9 @@ public class TestMinMaxOnMultiValuedField extends SolrTestCaseJ4 {
// (in this simple test) we aren't using a secondary sort, so there is no way to disambiguate // (in this simple test) we aren't using a secondary sort, so there is no way to disambiguate
// docs that have those values from docs that have those *effective* sort values // docs that have those values from docs that have those *effective* sort values
testSortMissingMinMax("val_str", "a", "aaaaaa", "xxxxx", "zzzzzzzzzzzzzzzzzzz"); testSortMissingMinMax("val_str", "a", "aaaaaa", "xxxxx", "zzzzzzzzzzzzzzzzzzz");
testSortMissingMinMax("val_stxt", "a", "aaaaaa", "xxxxx", "zzzzzzzzzzzzzzzzzzz");
testSortMissingMinMax("val_int", testSortMissingMinMax("val_int",
Integer.MIN_VALUE+1L, -9999, 0, 99999, Integer.MAX_VALUE-1L); Integer.MIN_VALUE+1L, -9999, 0, 99999, Integer.MAX_VALUE-1L);
testSortMissingMinMax("val_long", testSortMissingMinMax("val_long",
@ -382,6 +402,15 @@ public class TestMinMaxOnMultiValuedField extends SolrTestCaseJ4 {
"docValues", "docValues",
req("q","*:*", "fl", "field(cat,'max')"), req("q","*:*", "fl", "field(cat,'max')"),
SolrException.ErrorCode.BAD_REQUEST); SolrException.ErrorCode.BAD_REQUEST);
assertQEx("no error mentioning field name when asking for max on a non-dv sortable text field",
"val_stxt_s_nodv",
req("q","*:*", "fl", "field(val_stxt_s_nodv,'max')"),
SolrException.ErrorCode.BAD_REQUEST);
assertQEx("no error mentioning 'docValues' when asking for max on a non-dv sortable field",
"docValues",
req("q","*:*", "fl", "field(val_stxt_s_nodv,'max')"),
SolrException.ErrorCode.BAD_REQUEST);
} }

View File

@ -20,7 +20,7 @@ An analyzer examines the text of fields and generates a token stream.
Analyzers are specified as a child of the `<fieldType>` element in the `schema.xml` configuration file (in the same `conf/` directory as `solrconfig.xml`). Analyzers are specified as a child of the `<fieldType>` element in the `schema.xml` configuration file (in the same `conf/` directory as `solrconfig.xml`).
In normal usage, only fields of type `solr.TextField` will specify an analyzer. The simplest way to configure an analyzer is with a single `<analyzer>` element whose class attribute is a fully qualified Java class name. The named class must derive from `org.apache.lucene.analysis.Analyzer`. For example: In normal usage, only fields of type `solr.TextField` or `solr.SortableTextField` will specify an analyzer. The simplest way to configure an analyzer is with a single `<analyzer>` element whose class attribute is a fully qualified Java class name. The named class must derive from `org.apache.lucene.analysis.Analyzer`. For example:
[source,xml] [source,xml]
---- ----

View File

@ -37,10 +37,11 @@ Solr can sort query responses according to:
* Document scores * Document scores
* <<function-queries.adoc#sort-by-function,Function results>> * <<function-queries.adoc#sort-by-function,Function results>>
* The value of any primitive field (numerics, string, boolean, dates, etc...) which has `docValues="true"` (or `multiValued="false"` and `indexed="true"` in which case the indexed terms will be used to build DocValue like structures on the fly at runtime) * The value of any primitive field (numerics, string, boolean, dates, etc...) which has `docValues="true"` (or `multiValued="false"` and `indexed="true"` in which case the indexed terms will be used to build DocValue like structures on the fly at runtime)
* A TextField that uses an analyzer (such as the KeywordTokenizer) that produces only a single term. * A SortableTextField which implicitly uses `docValues="true"` by default to allow sorting on the original input string regardless of the analyzers used for Searching.
* A single-valued TextField that uses an analyzer (such as the KeywordTokenizer) that produces only a single term per document. TextField does not support docValues="true", but a DocValue like structure will be built on the fly at runtime.
** *NOTE:* If you want to be able to sort on a field whose contents you want to tokenize to facilitate searching, <<copying-fields.adoc#copying-fields,use a `copyField` directive>> in the Schema to clone the field. Then search on the field and sort on its clone. ** *NOTE:* If you want to be able to sort on a field whose contents you want to tokenize to facilitate searching, <<copying-fields.adoc#copying-fields,use a `copyField` directive>> in the Schema to clone the field. Then search on the field and sort on its clone.
In the case of primitive fields that are `multiValued="true"` the representative value used for each doc when sorting depends on the sort direction: The minimum value in each document is used for ascending (`asc`) sorting, while the maximal value in each document is used for descending (`desc`) sorting. This default behavior is equivalent to explicitly sorting using the 2 argument `<<function-queries.adoc#field-function,field()>>` function: `sort=field(name,min) asc` and `sort=field(name,max) desc` In the case of primitive fields, or SortableTextFields, that are `multiValued="true"` the representative value used for each doc when sorting depends on the sort direction: The minimum value in each document is used for ascending (`asc`) sorting, while the maximal value in each document is used for descending (`desc`) sorting. This default behavior is equivalent to explicitly sorting using the 2 argument `<<function-queries.adoc#field-function,field()>>` function: `sort=field(name,min) asc` and `sort=field(name,max) desc`
The table below explains how Solr responds to various settings of the `sort` parameter. The table below explains how Solr responds to various settings of the `sort` parameter.

View File

@ -69,6 +69,8 @@ Configuration and usage of PreAnalyzedField is documented in the section <<work
|StrField |String (UTF-8 encoded string or Unicode). Strings are intended for small fields and are _not_ tokenized or analyzed in any way. They have a hard limit of slightly less than 32K. |StrField |String (UTF-8 encoded string or Unicode). Strings are intended for small fields and are _not_ tokenized or analyzed in any way. They have a hard limit of slightly less than 32K.
|SortableTextField |A specialized version of TextField that allows (and defaults to) `docValues="true"` for sorting on the first 1024 characters of the original string prior to analysis -- the number of characters used for sorting can be overridden with the `maxCharsForDocValues` attribute.
|TextField |Text, usually multiple words or tokens. |TextField |Text, usually multiple words or tokens.
|TrieDateField |*Deprecated*. Use DatePointField instead. |TrieDateField |*Deprecated*. Use DatePointField instead.
@ -91,4 +93,4 @@ Configuration and usage of PreAnalyzedField is documented in the section <<work
NOTE: All Trie* numeric and date field types have been deprecated in favor of *Point field types. NOTE: All Trie* numeric and date field types have been deprecated in favor of *Point field types.
Point field types are better at range queries (speed, memory, disk), however simple field:value queries underperform Point field types are better at range queries (speed, memory, disk), however simple field:value queries underperform
relative to Trie. Either accept this, or continue to use Trie fields. relative to Trie. Either accept this, or continue to use Trie fields.
This shortcoming may be addressed in a future release. This shortcoming may be addressed in a future release.