From 95122e14481a4dd623e184ca261f8bf158fd3a7c Mon Sep 17 00:00:00 2001 From: Chris Hostetter Date: Thu, 1 Feb 2018 10:40:29 -0700 Subject: [PATCH] SOLR-11916: new SortableTextField which supports analysis/searching just like TextField, but also sorting/faceting just like StrField --- solr/CHANGES.txt | 4 + .../apache/solr/schema/SortableTextField.java | 215 +++++++ .../org/apache/solr/schema/TextField.java | 9 +- .../collection1/conf/schema-sorting-text.xml | 149 +++++ .../solr/collection1/conf/schema11.xml | 14 + .../solr/rest/schema/TestBulkSchemaAPI.java | 56 +- .../solr/schema/TestSortableTextField.java | 562 ++++++++++++++++++ .../TestMinMaxOnMultiValuedField.java | 59 +- solr/solr-ref-guide/src/analyzers.adoc | 2 +- .../src/common-query-parameters.adoc | 5 +- .../src/field-types-included-with-solr.adoc | 4 +- 11 files changed, 1057 insertions(+), 22 deletions(-) create mode 100644 solr/core/src/java/org/apache/solr/schema/SortableTextField.java create mode 100644 solr/core/src/test-files/solr/collection1/conf/schema-sorting-text.xml create mode 100644 solr/core/src/test/org/apache/solr/schema/TestSortableTextField.java diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 223e8714be8..60e4aba2a0f 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -149,6 +149,10 @@ New Features * SOLR-11890: Add multiKmeans Stream Evaluator (Joel Bernstein) +* SOLR-11916: new SortableTextField which supports analysis/searching just like TextField, but also + sorting/faceting just like StrField. By default uses only the first 1024 chars of the original + input string values, but this is configurable. 
(hossman) + Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/schema/SortableTextField.java b/solr/core/src/java/org/apache/solr/schema/SortableTextField.java new file mode 100644 index 00000000000..c30b1b1216f --- /dev/null +++ b/solr/core/src/java/org/apache/solr/schema/SortableTextField.java @@ -0,0 +1,215 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.schema; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource; +import org.apache.lucene.search.*; +import org.apache.lucene.util.BytesRef; +import org.apache.solr.common.SolrException; +import org.apache.solr.search.QParser; +import org.apache.solr.uninverting.UninvertingReader.Type; + +/** + *

+ * SortableTextField is a specialized form of {@link TextField} that supports + * Sorting and ValueSource functions, using docValues built from the first + * maxCharsForDocValues characters of the original (pre-analyzed) String values of this field. + *

+ *

+ * The implicit default value for maxCharsForDocValues is 1024. If a field + * type instance is configured with maxCharsForDocValues <= 0 this overrides the default + * with an effective value of "no limit" ({@link Integer#MAX_VALUE}). + *

+ *

+ * Instances of this FieldType implicitly default to docValues="true" unless explicitly + * configured with docValues="false". + *

+ *

+ * Just like {@link StrField}, instances of this field that are multiValued="true" support + * the field(name,min|max) function, and implicitly sort on min|max depending + * on the asc|desc direction selector. + *

+ * + *

+ * NOTE: Unlike most other FieldTypes, this class defaults to + * useDocValuesAsStored="false". If an instance of this type (or a field that uses this type) + * overrides this behavior to set useDocValuesAsStored="true" then instead of truncating the + * original string value based on the effective value of maxCharsForDocValues, this class + * will reject any documents w/a field value longer than that limit -- causing the document update to fail. + * This behavior exists to prevent situations that could result in a search client receiving only a truncated + * version of the original field value in place of a stored value. + *

+ */ +public class SortableTextField extends TextField { + + public static final int DEFAULT_MAX_CHARS_FOR_DOC_VALUES = 1024; + + private int maxCharsForDocValues = DEFAULT_MAX_CHARS_FOR_DOC_VALUES; + + protected void init(IndexSchema schema, Map args) { + { + final String maxS = args.remove("maxCharsForDocValues"); + if (maxS != null) { + maxCharsForDocValues = Integer.parseInt(maxS); + if (maxCharsForDocValues <= 0) { + maxCharsForDocValues = Integer.MAX_VALUE; + } + } + } + + // by the time our init() is called, super.setArgs has already removed & procesesd any explicit + // "docValues=foo" or useDocValuesAsStored=bar args... + // - If the user explicitly said docValues=false, we want to respect that and not change it. + // - if the user didn't explicit specify anything, then we want to implicitly *default* docValues=true + // - The inverse is true for useDocValuesAsStored=true: + // - if explict, then respect it; else implicitly default to useDocValuesAsStored=false + // ...lucky for us, setArgs preserved info about explicitly set true|false properties... + if (! on(falseProperties, DOC_VALUES)) { + properties |= DOC_VALUES; + } + if (! on(trueProperties, USE_DOCVALUES_AS_STORED)) { + properties &= ~USE_DOCVALUES_AS_STORED; + } + + super.init(schema, args); + } + + @Override + public List createFields(SchemaField field, Object value) { + IndexableField f = createField( field, value); + if (! 
field.hasDocValues()) { + return Collections.singletonList(f); + } + final String origString = value.toString(); + final int origLegth = origString.length(); + final boolean truncate = maxCharsForDocValues < origLegth; + if (field.useDocValuesAsStored() && truncate) { + // if the user has explicitly configured useDocValuesAsStored, we need a special + // check to fail docs where the values are too long -- we don't want to silently + // accept and then have search queries returning partial values + throw new SolrException + (SolrException.ErrorCode.BAD_REQUEST, + "Can not use field " + field.getName() + " with values longer then maxCharsForDocValues=" + + maxCharsForDocValues + " when useDocValuesAsStored=true (length=" + origLegth + ")"); + } + final BytesRef bytes = new BytesRef(truncate ? origString.subSequence(0, maxCharsForDocValues) : origString); + + final IndexableField docval = field.multiValued() + ? new SortedSetDocValuesField(field.getName(), bytes) + : new SortedDocValuesField(field.getName(), bytes); + + if (null == f) { + return Collections.singletonList(docval); + } + return Arrays.asList(f, docval); + } + + + /** + * {@inheritDoc} + * this field type supports DocValues, this method is always a No-Op + */ + @Override + protected void checkSupportsDocValues() { + // No-Op + } + + @Override + public SortField getSortField(SchemaField field, boolean reverse) { + if (! field.hasDocValues()) { + // type defaults to docValues=true, so error msg from perspective that + // either type or field must have docValues="false" + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + "Can not sort on this type of field when docValues=\"false\", field: " + field.getName()); + } + + // NOTE: we explicitly bypass super.getSortField so that our getDefaultMultiValueSelectorForSort + // is used and we don't get the historic Uninversion behavior of TextField. 
+ return getStringSort(field, reverse); + } + + @Override + public ValueSource getValueSource(SchemaField field, QParser parser) { + if (! field.hasDocValues()) { + // type defaults to docValues=true, so error msg from perspective that + // either type or field must have docValues="false" + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + "Can not use ValueSource on this type of field when docValues=\"false\", field: " + field.getName()); + } + return super.getValueSource(field, parser); + } + + @Override + public MultiValueSelector getDefaultMultiValueSelectorForSort(SchemaField field, boolean reverse) { + return reverse ? MultiValueSelector.MAX : MultiValueSelector.MIN; + } + + @Override + public ValueSource getSingleValueSource(MultiValueSelector choice, SchemaField field, QParser parser) { + // trivial base case + if (!field.multiValued()) { + // single value matches any selector + return getValueSource(field, parser); + } + + // See LUCENE-6709 + if (! field.hasDocValues()) { + // type defaults to docValues=true, so error msg from perspective that + // either type or field must have docValues="false" + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + "Can not select '" + choice.toString() + "' value from multivalued field ("+ + field.getName() +") when docValues=\"false\", field: " + field.getName()); + } + SortedSetSelector.Type selectorType = choice.getSortedSetSelectorType(); + if (null == selectorType) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + choice.toString() + " is not a supported option for picking a single value" + + " from the multivalued field: " + field.getName() + + " (type: " + this.getTypeName() + ")"); + } + + return new SortedSetFieldSource(field.getName(), selectorType); + } + + /** + * {@inheritDoc} + * this field type is not uninvertable, this method always returns null + */ + @Override + public Type getUninversionType(SchemaField sf) { + return null; + } + + /** + * {@inheritDoc} + * 
This implementation always returns false. + */ + @Override + public boolean multiValuedFieldCache() { + return false; + } + +} diff --git a/solr/core/src/java/org/apache/solr/schema/TextField.java b/solr/core/src/java/org/apache/solr/schema/TextField.java index d6bfd7b2fd2..8920c53a2dd 100644 --- a/solr/core/src/java/org/apache/solr/schema/TextField.java +++ b/solr/core/src/java/org/apache/solr/schema/TextField.java @@ -118,8 +118,13 @@ public class TextField extends FieldType { @Override public SortField getSortField(SchemaField field, boolean reverse) { /* :TODO: maybe warn if isTokenized(), but doesn't use LimitTokenCountFilter in its chain? */ - return getSortedSetSortField(field, SortedSetSelector.Type.MIN, reverse, - SortField.STRING_FIRST, SortField.STRING_LAST); + return getSortedSetSortField(field, + // historical behavior based on how the early versions of the FieldCache + // would deal with multiple indexed terms in a singled valued field... + // + // Always use the 'min' value from the (Uninverted) "psuedo doc values" + SortedSetSelector.Type.MIN, + reverse, SortField.STRING_FIRST, SortField.STRING_LAST); } @Override diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-sorting-text.xml b/solr/core/src/test-files/solr/collection1/conf/schema-sorting-text.xml new file mode 100644 index 00000000000..cf526b765b2 --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/schema-sorting-text.xml @@ -0,0 +1,149 @@ + + + + id + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/core/src/test-files/solr/collection1/conf/schema11.xml b/solr/core/src/test-files/solr/collection1/conf/schema11.xml index d09e2097247..6f38e7fb8ab 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema11.xml +++ 
b/solr/core/src/test-files/solr/collection1/conf/schema11.xml @@ -519,6 +519,20 @@ valued. --> + + + + + + + + + + + + + + diff --git a/solr/core/src/test/org/apache/solr/rest/schema/TestBulkSchemaAPI.java b/solr/core/src/test/org/apache/solr/rest/schema/TestBulkSchemaAPI.java index 4bebeca2ac2..3cc07c7a812 100644 --- a/solr/core/src/test/org/apache/solr/rest/schema/TestBulkSchemaAPI.java +++ b/solr/core/src/test/org/apache/solr/rest/schema/TestBulkSchemaAPI.java @@ -24,6 +24,7 @@ import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.misc.SweetSpotSimilarity; import org.apache.lucene.search.similarities.Similarity; +import org.apache.solr.common.SolrDocumentList; import org.apache.solr.core.SolrCore; import org.apache.solr.core.CoreContainer; import org.apache.solr.schema.SimilarityFactory; @@ -84,7 +85,6 @@ public class TestBulkSchemaAPI extends RestTestBase { jetty.stop(); jetty = null; } - client = null; if (restTestHarness != null) { restTestHarness.close(); } @@ -840,7 +840,61 @@ public class TestBulkSchemaAPI extends RestTestBase { map = (Map)ObjectBuilder.getVal(new JSONParser(new StringReader(response))); assertNull(map.get("error")); } + public void testSortableTextFieldWithAnalyzer() throws Exception { + String fieldTypeName = "sort_text_type"; + String fieldName = "sort_text"; + String payload = "{\n" + + " 'add-field-type' : {" + + " 'name' : '" + fieldTypeName + "',\n" + + " 'stored':true,\n" + + " 'indexed':true\n" + + " 'maxCharsForDocValues':6\n" + + " 'class':'solr.SortableTextField',\n" + + " 'analyzer' : {'tokenizer':{'class':'solr.WhitespaceTokenizerFactory'}},\n" + + " },\n"+ + " 'add-field' : {\n" + + " 'name':'" + fieldName + "',\n" + + " 'type': '"+fieldTypeName+"',\n" + + " }\n" + + "}\n"; + String response = restTestHarness.post("/schema", json(payload)); + + Map map = (Map) ObjectBuilder.getVal(new JSONParser(new StringReader(response))); + assertNull(response, map.get("errors")); + + Map fields = 
getObj(restTestHarness, fieldName, "fields"); + assertNotNull("field " + fieldName + " not created", fields); + + assertEquals(0, + getSolrClient().add(Arrays.asList(sdoc("id","1",fieldName,"xxx aaa"), + sdoc("id","2",fieldName,"xxx bbb aaa"), + sdoc("id","3",fieldName,"xxx bbb zzz"))).getStatus()); + + assertEquals(0, getSolrClient().commit().getStatus()); + { + SolrDocumentList docs = getSolrClient().query + (params("q",fieldName+":xxx","sort", fieldName + " asc, id desc")).getResults(); + + assertEquals(3L, docs.getNumFound()); + assertEquals(3L, docs.size()); + assertEquals("1", docs.get(0).getFieldValue("id")); + assertEquals("3", docs.get(1).getFieldValue("id")); + assertEquals("2", docs.get(2).getFieldValue("id")); + } + { + SolrDocumentList docs = getSolrClient().query + (params("q",fieldName+":xxx", "sort", fieldName + " desc, id asc")).getResults(); + + assertEquals(3L, docs.getNumFound()); + assertEquals(3L, docs.size()); + assertEquals("2", docs.get(0).getFieldValue("id")); + assertEquals("3", docs.get(1).getFieldValue("id")); + assertEquals("1", docs.get(2).getFieldValue("id")); + } + + } + public void testSimilarityParser() throws Exception { RestTestHarness harness = restTestHarness; diff --git a/solr/core/src/test/org/apache/solr/schema/TestSortableTextField.java b/solr/core/src/test/org/apache/solr/schema/TestSortableTextField.java new file mode 100644 index 00000000000..2e861f54d73 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/schema/TestSortableTextField.java @@ -0,0 +1,562 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.schema; + +import java.util.Arrays; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import org.apache.commons.lang.StringUtils; + +import org.apache.lucene.document.Field; +import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.util.TestUtil; + +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrDocumentList; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.client.solrj.SolrClient; +import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer; +import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.util.RefCounted; + +import org.junit.Before; +import org.junit.BeforeClass; +import static org.hamcrest.CoreMatchers.instanceOf; + +public class TestSortableTextField extends SolrTestCaseJ4 { + + protected static final String BIG_CONST + = StringUtils.repeat("x", SortableTextField.DEFAULT_MAX_CHARS_FOR_DOC_VALUES); + + @BeforeClass + public static void create() throws Exception { + initCore("solrconfig-minimal.xml","schema-sorting-text.xml"); + + // sanity check our fields & types... + + // these should all use docValues (either explicitly or implicitly)... 
+ for (String n : Arrays.asList("keyword_stxt", + "whitespace_stxt", "whitespace_f_stxt", "whitespace_l_stxt")) { + + FieldType ft = h.getCore().getLatestSchema().getFieldTypeByName(n); + assertEquals("type " + ft.getTypeName() + " should have docvalues - schema got changed?", + true, ft.getNamedPropertyValues(true).get("docValues")) ; + } + for (String n : Arrays.asList("keyword_stxt", "keyword_dv_stxt", + "whitespace_stxt", "whitespace_nois_stxt", + "whitespace_f_stxt", "whitespace_l_stxt")) { + + SchemaField sf = h.getCore().getLatestSchema().getField(n); + assertTrue("field " + sf.getName() + " should have docvalues - schema got changed?", + sf.hasDocValues()) ; + } + + { // this field should *NOT* have docValues .. should behave like a plain old TextField + SchemaField sf = h.getCore().getLatestSchema().getField("whitespace_nodv_stxt"); + assertFalse("field " + sf.getName() + " should not have docvalues - schema got changed?", + sf.hasDocValues()) ; + } + + } + + @Before + public void cleanup() throws Exception { + clearIndex(); + } + + public void testSimple() throws Exception { + assertU(adoc("id","1", "whitespace_stxt", "how now brown cow ?", "whitespace_f_stxt", "aaa bbb")); + assertU(adoc("id","2", "whitespace_stxt", "how now brown dog ?", "whitespace_f_stxt", "bbb aaa")); + assertU(adoc("id","3", "whitespace_stxt", "how now brown cat ?", "whitespace_f_stxt", "xxx yyy")); + assertU(adoc("id","4", "whitespace_stxt", "dog and cat" /* no val for whitespace_f_stxt */)); + + assertU(commit()); + + // search & sort + // NOTE: even if the field is indexed=false, should still be able to sort on it + for (String sortf : Arrays.asList("whitespace_stxt", "whitespace_nois_stxt", "whitespace_plain_str")) { + assertQ(req("q", "whitespace_stxt:cat", "sort", sortf + " asc") + , "//*[@numFound='2']" + , "//result/doc[1]/str[@name='id'][.=4]" + , "//result/doc[2]/str[@name='id'][.=3]" + ); + assertQ(req("q", "whitespace_stxt:cat", "sort", sortf + " desc") + , 
"//*[@numFound='2']" + , "//result/doc[1]/str[@name='id'][.=3]" + , "//result/doc[2]/str[@name='id'][.=4]" + ); + assertQ(req("q", "whitespace_stxt:brown", "sort", sortf + " asc") + , "//*[@numFound='3']" + , "//result/doc[1]/str[@name='id'][.=3]" + , "//result/doc[2]/str[@name='id'][.=1]" + , "//result/doc[3]/str[@name='id'][.=2]" + ); + assertQ(req("q", "whitespace_stxt:brown", "sort", sortf + " desc") + , "//*[@numFound='3']" + , "//result/doc[1]/str[@name='id'][.=2]" + , "//result/doc[2]/str[@name='id'][.=1]" + , "//result/doc[3]/str[@name='id'][.=3]" + ); + + // we should still be able to search if docValues="false" (but sort on a diff field) + assertQ(req("q","whitespace_nodv_stxt:cat", "sort", sortf + " asc") + , "//*[@numFound='2']" + , "//result/doc[1]/str[@name='id'][.=4]" + , "//result/doc[2]/str[@name='id'][.=3]" + ); + } + + // attempting to sort on docValues="false" field should give an error... + assertQEx("attempting to sort on docValues=false field should give an error", + "when docValues=\"false\"", + req("q","*:*", "sort", "whitespace_nodv_stxt asc"), + ErrorCode.BAD_REQUEST); + + // sortMissing - whitespace_f_stxt copyField to whitespace_l_stxt + assertQ(req("q","*:*", "sort", "whitespace_f_stxt asc") + , "//*[@numFound='4']" + , "//result/doc[1]/str[@name='id'][.=4]" + , "//result/doc[2]/str[@name='id'][.=1]" + , "//result/doc[3]/str[@name='id'][.=2]" + , "//result/doc[4]/str[@name='id'][.=3]" + ); + assertQ(req("q","*:*", "sort", "whitespace_f_stxt desc") + , "//*[@numFound='4']" + , "//result/doc[1]/str[@name='id'][.=4]" + , "//result/doc[2]/str[@name='id'][.=3]" + , "//result/doc[3]/str[@name='id'][.=2]" + , "//result/doc[4]/str[@name='id'][.=1]" + ); + assertQ(req("q","*:*", "sort", "whitespace_l_stxt asc") + , "//*[@numFound='4']" + , "//result/doc[1]/str[@name='id'][.=1]" + , "//result/doc[2]/str[@name='id'][.=2]" + , "//result/doc[3]/str[@name='id'][.=3]" + , "//result/doc[4]/str[@name='id'][.=4]" + ); + assertQ(req("q","*:*", "sort", 
"whitespace_l_stxt desc") + , "//*[@numFound='4']" + , "//result/doc[1]/str[@name='id'][.=3]" + , "//result/doc[2]/str[@name='id'][.=2]" + , "//result/doc[3]/str[@name='id'][.=1]" + , "//result/doc[4]/str[@name='id'][.=4]" + ); + } + + public void testSimpleSearchAndFacets() throws Exception { + assertU(adoc("id","1", "whitespace_stxt", "how now brown cow ?")); + assertU(adoc("id","2", "whitespace_stxt", "how now brown cow ?")); + assertU(adoc("id","3", "whitespace_stxt", "holy cow !")); + assertU(adoc("id","4", "whitespace_stxt", "dog and cat")); + + assertU(commit()); + + // NOTE: even if the field is indexed=false, should still be able to facet on it + for (String facet : Arrays.asList("whitespace_stxt", "whitespace_nois_stxt", + "whitespace_m_stxt", "whitespace_plain_str")) { + for (String search : Arrays.asList("whitespace_stxt", "whitespace_nodv_stxt", + "whitespace_m_stxt", "whitespace_plain_txt")) { + // facet.field + final String fpre = "//lst[@name='facet_fields']/lst[@name='"+facet+"']/"; + assertQ(req("q", search + ":cow", "rows", "0", + "facet.field", facet, "facet", "true") + , "//*[@numFound='3']" + , fpre + "int[@name='how now brown cow ?'][.=2]" + , fpre + "int[@name='holy cow !'][.=1]" + , fpre + "int[@name='dog and cat'][.=0]" + ); + + // json facet + final String jpre = "//lst[@name='facets']/lst[@name='x']/arr[@name='buckets']/"; + assertQ(req("q", search + ":cow", "rows", "0", + "json.facet", "{x:{ type: terms, field:'" + facet + "', mincount:0 }}") + , "//*[@numFound='3']" + , jpre + "lst[str[@name='val'][.='how now brown cow ?']][int[@name='count'][.=2]]" + , jpre + "lst[str[@name='val'][.='holy cow !']][int[@name='count'][.=1]]" + , jpre + "lst[str[@name='val'][.='dog and cat']][int[@name='count'][.=0]]" + ); + + } + } + } + + + public void testWhiteboxIndexReader() throws Exception { + assertU(adoc("id","1", + "whitespace_stxt", "how now brown cow ?", + "whitespace_m_stxt", "xxx", + "whitespace_m_stxt", "yyy", + "whitespace_f_stxt", "aaa 
bbb", + "keyword_stxt", "Blarggghhh!")); + assertU(commit()); + + final RefCounted searcher = h.getCore().getNewestSearcher(false); + try { + final LeafReader r = searcher.get().getSlowAtomicReader(); + + // common cases... + for (String field : Arrays.asList("keyword_stxt", "keyword_dv_stxt", + "whitespace_stxt", "whitespace_f_stxt", "whitespace_l_stxt")) { + assertNotNull("FieldInfos: " + field, r.getFieldInfos().fieldInfo(field)); + assertEquals("DocValuesType: " + field, + DocValuesType.SORTED, r.getFieldInfos().fieldInfo(field).getDocValuesType()); + assertNotNull("DocValues: " + field, r.getSortedDocValues(field)); + assertNotNull("Terms: " + field, r.terms(field)); + + } + + // special cases... + assertNotNull(r.getFieldInfos().fieldInfo("whitespace_nodv_stxt")); + assertEquals(DocValuesType.NONE, + r.getFieldInfos().fieldInfo("whitespace_nodv_stxt").getDocValuesType()); + assertNull(r.getSortedDocValues("whitespace_nodv_stxt")); + assertNotNull(r.terms("whitespace_nodv_stxt")); + // + assertNotNull(r.getFieldInfos().fieldInfo("whitespace_nois_stxt")); + assertEquals(DocValuesType.SORTED, + r.getFieldInfos().fieldInfo("whitespace_nois_stxt").getDocValuesType()); + assertNotNull(r.getSortedDocValues("whitespace_nois_stxt")); + assertNull(r.terms("whitespace_nois_stxt")); + // + assertNotNull(r.getFieldInfos().fieldInfo("whitespace_m_stxt")); + assertEquals(DocValuesType.SORTED_SET, + r.getFieldInfos().fieldInfo("whitespace_m_stxt").getDocValuesType()); + assertNotNull(r.getSortedSetDocValues("whitespace_m_stxt")); + assertNotNull(r.terms("whitespace_m_stxt")); + + } finally { + if (null != searcher) { + searcher.decref(); + } + } + } + + public void testWhiteboxCreateFields() throws Exception { + List values = null; + + // common case... 
+ for (String field : Arrays.asList("keyword_stxt", "keyword_dv_stxt", + "whitespace_stxt", "whitespace_f_stxt", "whitespace_l_stxt")) { + values = createIndexableFields(field); + assertEquals(field, 2, values.size()); + assertThat(field, values.get(0), instanceOf(Field.class)); + assertThat(field, values.get(1), instanceOf(SortedDocValuesField.class)); + } + + // special cases... + values = createIndexableFields("whitespace_nois_stxt"); + assertEquals(1, values.size()); + assertThat(values.get(0), instanceOf(SortedDocValuesField.class)); + // + values = createIndexableFields("whitespace_nodv_stxt"); + assertEquals(1, values.size()); + assertThat(values.get(0), instanceOf(Field.class)); + // + values = createIndexableFields("whitespace_m_stxt"); + assertEquals(2, values.size()); + assertThat(values.get(0), instanceOf(Field.class)); + assertThat(values.get(1), instanceOf(SortedSetDocValuesField.class)); + } + private List createIndexableFields(String fieldName) { + SchemaField sf = h.getCore().getLatestSchema().getField(fieldName); + return sf.getType().createFields(sf, "dummy value"); + } + + public void testMaxCharsSort() throws Exception { + assertU(adoc("id","1", "whitespace_stxt", "aaa bbb ccc ddd")); + assertU(adoc("id","2", "whitespace_stxt", "aaa bbb xxx yyy")); + assertU(adoc("id","3", "whitespace_stxt", "aaa bbb ccc xxx")); + assertU(adoc("id","4", "whitespace_stxt", "aaa")); + assertU(commit()); + + // all terms should be searchable in all fields, even if the docvalues are limited + for (String searchF : Arrays.asList("whitespace_stxt", "whitespace_plain_txt", + "whitespace_max3_stxt", "whitespace_max6_stxt", + "whitespace_max0_stxt", "whitespace_maxNeg_stxt")) { + // maxChars of 0 or neg should be equivilent to no max at all + for (String sortF : Arrays.asList("whitespace_stxt", "whitespace_plain_str", + "whitespace_max0_stxt", "whitespace_maxNeg_stxt")) { + + assertQ(req("q", searchF + ":ccc", "sort", sortF + " desc, id asc") + , "//*[@numFound='2']" + 
, "//result/doc[1]/str[@name='id'][.=3]" + , "//result/doc[2]/str[@name='id'][.=1]" + ); + + assertQ(req("q", searchF + ":ccc", "sort", sortF + " asc, id desc") + , "//*[@numFound='2']" + , "//result/doc[1]/str[@name='id'][.=1]" + , "//result/doc[2]/str[@name='id'][.=3]" + ); + } + } + + // sorting on a maxChars limited fields should force tie breaker + for (String dir : Arrays.asList("asc", "desc")) { + // for max3, dir shouldn't matter - should always tie.. + assertQ(req("q", "*:*", "sort", "whitespace_max3_stxt "+dir+", id desc") // max3, id desc + , "//*[@numFound='4']" + , "//result/doc[1]/str[@name='id'][.=4]" + , "//result/doc[2]/str[@name='id'][.=3]" + , "//result/doc[3]/str[@name='id'][.=2]" + , "//result/doc[4]/str[@name='id'][.=1]" + ); + assertQ(req("q", "*:*", "sort", "whitespace_max3_stxt "+dir+", id asc") // max3, id desc + , "//*[@numFound='4']" + , "//result/doc[1]/str[@name='id'][.=1]" + , "//result/doc[2]/str[@name='id'][.=2]" + , "//result/doc[3]/str[@name='id'][.=3]" + , "//result/doc[4]/str[@name='id'][.=4]" + ); + } + assertQ(req("q", "*:*", "sort", "whitespace_max6_stxt asc, id desc") // max6 asc, id desc + , "//*[@numFound='4']" + , "//result/doc[1]/str[@name='id'][.=4]" // no tiebreaker needed + , "//result/doc[2]/str[@name='id'][.=3]" + , "//result/doc[3]/str[@name='id'][.=2]" + , "//result/doc[4]/str[@name='id'][.=1]" + ); + assertQ(req("q", "*:*", "sort", "whitespace_max6_stxt asc, id asc") // max6 asc, id desc + , "//*[@numFound='4']" + , "//result/doc[1]/str[@name='id'][.=4]" // no tiebreaker needed + , "//result/doc[2]/str[@name='id'][.=1]" + , "//result/doc[3]/str[@name='id'][.=2]" + , "//result/doc[4]/str[@name='id'][.=3]" + ); + assertQ(req("q", "*:*", "sort", "whitespace_max6_stxt desc, id desc") // max6 desc, id desc + , "//*[@numFound='4']" + , "//result/doc[1]/str[@name='id'][.=3]" + , "//result/doc[2]/str[@name='id'][.=2]" + , "//result/doc[3]/str[@name='id'][.=1]" + , "//result/doc[4]/str[@name='id'][.=4]" // no tiebreaker 
needed + ); + assertQ(req("q", "*:*", "sort", "whitespace_max6_stxt desc, id asc") // max6 desc, id desc + , "//*[@numFound='4']" + , "//result/doc[1]/str[@name='id'][.=1]" + , "//result/doc[2]/str[@name='id'][.=2]" + , "//result/doc[3]/str[@name='id'][.=3]" + , "//result/doc[4]/str[@name='id'][.=4]" // no tiebreaker needed + ); + + // sanity check that the default max is working.... + assertU(adoc("id","5", "whitespace_stxt", BIG_CONST + " aaa zzz")); + assertU(adoc("id","6", "whitespace_stxt", BIG_CONST + " bbb zzz ")); + assertU(commit()); + // for these fields, the tie breaker should be the only thing that matters, regardless of direction... + for (String sortF : Arrays.asList("whitespace_stxt", "whitespace_nois_stxt")) { + for (String dir : Arrays.asList("asc", "desc")) { + assertQ(req("q", "whitespace_stxt:zzz", "sort", sortF + " " + dir + ", id asc") + , "//*[@numFound='2']" + , "//result/doc[1]/str[@name='id'][.=5]" + , "//result/doc[2]/str[@name='id'][.=6]" + ); + assertQ(req("q", "whitespace_stxt:zzz", "sort", sortF + " " + dir + ", id desc") + , "//*[@numFound='2']" + , "//result/doc[1]/str[@name='id'][.=6]" + , "//result/doc[2]/str[@name='id'][.=5]" + ); + } + } + } + + /** + * test how various permutations of useDocValuesAsStored and maxCharsForDocValues interact + */ + public void testUseDocValuesAsStored() throws Exception { + ignoreException("when useDocValuesAsStored=true \\(length="); + + // first things first... 
+ // unlike most field types, SortableTextField should default to useDocValuesAsStored==false + // (check a handful that should have the default behavior) + for (String n : Arrays.asList("keyword_stxt", "whitespace_max0_stxt", "whitespace_max6_stxt")) { + { + FieldType ft = h.getCore().getLatestSchema().getFieldTypeByName(n); + assertEquals("type " + ft.getTypeName() + " should not default to useDocValuesAsStored", + false, ft.useDocValuesAsStored()) ; + } + { + SchemaField sf = h.getCore().getLatestSchema().getField(n); + assertEquals("field " + sf.getName() + " should not default to useDocValuesAsStored", + false, sf.useDocValuesAsStored()) ; + } + } + + // but it should be possible to set useDocValuesAsStored=true explicitly on types... + int num_types_found = 0; + for (Map.Entry entry : h.getCore().getLatestSchema().getFieldTypes().entrySet()) { + if (entry.getKey().endsWith("_has_usedvs")) { + num_types_found++; + FieldType ft = entry.getValue(); + assertEquals("type " + ft.getTypeName() + " has unexpected useDocValuesAsStored value", + true, ft.useDocValuesAsStored()) ; + } + } + assertEquals("sanity check: wrong number of *_has_usedvs types found -- schema changed?", + 2, num_types_found); + + + // ...and it should be possible to set/override useDocValuesAsStored=true on fields... + int num_fields_found = 0; + List xpaths = new ArrayList<>(42); + for (Map.Entry entry : h.getCore().getLatestSchema().getFields().entrySet()) { + if (entry.getKey().endsWith("_usedvs")) { + num_fields_found++; + final SchemaField sf = entry.getValue(); + final String name = sf.getName(); + + // some sanity check before we move on with the rest of our testing... + assertFalse("schema change? 
field should not be stored=true: " + name, sf.stored()); + final boolean usedvs = name.endsWith("_has_usedvs"); + assertTrue("schema change broke assumptions: field must be '*_has_usedvs' or '*_negates_usedvs': " + + name, usedvs ^ name.endsWith("_negates_usedvs")); + final boolean max6 = name.startsWith("max6_"); + assertTrue("schema change broke assumptions: field must be 'max6_*' or 'max0_*': " + + name, max6 ^ name.startsWith("max0_")); + + assertEquals("Unexpected useDocValuesAsStored value for field: " + name, + usedvs, sf.useDocValuesAsStored()) ; + + final String docid = ""+num_fields_found; + if (usedvs && max6) { + // if useDocValuesAsStored==true and maxCharsForDocValues=N then longer values should fail + + final String doc = adoc("id", docid, name, "apple pear orange"); + SolrException ex = expectThrows(SolrException.class, () -> { assertU(doc); }); + for (String expect : Arrays.asList("field " + name, + "length=17", + "useDocValuesAsStored=true", + "maxCharsForDocValues=6")) { + assertTrue("exception must mention " + expect + ": " + ex.getMessage(), + ex.getMessage().contains(expect)); + } + } else { + // otherwise (useDocValuesAsStored==false *OR* maxCharsForDocValues=0) any value + // should be fine when adding a doc and we should be able to search for it later... + final String val = docid + " apple pear orange " + BIG_CONST; + assertU(adoc("id", docid, name, val)); + String doc_xpath = "//result/doc[str[@name='id'][.='"+docid+"']]"; + + if (usedvs) { + // ...and if it *does* usedvs, then we should defnitely see our value when searching... + doc_xpath = doc_xpath + "[str[@name='"+name+"'][.='"+val+"']]"; + } else { + // ...but if not, then we should definitely not see any value for our field... 
+ doc_xpath = doc_xpath + "[not(str[@name='"+name+"'])]"; + } + xpaths.add(doc_xpath); + } + } + } + assertEquals("sanity check: wrong number of *_usedvs fields found -- schema changed?", + 6, num_fields_found); + + // check all our expected docs can be found (with the expected values) + assertU(commit()); + xpaths.add("//*[@numFound='"+xpaths.size()+"']"); + assertQ(req("q", "*:*", "fl", "*"), xpaths.toArray(new String[xpaths.size()])); + } + + + + /** + * tests that a SortableTextField using KeywordTokenzier (w/docValues) behaves exactly the same as + * StrFields that it's copied to for quering and sorting + */ + public void testRandomStrEquivilentBehavior() throws Exception { + final List test_fields = Arrays.asList("keyword_stxt", "keyword_dv_stxt", + "keyword_s_dv", "keyword_s"); + // we use embedded client instead of assertQ: we want to compare the responses from multiple requests + final SolrClient client = new EmbeddedSolrServer(h.getCore()); + + final int numDocs = atLeast(100); + final int magicIdx = TestUtil.nextInt(random(), 1, numDocs); + String magic = null; + for (int i = 1; i <= numDocs; i++) { + + // ideally we'd test all "realistic" unicode string, but EmbeddedSolrServer uses XML request writer + // and has no option to change this so ctrl-characters break the request + final String val = TestUtil.randomSimpleString(random(), 100); + if (i == magicIdx) { + magic = val; + } + assertEquals(0, client.add(sdoc("id", ""+i, "keyword_stxt", val)).getStatus()); + + } + assertNotNull(magic); + + assertEquals(0, client.commit().getStatus()); + + // query for magic term should match same doc regardless of field (reminder: keyword tokenizer) + // (we need the filter in the unlikely event that magic value with randomly picked twice) + for (String f : test_fields) { + + final SolrDocumentList results = client.query(params("q", "{!field f="+f+" v=$v}", + "v", magic, + "fq", "id:" + magicIdx )).getResults(); + assertEquals(f + ": Query ("+magic+") filtered by id: 
" + magicIdx + " ==> " + results, + 1L, results.getNumFound()); + final SolrDocument doc = results.get(0); + assertEquals(f + ": Query ("+magic+") filtered by id: " + magicIdx + " ==> " + doc, + ""+magicIdx, doc.getFieldValue("id")); + assertEquals(f + ": Query ("+magic+") filtered by id: " + magicIdx + " ==> " + doc, + magic, doc.getFieldValue(f)); + } + + // do some random id range queries using all 3 fields for sorting. results should be identical + final int numQ = atLeast(10); + for (int i = 0; i < numQ; i++) { + final int hi = TestUtil.nextInt(random(), 1, numDocs-1); + final int lo = TestUtil.nextInt(random(), 1, hi); + final boolean fwd = random().nextBoolean(); + + SolrDocumentList previous = null; + String prevField = null; + for (String f : test_fields) { + final SolrDocumentList results = client.query(params("q","id_i:["+lo+" TO "+hi+"]", + "sort", f + (fwd ? " asc" : " desc") + + // secondary on id for determinism + ", id asc") + ).getResults(); + assertEquals(results.toString(), (1L + hi - lo), results.getNumFound()); + if (null != previous) { + assertEquals(prevField + " vs " + f, + previous.getNumFound(), results.getNumFound()); + for (int d = 0; d < results.size(); d++) { + assertEquals(prevField + " vs " + f + ": " + d, + previous.get(d).getFieldValue("id"), + results.get(d).getFieldValue("id")); + assertEquals(prevField + " vs " + f + ": " + d, + previous.get(d).getFieldValue(prevField), + results.get(d).getFieldValue(f)); + + } + } + previous = results; + prevField = f; + } + } + + } +} diff --git a/solr/core/src/test/org/apache/solr/search/function/TestMinMaxOnMultiValuedField.java b/solr/core/src/test/org/apache/solr/search/function/TestMinMaxOnMultiValuedField.java index a90d51ba552..29fd8dc54b7 100644 --- a/solr/core/src/test/org/apache/solr/search/function/TestMinMaxOnMultiValuedField.java +++ b/solr/core/src/test/org/apache/solr/search/function/TestMinMaxOnMultiValuedField.java @@ -59,6 +59,11 @@ public class TestMinMaxOnMultiValuedField 
extends SolrTestCaseJ4 { "date_missf_", "date_missl_", "enum_missf_", "enum_missl_", "bool_missf_", "bool_missl_" }, new String [] {"_dv"}); + checkFields(new String[] {"stxt_", // no expectation on missing first/last + "stxt_missf_", "stxt_missl_" }, new String [] { "_dv"}); + checkFields(new String [] { "stxt_" }, // no expectation on missing first/last + new String [] { "_nodv", "_dv" }); + checkFields(new String [] { "stxt_missf_", "stxt_missl_" }, new String [] { "_dv"}); } @@ -71,8 +76,9 @@ public class TestMinMaxOnMultiValuedField extends SolrTestCaseJ4 { SchemaField sf = schema.getField(f); assertTrue(f + " is not multivalued", sf.multiValued()); assertEquals(f + " doesn't have expected docValues status", - f.contains("dv") || f.endsWith("_p") - || Boolean.getBoolean(NUMERIC_DOCVALUES_SYSPROP), sf.hasDocValues()); + ((f.contains("dv") || f.endsWith("_p") || Boolean.getBoolean(NUMERIC_DOCVALUES_SYSPROP)) + && !f.contains("nodv")), + sf.hasDocValues()); assertEquals(f + " doesn't have expected index status", ! 
f.contains("ni"), sf.indexed()); @@ -178,19 +184,27 @@ public class TestMinMaxOnMultiValuedField extends SolrTestCaseJ4 { } public void testBasicStrings() { + checkBasicStrings("val_strs_dv"); + } + public void testBasicSortableText() { + checkBasicStrings("val_stxt_s_dv"); + checkBasicStrings("val_stxt_missf_s_dv"); + checkBasicStrings("val_stxt_missl_s_dv"); + } + private void checkBasicStrings(final String field) { assertU(adoc(sdoc("id", "1", - "val_strs_dv", "dog", - "val_strs_dv", "xyz", - "val_strs_dv", "cat"))); - assertU(adoc(sdoc("id", "2"))); // 2 has no val_strs_dv values + field, "dog", + field, "xyz", + field, "cat"))); + assertU(adoc(sdoc("id", "2"))); // 2 has no values in tested field assertU(commit()); // id=1: has values assertQ(req("q","id:1" - ,"fl","exists_min_str:exists(field(val_strs_dv,min))" - ,"fl","exists_max_str:exists(field(val_strs_dv,max))" - ,"fl","min_str:field(val_strs_dv,min)" - ,"fl","max_str:field(val_strs_dv,max)" + ,"fl","exists_min_str:exists(field("+field+",min))" + ,"fl","exists_max_str:exists(field("+field+",max))" + ,"fl","min_str:field("+field+",min)" + ,"fl","max_str:field("+field+",max)" ) ,"//*[@numFound='1']" @@ -201,10 +215,10 @@ public class TestMinMaxOnMultiValuedField extends SolrTestCaseJ4 { ); // id=2: no values assertQ(req("q","id:2" - ,"fl","exists_min_str:exists(field(val_strs_dv,min))" - ,"fl","exists_max_str:exists(field(val_strs_dv,max))" - ,"fl","min_str:field(val_strs_dv,min)" - ,"fl","max_str:field(val_strs_dv,max)" + ,"fl","exists_min_str:exists(field("+field+",min))" + ,"fl","exists_max_str:exists(field("+field+",max))" + ,"fl","min_str:field("+field+",min)" + ,"fl","max_str:field("+field+",max)" ) ,"//*[@numFound='1']" @@ -219,6 +233,10 @@ public class TestMinMaxOnMultiValuedField extends SolrTestCaseJ4 { testExpectedSortOrdering("val_strs_dv", false, null, "a", "cat", "dog", "wako", "xyz", "zzzzz"); } + public void testExpectedSortOrderingSortableText() { + 
testExpectedSortOrdering("val_stxt_s_dv", false, + null, "a", "cat", "dog", "wako", "xyz", "zzzzz"); + } public void testExpectedSortMissingOrderings() { @@ -226,7 +244,9 @@ public class TestMinMaxOnMultiValuedField extends SolrTestCaseJ4 { // (in this simple test) we aren't using a secondary sort, so there is no way to disambiguate // docs that have those values from docs that have those *effective* sort values - testSortMissingMinMax("val_str", "a", "aaaaaa", "xxxxx", "zzzzzzzzzzzzzzzzzzz"); + testSortMissingMinMax("val_str", "a", "aaaaaa", "xxxxx", "zzzzzzzzzzzzzzzzzzz"); + testSortMissingMinMax("val_stxt", "a", "aaaaaa", "xxxxx", "zzzzzzzzzzzzzzzzzzz"); + testSortMissingMinMax("val_int", Integer.MIN_VALUE+1L, -9999, 0, 99999, Integer.MAX_VALUE-1L); testSortMissingMinMax("val_long", @@ -382,6 +402,15 @@ public class TestMinMaxOnMultiValuedField extends SolrTestCaseJ4 { "docValues", req("q","*:*", "fl", "field(cat,'max')"), SolrException.ErrorCode.BAD_REQUEST); + assertQEx("no error mentioning field name when asking for max on a non-dv sortable text field", + "val_stxt_s_nodv", + req("q","*:*", "fl", "field(val_stxt_s_nodv,'max')"), + SolrException.ErrorCode.BAD_REQUEST); + assertQEx("no error mentioning 'docValues' when asking for max on a non-dv sortable field", + "docValues", + req("q","*:*", "fl", "field(val_stxt_s_nodv,'max')"), + SolrException.ErrorCode.BAD_REQUEST); + } diff --git a/solr/solr-ref-guide/src/analyzers.adoc b/solr/solr-ref-guide/src/analyzers.adoc index 206f54f7328..343fd306b48 100644 --- a/solr/solr-ref-guide/src/analyzers.adoc +++ b/solr/solr-ref-guide/src/analyzers.adoc @@ -20,7 +20,7 @@ An analyzer examines the text of fields and generates a token stream. Analyzers are specified as a child of the `` element in the `schema.xml` configuration file (in the same `conf/` directory as `solrconfig.xml`). -In normal usage, only fields of type `solr.TextField` will specify an analyzer. 
The simplest way to configure an analyzer is with a single `<analyzer>` element whose class attribute is a fully qualified Java class name. The named class must derive from `org.apache.lucene.analysis.Analyzer`. For example: +In normal usage, only fields of type `solr.TextField` or `solr.SortableTextField` will specify an analyzer. The simplest way to configure an analyzer is with a single `<analyzer>` element whose class attribute is a fully qualified Java class name. The named class must derive from `org.apache.lucene.analysis.Analyzer`. For example: [source,xml] ---- diff --git a/solr/solr-ref-guide/src/common-query-parameters.adoc b/solr/solr-ref-guide/src/common-query-parameters.adoc index a9d6cec5323..0e2f4f02fb5 100644 --- a/solr/solr-ref-guide/src/common-query-parameters.adoc +++ b/solr/solr-ref-guide/src/common-query-parameters.adoc @@ -37,10 +37,11 @@ Solr can sort query responses according to: * Document scores * <> * The value of any primative field (numerics, string, boolean, dates, etc...) which has `docValues="true"` (or `multiValued="false"` and `indexed="true"` in which case the indexed terms will used to build DocValue like structures on the fly at runtime) -* A TextField that uses an analyzer (such as the KeywordTokenizer) that produces only a single term. +* A SortableTextField which implicitly uses `docValues="true"` by default to allow sorting on the original input string regardless of the analyzers used for searching. +* A single-valued TextField that uses an analyzer (such as the KeywordTokenizer) that produces only a single term per document. TextField does not support `docValues="true"`, but a DocValue like structure will be built on the fly at runtime. ** *NOTE:* If you want to be able to sort on a field whose contents you want to tokenize to facilitate searching, <> in the the Schema to clone the field. Then search on the field and sort on its clone. 
-In the case of primative fields that are `multiValued="true"` the representantive value used for each doc when sorting depends on the sort direction: The minimum value in each document is used for ascending (`asc`) sorting, while the maximal value in each document is used for descending (`desc`) sorting. This default behavior is equivilent to explicitly sorting using the 2 argument `<>` function: `sort=field(name,min) asc` and `sort=field(name,max) desc` +In the case of primitive fields, or SortableTextFields, that are `multiValued="true"` the representative value used for each doc when sorting depends on the sort direction: The minimum value in each document is used for ascending (`asc`) sorting, while the maximal value in each document is used for descending (`desc`) sorting. This default behavior is equivalent to explicitly sorting using the 2 argument `<>` function: `sort=field(name,min) asc` and `sort=field(name,max) desc` The table below explains how Solr responds to various settings of the `sort` parameter. diff --git a/solr/solr-ref-guide/src/field-types-included-with-solr.adoc b/solr/solr-ref-guide/src/field-types-included-with-solr.adoc index 66217ec29c8..3c6259fda87 100644 --- a/solr/solr-ref-guide/src/field-types-included-with-solr.adoc +++ b/solr/solr-ref-guide/src/field-types-included-with-solr.adoc @@ -69,6 +69,8 @@ Configuration and usage of PreAnalyzedField is documented in the section <