SOLR-11916: new SortableTextField which supports analysis/searching just like TextField, but also sorting/faceting just like StrField

2025-03-08 17:49:29 +00:00 · 2018-02-01 10:40:29 -07:00 · 2018-02-01 10:40:29 -07:00 · 95122e1448
commit 95122e1448
parent b0b963c68e
11 changed files with 1057 additions and 22 deletions
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@ -149,6 +149,10 @@ New Features

 * SOLR-11890: Add multiKmeans Stream Evaluator (Joel Bernstein)

+* SOLR-11916: new SortableTextField which supports analysis/searching just like TextField, but also
+  sorting/faceting just like StrField.  By default uses only the first 1024 chars of the original
+  input string values, but this is configurable.  (hossman)
+
 Bug Fixes
 ----------------------

--- a/solr/core/src/java/org/apache/solr/schema/SortableTextField.java
+++ b/solr/core/src/java/org/apache/solr/schema/SortableTextField.java
@ -0,0 +1,215 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.schema;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.SortedSetDocValuesField;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource;
+import org.apache.lucene.search.*;
+import org.apache.lucene.util.BytesRef;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.search.QParser;
+import org.apache.solr.uninverting.UninvertingReader.Type;
+
+/** 
+ * <p>
+ * <code>SortableTextField</code> is a specialized form of {@link TextField} that supports 
+ * Sorting and ValueSource functions, using <code>docValues</code> built from the first 
+ * <code>maxCharsForDocValues</code> characters of the original (pre-analyzed) String values of this field.
+ * </p>
+ * <p>
+ * The implicit default value for <code>maxCharsForDocValues</code> is <code>1024</code>.  If a field 
+ * type instance is configured with <code>maxCharsForDocValues &lt;= 0</code> this overrides the default 
+ * with an effective value of "no limit" ({@link Integer#MAX_VALUE}).
+ * </p>
+ * <p>
+ * Instances of this FieldType implicitly default to <code>docValues="true"</code> unless explicitly 
+ * configured with <code>docValues="false"</code>.
+ * </p>
+ * <p>
+ * Just like {@link StrField}, instances of this field that are <code>multiValued="true"</code> support 
+ * the <code>field(name,min|max)</code> function, and implicitly sort on <code>min|max</code> depending 
+ * on the <code>asc|desc</code> direction selector.
+ * </p>
+ *
+ * <p>
+ * <b>NOTE:</b> Unlike most other FieldTypes, this class defaults to 
+ * <code>useDocValuesAsStored="false"</code>.  If an instance of this type (or a field that uses this type) 
+ * overrides this behavior to set <code>useDocValuesAsStored="true"</code> then instead of truncating the 
+ * original string value based on the effective value of <code>maxCharsForDocValues</code>, this class 
+ * will reject any documents w/a field value longer then that limit -- causing the document update to fail.
+ * This behavior exists to prevent situations that could result in a search client reieving only a truncated
+ * version of the original field value in place of a <code>stored</code> value.
+ * </p>
+ */
+public class SortableTextField extends TextField {
+
+  public static final int DEFAULT_MAX_CHARS_FOR_DOC_VALUES = 1024;
+  
+  private int maxCharsForDocValues = DEFAULT_MAX_CHARS_FOR_DOC_VALUES;
+  
+  protected void init(IndexSchema schema, Map<String,String> args) {
+    { 
+      final String maxS = args.remove("maxCharsForDocValues");
+      if (maxS != null) {
+        maxCharsForDocValues = Integer.parseInt(maxS);
+        if (maxCharsForDocValues <= 0) {
+          maxCharsForDocValues = Integer.MAX_VALUE;
+        }
+      }
+    }
+
+    // by the time our init() is called, super.setArgs has already removed & procesesd any explicit
+    // "docValues=foo" or useDocValuesAsStored=bar args...
+    //  - If the user explicitly said docValues=false, we want to respect that and not change it.
+    //    - if the user didn't explicit specify anything, then we want to implicitly *default* docValues=true
+    //  - The inverse is true for useDocValuesAsStored=true:
+    //    - if explict, then respect it; else implicitly default to useDocValuesAsStored=false
+    // ...lucky for us, setArgs preserved info about explicitly set true|false properties...
+    if (! on(falseProperties, DOC_VALUES)) {
+      properties |= DOC_VALUES;
+    }
+    if (! on(trueProperties, USE_DOCVALUES_AS_STORED)) {
+      properties &= ~USE_DOCVALUES_AS_STORED;
+    }
+    
+    super.init(schema, args);
+  }
+
+  @Override
+  public List<IndexableField> createFields(SchemaField field, Object value) {
+    IndexableField f = createField( field, value);
+    if (! field.hasDocValues()) {
+      return Collections.singletonList(f);
+    }
+    final String origString = value.toString();
+    final int origLegth = origString.length();
+    final boolean truncate = maxCharsForDocValues < origLegth;
+    if (field.useDocValuesAsStored() && truncate) {
+      // if the user has explicitly configured useDocValuesAsStored, we need a special
+      // check to fail docs where the values are too long -- we don't want to silently
+      // accept and then have search queries returning partial values
+      throw new SolrException
+        (SolrException.ErrorCode.BAD_REQUEST,
+         "Can not use field " + field.getName() + " with values longer then maxCharsForDocValues=" +
+         maxCharsForDocValues + " when useDocValuesAsStored=true (length=" + origLegth + ")");
+    }
+    final BytesRef bytes = new BytesRef(truncate ? origString.subSequence(0, maxCharsForDocValues) : origString);
+                                        
+    final IndexableField docval = field.multiValued()
+      ? new SortedSetDocValuesField(field.getName(), bytes)
+      : new SortedDocValuesField(field.getName(), bytes);
+    
+    if (null == f) {
+      return Collections.singletonList(docval);
+    } 
+    return Arrays.asList(f, docval);
+  }
+
+  
+  /** 
+   * {@inheritDoc} 
+   * this field type supports DocValues, this method is always a No-Op 
+   */
+  @Override
+  protected void checkSupportsDocValues() {
+    // No-Op
+  }
+  
+  @Override
+  public SortField getSortField(SchemaField field, boolean reverse) {
+    if (! field.hasDocValues()) {
+      // type defaults to docValues=true, so error msg from perspective that
+      // either type or field must have docValues="false"
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+                              "Can not sort on this type of field when docValues=\"false\", field: " + field.getName());
+    }
+    
+    // NOTE: we explicitly bypass super.getSortField so that our getDefaultMultiValueSelectorForSort
+    // is used and we don't get the historic Uninversion behavior of TextField.
+    return getStringSort(field, reverse);
+  }
+  
+  @Override
+  public ValueSource getValueSource(SchemaField field, QParser parser) {
+    if (! field.hasDocValues()) {
+      // type defaults to docValues=true, so error msg from perspective that
+      // either type or field must have docValues="false"
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+                              "Can not use ValueSource on this type of field when docValues=\"false\", field: " + field.getName());
+    }
+    return super.getValueSource(field, parser);
+  }
+  
+  @Override
+  public MultiValueSelector getDefaultMultiValueSelectorForSort(SchemaField field, boolean reverse) {
+    return reverse ? MultiValueSelector.MAX : MultiValueSelector.MIN;
+  }
+  
+  @Override
+  public ValueSource getSingleValueSource(MultiValueSelector choice, SchemaField field, QParser parser) {
+    // trivial base case
+    if (!field.multiValued()) {
+      // single value matches any selector
+      return getValueSource(field, parser);
+    }
+    
+    // See LUCENE-6709
+    if (! field.hasDocValues()) {
+      // type defaults to docValues=true, so error msg from perspective that
+      // either type or field must have docValues="false"
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+                              "Can not select  '" + choice.toString() + "' value from multivalued field ("+
+                              field.getName() +") when docValues=\"false\", field: " + field.getName());
+    }
+    SortedSetSelector.Type selectorType = choice.getSortedSetSelectorType();
+    if (null == selectorType) {
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+                              choice.toString() + " is not a supported option for picking a single value"
+                              + " from the multivalued field: " + field.getName() +
+                              " (type: " + this.getTypeName() + ")");
+    }
+    
+    return new SortedSetFieldSource(field.getName(), selectorType);
+  }
+
+  /** 
+   * {@inheritDoc} 
+   * this field type is not uninvertable, this method always returns null 
+   */
+  @Override
+  public Type getUninversionType(SchemaField sf) {
+    return null;
+  }
+
+  /** 
+   * {@inheritDoc} 
+   * This implementation always returns false. 
+   */
+  @Override
+  public boolean multiValuedFieldCache() {
+    return false;
+  }
+
+}
--- a/solr/core/src/java/org/apache/solr/schema/TextField.java
+++ b/solr/core/src/java/org/apache/solr/schema/TextField.java
@ -118,8 +118,13 @@ public class TextField extends FieldType {
  @Override
  public SortField getSortField(SchemaField field, boolean reverse) {
    /* :TODO: maybe warn if isTokenized(), but doesn't use LimitTokenCountFilter in its chain? */
-    return getSortedSetSortField(field, SortedSetSelector.Type.MIN, reverse,
-                                 SortField.STRING_FIRST, SortField.STRING_LAST);
+    return getSortedSetSortField(field,
+                                 // historical behavior based on how the early versions of the FieldCache
+                                 // would deal with multiple indexed terms in a single valued field...
+                                 //
+                                 // Always use the 'min' value from the (Uninverted) "pseudo doc values"
+                                 SortedSetSelector.Type.MIN,
+                                 reverse, SortField.STRING_FIRST, SortField.STRING_LAST);
  }
  
  @Override
--- a/solr/core/src/test-files/solr/collection1/conf/schema-sorting-text.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema-sorting-text.xml
@ -0,0 +1,149 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<schema name="minimal" version="1.6">
+  <uniqueKey>id</uniqueKey>
+  
+  <field name="id" type="str" indexed="true" docValues="true" stored="true" />
+  <field name="id_i" type="int" indexed="true" docValues="true" stored="true" />
+
+  <!-- NOTE: docValues="true" should be implicit for SortableTextField -->
+
+  <field name="whitespace_stxt" type="whitespace_stxt" indexed="true" stored="true" />
+  <field name="whitespace_m_stxt" type="whitespace_stxt" indexed="true" stored="true" multiValued="true" />
+  <!-- explicit docValues="false" in this version... -->
+  <field name="whitespace_nodv_stxt" type="whitespace_stxt" indexed="true" docValues="false" stored="true" />
+  <!-- only docValues in this version, no index or stored... -->
+  <field name="whitespace_nois_stxt" type="whitespace_stxt" indexed="false" docValues="true" stored="false" />
+
+  <field name="whitespace_max3_stxt" type="whitespace_max3_stxt" />
+  <field name="whitespace_max6_stxt" type="whitespace_max6_stxt" />
+  <field name="whitespace_max0_stxt" type="whitespace_max0_stxt" />
+  <field name="whitespace_maxNeg_stxt" type="whitespace_maxNeg_stxt" />
+  
+  <field name="whitespace_f_stxt" type="whitespace_f_stxt" indexed="true" docValues="true" stored="true" />
+  <field name="whitespace_l_stxt" type="whitespace_l_stxt" indexed="true" docValues="true" stored="true" />
+
+  <field name="keyword_stxt" type="keyword_stxt" indexed="true" stored="true" />
+  <!-- explicit docValues="true" in this field version... -->
+  <field name="keyword_dv_stxt" type="keyword_stxt" indexed="true" docValues="true" stored="true" />
+
+  <!-- for behavioral equivalency testing -->
+  <field name="whitespace_plain_txt" type="whitespace_plain_txt" />
+  <field name="whitespace_plain_str" type="str" />
+  <field name="keyword_s_dv" type="str" indexed="false" docValues="true" stored="true" />
+  <field name="keyword_s" type="str" indexed="true" docValues="false" stored="true" />
+
+  <!-- . -->
+  
+  <copyField source="whitespace_stxt" dest="whitespace_m_stxt"/>
+  <copyField source="whitespace_stxt" dest="whitespace_nodv_stxt"/>
+  <copyField source="whitespace_stxt" dest="whitespace_nois_stxt"/>
+  <copyField source="whitespace_stxt" dest="whitespace_max3_stxt"/>
+  <copyField source="whitespace_stxt" dest="whitespace_max6_stxt"/>
+  <copyField source="whitespace_stxt" dest="whitespace_max0_stxt"/>
+  <copyField source="whitespace_stxt" dest="whitespace_maxNeg_stxt"/>
+  <copyField source="whitespace_stxt" dest="whitespace_plain_txt"/>
+  <copyField source="whitespace_stxt" dest="whitespace_plain_str"/>
+  
+  <copyField source="whitespace_f_stxt" dest="whitespace_l_stxt"/>
+  
+  <copyField source="keyword_stxt" dest="keyword_dv_stxt"/>
+  <copyField source="keyword_stxt" dest="keyword_s"/>
+  <copyField source="keyword_stxt" dest="keyword_s_dv"/>
+  
+  <copyField source="id" dest="id_i"/>
+  
+  <!-- . -->
+  
+  <!-- NOTE: explicitly not specifying docValues=true on these, it should be implicit default -->
+  <fieldType name="whitespace_stxt" class="solr.SortableTextField">
+    <analyzer>
+      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+    </analyzer>
+  </fieldType>
+  <fieldType name="whitespace_max3_stxt" class="solr.SortableTextField" maxCharsForDocValues="3">
+    <analyzer>
+      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+    </analyzer>
+  </fieldType>
+  <fieldType name="whitespace_max6_stxt" class="solr.SortableTextField" maxCharsForDocValues="6">
+    <analyzer>
+      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+    </analyzer>
+  </fieldType>
+  <fieldType name="whitespace_max0_stxt" class="solr.SortableTextField" maxCharsForDocValues="0">
+    <analyzer>
+      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+    </analyzer>
+  </fieldType>
+  <fieldType name="whitespace_maxNeg_stxt" class="solr.SortableTextField" maxCharsForDocValues="-42">
+    <analyzer>
+      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+    </analyzer>
+  </fieldType>
+  <fieldType name="whitespace_l_stxt" class="solr.SortableTextField" sortMissingLast="true">
+    <analyzer>
+      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+    </analyzer>
+  </fieldType>
+  <fieldType name="whitespace_f_stxt" class="solr.SortableTextField" sortMissingFirst="true">
+    <analyzer>
+      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+    </analyzer>
+  </fieldType>
+  <fieldType name="keyword_stxt" class="solr.SortableTextField">
+    <analyzer>
+      <tokenizer class="solr.KeywordTokenizerFactory" />
+    </analyzer>
+  </fieldType>
+
+  <!-- some special edge cases for testing how useDocValuesAsStored (on type or field)
+       interacts with maxCharsForDocValues
+       NOTE: 2 types and 6 fields ending in '_usedvs': either '_has_usedvs' or '_negates_usedvs'
+  -->
+  <!-- max6 -->
+  <field name="max6_field_has_usedvs" type="whitespace_max6_stxt" stored="false" useDocValuesAsStored="true" />
+  <field name="max6_type_has_usedvs" type="max6_type_has_usedvs" />
+  <field name="max6_field_negates_usedvs" type="max6_type_has_usedvs" useDocValuesAsStored="false" />
+  <fieldType name="max6_type_has_usedvs" class="solr.SortableTextField" stored="false"
+             maxCharsForDocValues="6" useDocValuesAsStored="true">
+    <analyzer>
+      <tokenizer class="solr.WhitespaceTokenizerFactory" />
+    </analyzer>
+  </fieldType>
+  <!-- max0 -->
+  <field name="max0_field_has_usedvs" type="whitespace_max0_stxt" stored="false" useDocValuesAsStored="true" />
+  <field name="max0_type_has_usedvs" type="max0_type_has_usedvs" />
+  <field name="max0_field_negates_usedvs" type="max0_type_has_usedvs" useDocValuesAsStored="false" />
+  <fieldType name="max0_type_has_usedvs" class="solr.SortableTextField" stored="false"
+             maxCharsForDocValues="0" useDocValuesAsStored="true">
+    <analyzer>
+      <tokenizer class="solr.WhitespaceTokenizerFactory" />
+    </analyzer>
+  </fieldType>
+
+  
+  <fieldType name="str" class="solr.StrField"/>
+  <fieldType name="whitespace_plain_txt" class="solr.TextField">
+    <analyzer>
+      <tokenizer class="solr.WhitespaceTokenizerFactory" />
+    </analyzer>
+  </fieldType>
+  
+  <fieldType name="int" class="${solr.tests.IntegerFieldType}"/>
+</schema>
--- a/solr/core/src/test-files/solr/collection1/conf/schema11.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema11.xml
@ -519,6 +519,20 @@ valued. -->
    </analyzer>
  </fieldType>

+  <!-- See TestMinMaxOnMultiValuedField -->
+  <field name="val_stxt_s_dv" type="whitespace_stxt" multiValued="true"/>
+  <field name="val_stxt_missf_s_dv" type="whitespace_stxt" multiValued="true" sortMissingFirst="true"/>
+  <field name="val_stxt_missl_s_dv" type="whitespace_stxt" multiValued="true" sortMissingLast="true"/>
+  <field name="val_stxt_s_nodv" type="whitespace_stxt" multiValued="true" docValues="false" />
+  <!-- NOTE: explicitly not specifying docValues=true, it should be implicit default -->
+  <fieldType name="whitespace_stxt" class="solr.SortableTextField" indexed="true" stored="true">
+    <analyzer>
+      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+    </analyzer>
+  </fieldType>
+
+
+  
 <!-- Field to use to determine and enforce document uniqueness. 
      Unless this field is marked with required="false", it will be a required field
   -->
--- a/solr/core/src/test/org/apache/solr/rest/schema/TestBulkSchemaAPI.java
+++ b/solr/core/src/test/org/apache/solr/rest/schema/TestBulkSchemaAPI.java
@ -24,6 +24,7 @@ import org.apache.lucene.search.similarities.BM25Similarity;
 import org.apache.lucene.misc.SweetSpotSimilarity;
 import org.apache.lucene.search.similarities.Similarity;

+import org.apache.solr.common.SolrDocumentList;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.core.CoreContainer;
 import org.apache.solr.schema.SimilarityFactory;
@ -84,7 +85,6 @@ public class TestBulkSchemaAPI extends RestTestBase {
      jetty.stop();
      jetty = null;
    }
-    client = null;
    if (restTestHarness != null) {
      restTestHarness.close();
    }
@ -840,7 +840,61 @@ public class TestBulkSchemaAPI extends RestTestBase {
    map = (Map)ObjectBuilder.getVal(new JSONParser(new StringReader(response)));
    assertNull(map.get("error"));
  }
+  /**
+   * Adds a <code>solr.SortableTextField</code> type (with <code>maxCharsForDocValues=6</code>) and a
+   * field of that type via the bulk schema API, then checks that the new field is both searchable
+   * and sortable.
+   * <p>
+   * With only the first 6 chars used for sorting, "xxx bbb aaa" (id 2) and "xxx bbb zzz" (id 3)
+   * tie on "xxx bb", so their relative order must come from the secondary <code>id</code> sort,
+   * while "xxx aaa" (id 1) sorts before both.
+   * </p>
+   */
+  public void testSortableTextFieldWithAnalyzer() throws Exception {
+    String fieldTypeName = "sort_text_type";
+    String fieldName = "sort_text";
+    // NOTE: every property is comma separated so the payload doesn't depend on parser leniency
+    String payload = "{\n" +
+        "  'add-field-type' : {" +
+        "    'name' : '" + fieldTypeName + "',\n" +
+        "    'stored':true,\n" +
+        "    'indexed':true,\n" +
+        "    'maxCharsForDocValues':6,\n" +
+        "    'class':'solr.SortableTextField',\n" +
+        "    'analyzer' : {'tokenizer':{'class':'solr.WhitespaceTokenizerFactory'}},\n" +
+        "  },\n"+
+        "  'add-field' : {\n" +
+        "    'name':'" + fieldName + "',\n" +
+        "    'type': '"+fieldTypeName+"',\n" +
+        "  }\n" +
+        "}\n";
+
+    String response = restTestHarness.post("/schema", json(payload));
+
+    Map map = (Map) ObjectBuilder.getVal(new JSONParser(new StringReader(response)));
+    assertNull(response, map.get("errors"));
+
+    Map fields = getObj(restTestHarness, fieldName, "fields");
+    assertNotNull("field " + fieldName + " not created", fields);
+
+    assertEquals(0,
+                 getSolrClient().add(Arrays.asList(sdoc("id","1",fieldName,"xxx aaa"),
+                                                   sdoc("id","2",fieldName,"xxx bbb aaa"),
+                                                   sdoc("id","3",fieldName,"xxx bbb zzz"))).getStatus());
+
+    assertEquals(0, getSolrClient().commit().getStatus());
+    { // ascending: doc 1 first, then the tied docs 2 & 3 ordered by "id desc"
+      SolrDocumentList docs = getSolrClient().query
+        (params("q",fieldName+":xxx","sort", fieldName + " asc, id desc")).getResults();
+
+      assertEquals(3L, docs.getNumFound());
+      assertEquals(3L, docs.size());
+      assertEquals("1", docs.get(0).getFieldValue("id"));
+      assertEquals("3", docs.get(1).getFieldValue("id"));
+      assertEquals("2", docs.get(2).getFieldValue("id"));
+    }
+    { // descending: the tied docs 2 & 3 first (ordered by "id asc"), then doc 1
+      SolrDocumentList docs = getSolrClient().query
+        (params("q",fieldName+":xxx", "sort", fieldName + " desc, id asc")).getResults();
+
+      assertEquals(3L, docs.getNumFound());
+      assertEquals(3L, docs.size());
+      assertEquals("2", docs.get(0).getFieldValue("id"));
+      assertEquals("3", docs.get(1).getFieldValue("id"));
+      assertEquals("1", docs.get(2).getFieldValue("id"));
+    }
+
+  }
+  
  public void testSimilarityParser() throws Exception {
    RestTestHarness harness = restTestHarness;

--- a/solr/core/src/test/org/apache/solr/schema/TestSortableTextField.java
+++ b/solr/core/src/test/org/apache/solr/schema/TestSortableTextField.java
@ -0,0 +1,562 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.schema;
+
+import java.util.Arrays;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.lang.StringUtils;
+
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.SortedSetDocValuesField;
+import org.apache.lucene.index.DocValuesType;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.util.TestUtil;
+
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrDocumentList;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrException.ErrorCode;
+import org.apache.solr.client.solrj.SolrClient;
+import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.util.RefCounted;
+
+import org.junit.Before;
+import org.junit.BeforeClass;
+import static org.hamcrest.CoreMatchers.instanceOf;
+
+public class TestSortableTextField extends SolrTestCaseJ4 {
+
+  protected static final String BIG_CONST
+    = StringUtils.repeat("x", SortableTextField.DEFAULT_MAX_CHARS_FOR_DOC_VALUES); // "x" repeated once per char of the default docValues limit
+  
+  @BeforeClass
+  public static void create() throws Exception {
+    initCore("solrconfig-minimal.xml","schema-sorting-text.xml");
+    
+    // sanity check our fields & types...
+
+    // these should all use docValues (either explicitly or implicitly)...
+    for (String n : Arrays.asList("keyword_stxt", 
+                                  "whitespace_stxt", "whitespace_f_stxt", "whitespace_l_stxt")) {
+           
+      FieldType ft = h.getCore().getLatestSchema().getFieldTypeByName(n);
+      assertEquals("type " + ft.getTypeName() + " should have docvalues - schema got changed?",
+                   true, ft.getNamedPropertyValues(true).get("docValues")) ;
+    }
+    for (String n : Arrays.asList("keyword_stxt", "keyword_dv_stxt",
+                                  "whitespace_stxt", "whitespace_nois_stxt",
+                                  "whitespace_f_stxt", "whitespace_l_stxt")) {
+                                  
+      SchemaField sf = h.getCore().getLatestSchema().getField(n);
+      assertTrue("field " + sf.getName() + " should have docvalues - schema got changed?",
+                 sf.hasDocValues()) ;
+    }
+
+    { // this field should *NOT* have docValues .. should behave like a plain old TextField
+      SchemaField sf = h.getCore().getLatestSchema().getField("whitespace_nodv_stxt");
+      assertFalse("field " + sf.getName() + " should not have docvalues - schema got changed?",
+                  sf.hasDocValues()) ;
+    }
+    
+  }
+  
+  @Before
+  public void cleanup() throws Exception {
+    clearIndex(); // @Before: runs ahead of every test method (presumably leaving an empty index -- confirm in SolrTestCaseJ4)
+  }
+
+  public void testSimple() throws Exception { // searching, asc/desc sorting, the docValues=false error, and sortMissingFirst/Last
+    assertU(adoc("id","1", "whitespace_stxt", "how now brown cow ?", "whitespace_f_stxt", "aaa bbb"));
+    assertU(adoc("id","2", "whitespace_stxt", "how now brown dog ?", "whitespace_f_stxt", "bbb aaa"));
+    assertU(adoc("id","3", "whitespace_stxt", "how now brown cat ?", "whitespace_f_stxt", "xxx yyy"));
+    assertU(adoc("id","4", "whitespace_stxt", "dog and cat"          /* no val for whitespace_f_stxt */));
+                 
+    assertU(commit());
+
+    // search & sort: expected order follows the whole original string -- "dog and cat" (4) < "how now brown cat ?" (3) < "... cow ?" (1) < "... dog ?" (2)
+    // NOTE: even if the field is indexed=false, should still be able to sort on it (the other two sort fields are copyField destinations of whitespace_stxt)
+    for (String sortf : Arrays.asList("whitespace_stxt", "whitespace_nois_stxt", "whitespace_plain_str")) {
+      assertQ(req("q", "whitespace_stxt:cat", "sort", sortf + " asc")
+              , "//*[@numFound='2']"
+              , "//result/doc[1]/str[@name='id'][.=4]"
+              , "//result/doc[2]/str[@name='id'][.=3]"
+              );
+      assertQ(req("q", "whitespace_stxt:cat", "sort", sortf + " desc")
+              , "//*[@numFound='2']"
+              , "//result/doc[1]/str[@name='id'][.=3]"
+              , "//result/doc[2]/str[@name='id'][.=4]"
+              );
+      assertQ(req("q", "whitespace_stxt:brown", "sort", sortf + " asc")
+              , "//*[@numFound='3']"
+              , "//result/doc[1]/str[@name='id'][.=3]"
+              , "//result/doc[2]/str[@name='id'][.=1]"
+              , "//result/doc[3]/str[@name='id'][.=2]"
+              );
+      assertQ(req("q", "whitespace_stxt:brown", "sort", sortf + " desc")
+              , "//*[@numFound='3']"
+              , "//result/doc[1]/str[@name='id'][.=2]"
+              , "//result/doc[2]/str[@name='id'][.=1]"
+              , "//result/doc[3]/str[@name='id'][.=3]"
+              );
+      
+      // we should still be able to search if docValues="false" (but sort on a diff field)
+      assertQ(req("q","whitespace_nodv_stxt:cat", "sort", sortf + " asc")
+              , "//*[@numFound='2']"
+              , "//result/doc[1]/str[@name='id'][.=4]"
+              , "//result/doc[2]/str[@name='id'][.=3]"
+              );
+    }
+    
+    // attempting to sort on docValues="false" field should give an error (BAD_REQUEST whose message mentions: when docValues="false")...
+    assertQEx("attempting to sort on docValues=false field should give an error",
+              "when docValues=\"false\"",
+              req("q","*:*", "sort", "whitespace_nodv_stxt asc"),
+              ErrorCode.BAD_REQUEST);
+
+    // sortMissing - whitespace_f_stxt copyField to whitespace_l_stxt; doc 4 has no value, so it is expected first (sortMissingFirst) or last (sortMissingLast) in both directions
+    assertQ(req("q","*:*", "sort", "whitespace_f_stxt asc")
+            , "//*[@numFound='4']"
+            , "//result/doc[1]/str[@name='id'][.=4]"
+            , "//result/doc[2]/str[@name='id'][.=1]"
+            , "//result/doc[3]/str[@name='id'][.=2]"
+            , "//result/doc[4]/str[@name='id'][.=3]"
+            );    
+    assertQ(req("q","*:*", "sort", "whitespace_f_stxt desc")
+            , "//*[@numFound='4']"
+            , "//result/doc[1]/str[@name='id'][.=4]"
+            , "//result/doc[2]/str[@name='id'][.=3]"
+            , "//result/doc[3]/str[@name='id'][.=2]"
+            , "//result/doc[4]/str[@name='id'][.=1]"
+            );    
+    assertQ(req("q","*:*", "sort", "whitespace_l_stxt asc")
+            , "//*[@numFound='4']"
+            , "//result/doc[1]/str[@name='id'][.=1]"
+            , "//result/doc[2]/str[@name='id'][.=2]"
+            , "//result/doc[3]/str[@name='id'][.=3]"
+            , "//result/doc[4]/str[@name='id'][.=4]"
+            );    
+    assertQ(req("q","*:*", "sort", "whitespace_l_stxt desc")
+            , "//*[@numFound='4']"
+            , "//result/doc[1]/str[@name='id'][.=3]"
+            , "//result/doc[2]/str[@name='id'][.=2]"
+            , "//result/doc[3]/str[@name='id'][.=1]"
+            , "//result/doc[4]/str[@name='id'][.=4]"
+            );
+  }
+
+  public void testSimpleSearchAndFacets() throws Exception { // facet values must be the whole original strings, whichever field is searched
+    assertU(adoc("id","1", "whitespace_stxt", "how now brown cow ?"));
+    assertU(adoc("id","2", "whitespace_stxt", "how now brown cow ?"));
+    assertU(adoc("id","3", "whitespace_stxt", "holy cow !"));
+    assertU(adoc("id","4", "whitespace_stxt", "dog and cat"));
+    
+    assertU(commit());
+
+    // NOTE: even if the field is indexed=false, should still be able to facet on it (every facet/search field pairing is checked; all are copyField destinations of whitespace_stxt)
+    for (String facet : Arrays.asList("whitespace_stxt", "whitespace_nois_stxt",
+                                      "whitespace_m_stxt", "whitespace_plain_str")) {
+      for (String search : Arrays.asList("whitespace_stxt", "whitespace_nodv_stxt",
+                                         "whitespace_m_stxt", "whitespace_plain_txt")) {
+        // facet.field: 3 docs match "cow"; docs 1 & 2 share one value (count 2), doc 3 has its own (count 1), doc 4's value is listed with count 0
+        final String fpre = "//lst[@name='facet_fields']/lst[@name='"+facet+"']/";
+        assertQ(req("q", search + ":cow", "rows", "0", 
+                    "facet.field", facet, "facet", "true")
+                , "//*[@numFound='3']"
+                , fpre + "int[@name='how now brown cow ?'][.=2]"
+                , fpre + "int[@name='holy cow !'][.=1]"
+                , fpre + "int[@name='dog and cat'][.=0]"
+                );
+        
+        // json facet: same expectations via a terms facet (mincount:0 so the zero-count bucket is returned too)
+        final String jpre = "//lst[@name='facets']/lst[@name='x']/arr[@name='buckets']/";
+        assertQ(req("q", search + ":cow", "rows", "0", 
+                    "json.facet", "{x:{ type: terms, field:'" + facet + "', mincount:0 }}")
+                , "//*[@numFound='3']"
+                , jpre + "lst[str[@name='val'][.='how now brown cow ?']][int[@name='count'][.=2]]"
+                , jpre + "lst[str[@name='val'][.='holy cow !']][int[@name='count'][.=1]]"
+                , jpre + "lst[str[@name='val'][.='dog and cat']][int[@name='count'][.=0]]"
+                );
+        
+      }
+    }
+  }
+
+  /**
+   * Inspects the low-level index produced for a single document: for each field of interest this
+   * asserts on the FieldInfo's DocValuesType, on whether (sorted / sorted-set) doc values can be
+   * obtained from the reader, and on whether indexed terms exist.
+   *
+   * NOTE(review): several of the checked fields (e.g. keyword_dv_stxt, whitespace_l_stxt,
+   * whitespace_nodv_stxt, whitespace_nois_stxt) are not set explicitly on the document; presumably
+   * the test schema populates them via copyField -- confirm against the schema.
+   */
+  public void testWhiteboxIndexReader() throws Exception {
+    assertU(adoc("id","1",
+                 "whitespace_stxt", "how now brown cow ?",
+                 "whitespace_m_stxt", "xxx",
+                 "whitespace_m_stxt", "yyy",
+                 "whitespace_f_stxt", "aaa bbb",
+                 "keyword_stxt", "Blarggghhh!"));
+    assertU(commit());
+
+    final RefCounted<SolrIndexSearcher> searcher = h.getCore().getNewestSearcher(false);
+    try {
+      final LeafReader r = searcher.get().getSlowAtomicReader();
+
+      // common cases: each field is expected to have SORTED doc values as well as indexed terms
+      for (String field : Arrays.asList("keyword_stxt", "keyword_dv_stxt",
+                                        "whitespace_stxt", "whitespace_f_stxt", "whitespace_l_stxt")) {
+        assertNotNull("FieldInfos: " + field, r.getFieldInfos().fieldInfo(field));
+        assertEquals("DocValuesType: " + field,
+                     DocValuesType.SORTED, r.getFieldInfos().fieldInfo(field).getDocValuesType());
+        assertNotNull("DocValues: " + field, r.getSortedDocValues(field));
+        assertNotNull("Terms: " + field, r.terms(field));
+                      
+      }
+      
+      // special cases... (first: whitespace_nodv_stxt expects indexed terms but no doc values)
+      assertNotNull(r.getFieldInfos().fieldInfo("whitespace_nodv_stxt"));
+      assertEquals(DocValuesType.NONE,
+                   r.getFieldInfos().fieldInfo("whitespace_nodv_stxt").getDocValuesType());
+      assertNull(r.getSortedDocValues("whitespace_nodv_stxt"));
+      assertNotNull(r.terms("whitespace_nodv_stxt"));
+      // whitespace_nois_stxt: expects SORTED doc values but no indexed terms
+      assertNotNull(r.getFieldInfos().fieldInfo("whitespace_nois_stxt"));
+      assertEquals(DocValuesType.SORTED,
+                   r.getFieldInfos().fieldInfo("whitespace_nois_stxt").getDocValuesType());
+      assertNotNull(r.getSortedDocValues("whitespace_nois_stxt"));
+      assertNull(r.terms("whitespace_nois_stxt"));
+      // whitespace_m_stxt (given two values above): expects SORTED_SET doc values plus indexed terms
+      assertNotNull(r.getFieldInfos().fieldInfo("whitespace_m_stxt"));
+      assertEquals(DocValuesType.SORTED_SET,
+                   r.getFieldInfos().fieldInfo("whitespace_m_stxt").getDocValuesType());
+      assertNotNull(r.getSortedSetDocValues("whitespace_m_stxt"));
+      assertNotNull(r.terms("whitespace_m_stxt"));
+        
+    } finally {
+      if (null != searcher) {
+        searcher.decref(); // give back the reference held through the RefCounted handle
+      }
+    }
+  }
+  /**
+   * Checks which IndexableField instances the field type creates for a value: the number of
+   * fields returned and the class of each, for the common case and for three special-case fields.
+   */
+  public void testWhiteboxCreateFields() throws Exception {
+    List<IndexableField> values = null;
+
+    // common case: an indexed Field followed by a SortedDocValuesField
+    for (String field : Arrays.asList("keyword_stxt", "keyword_dv_stxt",
+                                      "whitespace_stxt", "whitespace_f_stxt", "whitespace_l_stxt")) {
+      values = createIndexableFields(field);
+      assertEquals(field, 2, values.size());
+      assertThat(field, values.get(0), instanceOf(Field.class));
+      assertThat(field, values.get(1), instanceOf(SortedDocValuesField.class));
+    }
+    
+    // special cases... (first: whitespace_nois_stxt yields only the doc values field)
+    values = createIndexableFields("whitespace_nois_stxt");
+    assertEquals(1, values.size());
+    assertThat(values.get(0), instanceOf(SortedDocValuesField.class));
+    // whitespace_nodv_stxt: yields only the plain Field, no doc values field
+    values = createIndexableFields("whitespace_nodv_stxt");
+    assertEquals(1, values.size());
+    assertThat(values.get(0), instanceOf(Field.class));
+    // whitespace_m_stxt: the doc values field is a SortedSetDocValuesField instead
+    values = createIndexableFields("whitespace_m_stxt");
+    assertEquals(2, values.size());
+    assertThat(values.get(0), instanceOf(Field.class));
+    assertThat(values.get(1), instanceOf(SortedSetDocValuesField.class));      
+  }
+  private List<IndexableField> createIndexableFields(String fieldName) { // helper for testWhiteboxCreateFields
+    SchemaField sf = h.getCore().getLatestSchema().getField(fieldName); // look the field up in the latest schema
+    return sf.getType().createFields(sf, "dummy value"); // fixed placeholder value: the caller only inspects the count and classes of the returned fields
+  }
+  /**
+   * Verifies that limiting the number of characters used for sorting does not affect searching,
+   * and that sorting on the length-limited fields behaves as if only a prefix of each value
+   * were compared.
+   *
+   * Judging by the expected results below: under whitespace_max3_stxt all four docs tie (every
+   * value starts with "aaa"), while under whitespace_max6_stxt only doc 4 ("aaa") sorts apart
+   * from docs 1-3 (which share the prefix "aaa bb").  Fields other than whitespace_stxt are
+   * presumably populated via copyField in the test schema (not visible here -- TODO confirm).
+   */
+  public void testMaxCharsSort() throws Exception {
+    assertU(adoc("id","1", "whitespace_stxt", "aaa bbb ccc ddd"));
+    assertU(adoc("id","2", "whitespace_stxt", "aaa bbb xxx yyy"));
+    assertU(adoc("id","3", "whitespace_stxt", "aaa bbb ccc xxx"));
+    assertU(adoc("id","4", "whitespace_stxt", "aaa"));
+    assertU(commit());
+
+    // all terms should be searchable in all fields, even if the docvalues are limited
+    for (String searchF : Arrays.asList("whitespace_stxt", "whitespace_plain_txt",
+                                        "whitespace_max3_stxt", "whitespace_max6_stxt",
+                                        "whitespace_max0_stxt", "whitespace_maxNeg_stxt")) {
+      //  maxChars of 0 or neg should be equivalent to no max at all
+      for (String sortF : Arrays.asList("whitespace_stxt", "whitespace_plain_str", 
+                                        "whitespace_max0_stxt", "whitespace_maxNeg_stxt")) {
+        
+        assertQ(req("q", searchF + ":ccc", "sort", sortF + " desc, id asc")
+                , "//*[@numFound='2']"
+                , "//result/doc[1]/str[@name='id'][.=3]"
+                , "//result/doc[2]/str[@name='id'][.=1]"
+                );
+        
+        assertQ(req("q", searchF + ":ccc", "sort", sortF + " asc, id desc")
+                , "//*[@numFound='2']"
+                , "//result/doc[1]/str[@name='id'][.=1]"
+                , "//result/doc[2]/str[@name='id'][.=3]"
+                );
+      }
+    }
+    
+    // sorting on maxChars-limited fields should force the tie breaker
+    for (String dir : Arrays.asList("asc", "desc")) {
+      // for max3, dir shouldn't matter - should always tie..
+      assertQ(req("q", "*:*", "sort", "whitespace_max3_stxt "+dir+", id desc") // max3, id desc
+              , "//*[@numFound='4']"
+              , "//result/doc[1]/str[@name='id'][.=4]"
+              , "//result/doc[2]/str[@name='id'][.=3]"
+              , "//result/doc[3]/str[@name='id'][.=2]"
+              , "//result/doc[4]/str[@name='id'][.=1]"
+              );
+      assertQ(req("q", "*:*", "sort", "whitespace_max3_stxt "+dir+", id asc") // max3, id asc
+              , "//*[@numFound='4']"
+              , "//result/doc[1]/str[@name='id'][.=1]"
+              , "//result/doc[2]/str[@name='id'][.=2]"
+              , "//result/doc[3]/str[@name='id'][.=3]"
+              , "//result/doc[4]/str[@name='id'][.=4]"
+              );
+    }
+    assertQ(req("q", "*:*", "sort", "whitespace_max6_stxt asc, id desc") // max6 asc, id desc
+            , "//*[@numFound='4']"
+            , "//result/doc[1]/str[@name='id'][.=4]" // no tiebreaker needed
+            , "//result/doc[2]/str[@name='id'][.=3]"
+            , "//result/doc[3]/str[@name='id'][.=2]"
+            , "//result/doc[4]/str[@name='id'][.=1]"
+            );
+    assertQ(req("q", "*:*", "sort", "whitespace_max6_stxt asc, id asc") // max6 asc, id asc
+            , "//*[@numFound='4']"
+            , "//result/doc[1]/str[@name='id'][.=4]" // no tiebreaker needed
+            , "//result/doc[2]/str[@name='id'][.=1]"
+            , "//result/doc[3]/str[@name='id'][.=2]"
+            , "//result/doc[4]/str[@name='id'][.=3]"
+            );
+    assertQ(req("q", "*:*", "sort", "whitespace_max6_stxt desc, id desc") // max6 desc, id desc
+            , "//*[@numFound='4']"
+            , "//result/doc[1]/str[@name='id'][.=3]"
+            , "//result/doc[2]/str[@name='id'][.=2]"
+            , "//result/doc[3]/str[@name='id'][.=1]"
+            , "//result/doc[4]/str[@name='id'][.=4]" // no tiebreaker needed
+            );
+    assertQ(req("q", "*:*", "sort", "whitespace_max6_stxt desc, id asc") // max6 desc, id asc
+            , "//*[@numFound='4']"
+            , "//result/doc[1]/str[@name='id'][.=1]"
+            , "//result/doc[2]/str[@name='id'][.=2]"
+            , "//result/doc[3]/str[@name='id'][.=3]"
+            , "//result/doc[4]/str[@name='id'][.=4]" // no tiebreaker needed
+            );
+    
+    // sanity check that the default max is working (assumes BIG_CONST is at least as long as the default limit -- TODO confirm)....
+    assertU(adoc("id","5", "whitespace_stxt", BIG_CONST + " aaa zzz"));
+    assertU(adoc("id","6", "whitespace_stxt", BIG_CONST + " bbb zzz "));
+    assertU(commit());
+    // for these fields, the tie breaker should be the only thing that matters, regardless of direction...
+    for (String sortF : Arrays.asList("whitespace_stxt", "whitespace_nois_stxt")) {
+      for (String dir : Arrays.asList("asc", "desc")) {
+        assertQ(req("q", "whitespace_stxt:zzz", "sort", sortF + " " + dir + ", id asc")
+                , "//*[@numFound='2']"
+                , "//result/doc[1]/str[@name='id'][.=5]"
+                , "//result/doc[2]/str[@name='id'][.=6]"
+                );
+        assertQ(req("q", "whitespace_stxt:zzz", "sort", sortF + " " + dir + ", id desc")
+                , "//*[@numFound='2']"
+                , "//result/doc[1]/str[@name='id'][.=6]"
+                , "//result/doc[2]/str[@name='id'][.=5]"
+                );
+      }
+    }
+  }
+
+  /**
+   * test how various permutations of useDocValuesAsStored and maxCharsForDocValues interact
+   *
+   * Three phases: (1) a handful of types/fields must report useDocValuesAsStored==false by
+   * default; (2) every field type whose name ends in "_has_usedvs" must report true; (3) every
+   * field whose name ends in "_usedvs" is exercised by adding a doc -- the add must fail when the
+   * field both uses docValues as stored and is a "max6_*" field, otherwise the doc must be
+   * retrievable (with or without the field value, depending on useDocValuesAsStored).
+   */
+  public void testUseDocValuesAsStored() throws Exception {
+    ignoreException("when useDocValuesAsStored=true \\(length="); // presumably matches the error expected from the usedvs + max6 case below
+    
+    // first things first...
+    // unlike most field types, SortableTextField should default to useDocValuesAsStored==false
+    // (check a handful that should have the default behavior)
+    for (String n : Arrays.asList("keyword_stxt", "whitespace_max0_stxt", "whitespace_max6_stxt")) {
+      {
+        FieldType ft = h.getCore().getLatestSchema().getFieldTypeByName(n); // NOTE(review): n is used as a *type* name here and a *field* name below; presumably the schema uses matching names -- confirm
+        assertEquals("type " + ft.getTypeName() + " should not default to useDocValuesAsStored",
+                     false, ft.useDocValuesAsStored()) ;
+      }
+      {
+        SchemaField sf = h.getCore().getLatestSchema().getField(n);
+        assertEquals("field " + sf.getName() + " should not default to useDocValuesAsStored",
+                     false, sf.useDocValuesAsStored()) ;
+      }
+    }
+    
+    // but it should be possible to set useDocValuesAsStored=true explicitly on types...
+    int num_types_found = 0;
+    for (Map.Entry<String,FieldType> entry : h.getCore().getLatestSchema().getFieldTypes().entrySet()) {
+      if (entry.getKey().endsWith("_has_usedvs")) {
+        num_types_found++;
+        FieldType ft = entry.getValue();
+        assertEquals("type " + ft.getTypeName() + " has unexpected useDocValuesAsStored value",
+                     true, ft.useDocValuesAsStored()) ;
+      }
+    }
+    assertEquals("sanity check: wrong number of *_has_usedvs types found -- schema changed?",
+                 2, num_types_found);
+
+    
+    // ...and it should be possible to set/override useDocValuesAsStored=true on fields...
+    int num_fields_found = 0;
+    List<String> xpaths = new ArrayList<>(42); // collects one xpath per successfully added doc
+    for (Map.Entry<String,SchemaField> entry : h.getCore().getLatestSchema().getFields().entrySet()) {
+      if (entry.getKey().endsWith("_usedvs")) {
+        num_fields_found++;
+        final SchemaField sf = entry.getValue();
+        final String name = sf.getName();
+        
+        // some sanity check before we move on with the rest of our testing...
+        assertFalse("schema change? field should not be stored=true: " + name, sf.stored());
+        final boolean usedvs = name.endsWith("_has_usedvs");
+        assertTrue("schema change broke assumptions: field must be '*_has_usedvs' or '*_negates_usedvs': " +
+                   name, usedvs ^ name.endsWith("_negates_usedvs"));
+        final boolean max6 = name.startsWith("max6_");
+        assertTrue("schema change broke assumptions: field must be 'max6_*' or 'max0_*': " +
+                   name, max6 ^ name.startsWith("max0_"));
+        
+        assertEquals("Unexpected useDocValuesAsStored value for field: " + name,
+                     usedvs, sf.useDocValuesAsStored()) ;
+        
+        final String docid = ""+num_fields_found; // running counter doubles as a unique doc id per field
+        if (usedvs && max6) {
+          // if useDocValuesAsStored==true and maxCharsForDocValues=N then longer values should fail
+          
+          final String doc = adoc("id", docid, name, "apple pear orange");
+          SolrException ex = expectThrows(SolrException.class, () -> { assertU(doc); });
+          for (String expect : Arrays.asList("field " + name,
+                                             "length=17",
+                                             "useDocValuesAsStored=true",
+                                             "maxCharsForDocValues=6")) {
+            assertTrue("exception must mention " + expect + ": " + ex.getMessage(),
+                       ex.getMessage().contains(expect));
+          }
+        } else {
+          // otherwise (useDocValuesAsStored==false *OR* maxCharsForDocValues=0) any value
+          // should be fine when adding a doc and we should be able to search for it later...
+          final String val = docid + " apple pear orange " + BIG_CONST;
+          assertU(adoc("id", docid, name, val));
+          String doc_xpath = "//result/doc[str[@name='id'][.='"+docid+"']]";
+            
+          if (usedvs) {
+            // ...and if it *does* usedvs, then we should definitely see our value when searching...
+            doc_xpath = doc_xpath + "[str[@name='"+name+"'][.='"+val+"']]";
+          } else {
+            // ...but if not, then we should definitely not see any value for our field...
+            doc_xpath = doc_xpath + "[not(str[@name='"+name+"'])]";
+          }
+          xpaths.add(doc_xpath);
+        }
+      }
+    }
+    assertEquals("sanity check: wrong number of *_usedvs fields found -- schema changed?",
+                 6, num_fields_found);
+    
+    // check all our expected docs can be found (with the expected values)
+    assertU(commit());
+    xpaths.add("//*[@numFound='"+xpaths.size()+"']"); // at this point xpaths holds exactly one entry per added doc, so its size is the expected numFound
+    assertQ(req("q", "*:*", "fl", "*"), xpaths.toArray(new String[xpaths.size()]));
+  }
+    
+
+  
+  /**
+   * tests that a SortableTextField using KeywordTokenizer (w/docValues) behaves exactly the same as 
+   * StrFields that it's copied to for querying and sorting
+   *
+   * Only keyword_stxt is set on each doc; the remaining test fields (and the id_i field used for
+   * the range queries) are presumably populated by copyField rules in the test schema --
+   * TODO confirm against the schema.
+   */
+  public void testRandomStrEquivilentBehavior() throws Exception {
+    final List<String> test_fields = Arrays.asList("keyword_stxt", "keyword_dv_stxt",
+                                                   "keyword_s_dv", "keyword_s");
+    // we use embedded client instead of assertQ: we want to compare the responses from multiple requests
+    final SolrClient client = new EmbeddedSolrServer(h.getCore());
+    
+    final int numDocs = atLeast(100);
+    final int magicIdx = TestUtil.nextInt(random(), 1, numDocs);
+    String magic = null;
+    for (int i = 1; i <= numDocs; i++) {
+
+      // ideally we'd test all "realistic" unicode strings, but EmbeddedSolrServer uses XML request writer
+      // and has no option to change this so ctrl-characters break the request
+      final String val = TestUtil.randomSimpleString(random(), 100);
+      if (i == magicIdx) {
+        magic = val;
+      }
+      assertEquals(0, client.add(sdoc("id", ""+i, "keyword_stxt", val)).getStatus());
+      
+    }
+    assertNotNull(magic);
+    
+    assertEquals(0, client.commit().getStatus());
+
+    // query for magic term should match same doc regardless of field (reminder: keyword tokenizer)
+    // (we need the filter in the unlikely event that the magic value is randomly picked twice)
+    for (String f : test_fields) {
+      
+      final SolrDocumentList results = client.query(params("q", "{!field f="+f+" v=$v}",
+                                                           "v", magic,
+                                                           "fq", "id:" + magicIdx )).getResults();
+      assertEquals(f + ": Query ("+magic+") filtered by id: " + magicIdx + " ==> " + results,
+                   1L, results.getNumFound());
+      final SolrDocument doc = results.get(0);
+      assertEquals(f + ": Query ("+magic+") filtered by id: " + magicIdx + " ==> " + doc,
+                   ""+magicIdx, doc.getFieldValue("id"));
+      assertEquals(f + ": Query ("+magic+") filtered by id: " + magicIdx + " ==> " + doc,
+                   magic, doc.getFieldValue(f));
+    }
+
+    // do some random id range queries using each of the test fields for sorting.  results should be identical
+    final int numQ = atLeast(10);
+    for (int i = 0; i < numQ; i++) {
+      final int hi = TestUtil.nextInt(random(), 1, numDocs-1);
+      final int lo = TestUtil.nextInt(random(), 1, hi);
+      final boolean fwd = random().nextBoolean();
+      
+      SolrDocumentList previous = null;
+      String prevField = null;
+      for (String f : test_fields) {
+        final SolrDocumentList results = client.query(params("q","id_i:["+lo+" TO "+hi+"]",
+                                                             "sort", f + (fwd ? " asc" : " desc") +
+                                                             // secondary on id for determinism
+                                                             ", id asc")
+                                                      ).getResults();
+        assertEquals(results.toString(), (1L + hi - lo), results.getNumFound()); // inclusive range lo..hi
+        if (null != previous) {
+          assertEquals(prevField + " vs " + f,
+                       previous.getNumFound(), results.getNumFound());
+          for (int d = 0; d < results.size(); d++) {
+            assertEquals(prevField + " vs " + f + ": " + d,
+                         previous.get(d).getFieldValue("id"),
+                         results.get(d).getFieldValue("id"));
+            assertEquals(prevField + " vs " + f + ": " + d,
+                         previous.get(d).getFieldValue(prevField),
+                         results.get(d).getFieldValue(f));
+            
+          }
+        }
+        previous = results; // each field's results are compared against the previous field's
+        prevField = f;
+      }
+    }
+    
+  }
+}
--- a/solr/core/src/test/org/apache/solr/search/function/TestMinMaxOnMultiValuedField.java
+++ b/solr/core/src/test/org/apache/solr/search/function/TestMinMaxOnMultiValuedField.java
@ -59,6 +59,11 @@ public class TestMinMaxOnMultiValuedField extends SolrTestCaseJ4 {
                              "date_missf_", "date_missl_",
                              "enum_missf_", "enum_missl_",
                              "bool_missf_", "bool_missl_"  }, new String [] {"_dv"});
+    checkFields(new String[] {"stxt_", // no expectation on missing first/last
+                              "stxt_missf_", "stxt_missl_" }, new String [] { "_dv"});
+    checkFields(new String [] { "stxt_" }, // no expectation on missing first/last
+                new String [] { "_nodv", "_dv" });
+    checkFields(new String [] { "stxt_missf_", "stxt_missl_" }, new String [] { "_dv"});
      
  }
  
@ -71,8 +76,9 @@ public class TestMinMaxOnMultiValuedField extends SolrTestCaseJ4 {
        SchemaField sf = schema.getField(f);
        assertTrue(f + " is not multivalued", sf.multiValued());
        assertEquals(f + " doesn't have expected docValues status",
-                     f.contains("dv") || f.endsWith("_p")
-                     || Boolean.getBoolean(NUMERIC_DOCVALUES_SYSPROP), sf.hasDocValues());
+                     ((f.contains("dv") || f.endsWith("_p") || Boolean.getBoolean(NUMERIC_DOCVALUES_SYSPROP))
+                      && !f.contains("nodv")),
+                     sf.hasDocValues());
        assertEquals(f + " doesn't have expected index status",
                     ! f.contains("ni"), sf.indexed());

@ -178,19 +184,27 @@ public class TestMinMaxOnMultiValuedField extends SolrTestCaseJ4 {
  }

  public void testBasicStrings() {
+    checkBasicStrings("val_strs_dv");
+  }
+  public void testBasicSortableText() { // runs the same checkBasicStrings checks as testBasicStrings, against three val_stxt_* fields
+    checkBasicStrings("val_stxt_s_dv");
+    checkBasicStrings("val_stxt_missf_s_dv");
+    checkBasicStrings("val_stxt_missl_s_dv");
+  }
+  private void checkBasicStrings(final String field) {
    assertU(adoc(sdoc("id", "1",
-                      "val_strs_dv", "dog",
-                      "val_strs_dv", "xyz",
-                      "val_strs_dv", "cat")));
-    assertU(adoc(sdoc("id", "2"))); // 2 has no val_strs_dv values
+                      field, "dog",
+                      field, "xyz",
+                      field, "cat")));
+    assertU(adoc(sdoc("id", "2"))); // 2 has no values in tested field
    assertU(commit());

    // id=1: has values
    assertQ(req("q","id:1"
-                ,"fl","exists_min_str:exists(field(val_strs_dv,min))"
-                ,"fl","exists_max_str:exists(field(val_strs_dv,max))"
-                ,"fl","min_str:field(val_strs_dv,min)"
-                ,"fl","max_str:field(val_strs_dv,max)"
+                ,"fl","exists_min_str:exists(field("+field+",min))"
+                ,"fl","exists_max_str:exists(field("+field+",max))"
+                ,"fl","min_str:field("+field+",min)"
+                ,"fl","max_str:field("+field+",max)"
                
                )
            ,"//*[@numFound='1']"
@ -201,10 +215,10 @@ public class TestMinMaxOnMultiValuedField extends SolrTestCaseJ4 {
            );
    // id=2: no values
    assertQ(req("q","id:2"
-                ,"fl","exists_min_str:exists(field(val_strs_dv,min))"
-                ,"fl","exists_max_str:exists(field(val_strs_dv,max))"
-                ,"fl","min_str:field(val_strs_dv,min)"
-                ,"fl","max_str:field(val_strs_dv,max)"
+                ,"fl","exists_min_str:exists(field("+field+",min))"
+                ,"fl","exists_max_str:exists(field("+field+",max))"
+                ,"fl","min_str:field("+field+",min)"
+                ,"fl","max_str:field("+field+",max)"
                
                )
            ,"//*[@numFound='1']"
@ -219,6 +233,10 @@ public class TestMinMaxOnMultiValuedField extends SolrTestCaseJ4 {
    testExpectedSortOrdering("val_strs_dv", false,
                             null, "a", "cat", "dog", "wako", "xyz", "zzzzz");
  }
+  public void testExpectedSortOrderingSortableText() { // same expected values as the val_strs_dv ordering check, applied to val_stxt_s_dv
+    testExpectedSortOrdering("val_stxt_s_dv", false,
+                             null, "a", "cat", "dog", "wako", "xyz", "zzzzz");
+  }

  public void testExpectedSortMissingOrderings() {

@ -226,7 +244,9 @@ public class TestMinMaxOnMultiValuedField extends SolrTestCaseJ4 {
    // (in this simple test) we aren't using a secondary sort, so there is no way to disambiguate
    // docs that have those values from docs that have those *effective* sort values

-    testSortMissingMinMax("val_str", "a", "aaaaaa", "xxxxx", "zzzzzzzzzzzzzzzzzzz");
+    testSortMissingMinMax("val_str",  "a", "aaaaaa", "xxxxx", "zzzzzzzzzzzzzzzzzzz");
+    testSortMissingMinMax("val_stxt", "a", "aaaaaa", "xxxxx", "zzzzzzzzzzzzzzzzzzz");
+    
    testSortMissingMinMax("val_int",
                          Integer.MIN_VALUE+1L, -9999, 0, 99999, Integer.MAX_VALUE-1L);
    testSortMissingMinMax("val_long",
@ -382,6 +402,15 @@ public class TestMinMaxOnMultiValuedField extends SolrTestCaseJ4 {
              "docValues",
              req("q","*:*", "fl", "field(cat,'max')"),
              SolrException.ErrorCode.BAD_REQUEST);
+    assertQEx("no error mentioning field name when asking for max on a non-dv sortable text field",
+              "val_stxt_s_nodv",
+              req("q","*:*", "fl", "field(val_stxt_s_nodv,'max')"),
+              SolrException.ErrorCode.BAD_REQUEST);
+    assertQEx("no error mentioning 'docValues' when asking for max on a non-dv sortable field",
+              "docValues",
+              req("q","*:*", "fl", "field(val_stxt_s_nodv,'max')"),
+              SolrException.ErrorCode.BAD_REQUEST);
+
    
  }

--- a/solr/solr-ref-guide/src/analyzers.adoc
+++ b/solr/solr-ref-guide/src/analyzers.adoc
@ -20,7 +20,7 @@ An analyzer examines the text of fields and generates a token stream.

 Analyzers are specified as a child of the `<fieldType>` element in the `schema.xml` configuration file (in the same `conf/` directory as `solrconfig.xml`).

-In normal usage, only fields of type `solr.TextField` will specify an analyzer. The simplest way to configure an analyzer is with a single `<analyzer>` element whose class attribute is a fully qualified Java class name. The named class must derive from `org.apache.lucene.analysis.Analyzer`. For example:
+In normal usage, only fields of type `solr.TextField` or `solr.SortableTextField` will specify an analyzer. The simplest way to configure an analyzer is with a single `<analyzer>` element whose class attribute is a fully qualified Java class name. The named class must derive from `org.apache.lucene.analysis.Analyzer`. For example:

 [source,xml]
 ----
--- a/solr/solr-ref-guide/src/common-query-parameters.adoc
+++ b/solr/solr-ref-guide/src/common-query-parameters.adoc
@ -37,10 +37,11 @@ Solr can sort query responses according to:
 * Document scores
 * <<function-queries.adoc#sort-by-function,Function results>>
 * The value of any primative field (numerics, string, boolean, dates, etc...) which has `docValues="true"` (or `multiValued="false"` and `indexed="true"` in which case the indexed terms will used to build DocValue like structures on the fly at runtime)
-* A TextField that uses an analyzer (such as the KeywordTokenizer) that produces only a single term.
+* A SortableTextField which implicitly uses `docValues="true"` by default to allow sorting on the original input string regardless of the analyzers used for searching.
+* A single-valued TextField that uses an analyzer (such as the KeywordTokenizer) that produces only a single term per document.  TextField does not support `docValues="true"`, but a DocValues-like structure will be built on the fly at runtime.
 ** *NOTE:* If you want to be able to sort on a field whose contents you want to tokenize to facilitate searching, <<copying-fields.adoc#copying-fields,use a `copyField` directive>> in the the Schema to clone the field. Then search on the field and sort on its clone.

-In the case of primative fields that are `multiValued="true"` the representantive value used for each doc when sorting depends on the sort direction: The minimum value in each document is used for ascending (`asc`) sorting, while the maximal value in each document is used for descending (`desc`) sorting.  This default behavior is equivilent to explicitly sorting using the 2 argument `<<function-queries.adoc#field-function,field()>>` function: `sort=field(name,min) asc` and `sort=field(name,max) desc`
+In the case of primitive fields, or SortableTextFields, that are `multiValued="true"` the representative value used for each doc when sorting depends on the sort direction: The minimum value in each document is used for ascending (`asc`) sorting, while the maximal value in each document is used for descending (`desc`) sorting.  This default behavior is equivalent to explicitly sorting using the 2 argument `<<function-queries.adoc#field-function,field()>>` function: `sort=field(name,min) asc` and `sort=field(name,max) desc`

 The table below explains how Solr responds to various settings of the `sort` parameter.

--- a/solr/solr-ref-guide/src/field-types-included-with-solr.adoc
+++ b/solr/solr-ref-guide/src/field-types-included-with-solr.adoc
@ -69,6 +69,8 @@ Configuration and usage of PreAnalyzedField is documented in the section  <<work

 |StrField |String (UTF-8 encoded string or Unicode). Strings are intended for small fields and are _not_ tokenized or analyzed in any way. They have a hard limit of slightly less than 32K.

+|SortableTextField |A specialized version of TextField that allows (and defaults to) `docValues="true"` for sorting on the first 1024 characters of the original string prior to analysis -- the number of characters used for sorting can be overridden with the `maxCharsForDocValues` attribute.
+
 |TextField |Text, usually multiple words or tokens.

 |TrieDateField |*Deprecated*. Use DatePointField instead.
@ -91,4 +93,4 @@ Configuration and usage of PreAnalyzedField is documented in the section  <<work
 NOTE: All Trie* numeric and date field types have been deprecated in favor of *Point field types.
      Point field types are better at range queries (speed, memory, disk), however simple field:value queries underperform
      relative to Trie. Either accept this, or continue to use Trie fields.
-      This shortcoming may be addressed in a future release.
+      This shortcoming may be addressed in a future release.