From 5b2594350df11ef54d52f417b34c6d082ad85e89 Mon Sep 17 00:00:00 2001 From: Noble Paul Date: Tue, 29 Nov 2016 08:05:47 +0530 Subject: [PATCH 01/53] SOLR-9784: added deprecation javadocs --- .../org/apache/solr/client/solrj/impl/CloudSolrClient.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java index 9c59d4f69e1..3b694843995 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java @@ -645,6 +645,8 @@ public class CloudSolrClient extends SolrClient { * are allowing client access to zookeeper, you should protect the * /configs node against unauthorised write access. * + * @deprecated Please use {@link ZkClientClusterStateProvider#uploadConfig(Path, String)} instead + * * @param configPath {@link java.nio.file.Path} to the config files * @param configName the name of the config * @throws IOException if an IO error occurs @@ -665,6 +667,8 @@ public class CloudSolrClient extends SolrClient { /** * Download a named config from Zookeeper to a location on the filesystem + * + * @deprecated Please use {@link ZkClientClusterStateProvider#downloadConfig(String, Path)} instead * @param configName the name of the config * @param downloadPath the path to write config files to * @throws IOException if an I/O exception occurs From 70b358960dfe8a6da35991b2a84c93cc9370c3d8 Mon Sep 17 00:00:00 2001 From: Noble Paul Date: Tue, 29 Nov 2016 18:02:59 +0530 Subject: [PATCH 02/53] SOLR-9546: remove unnecessary boxing --- .../solr/search/mlt/CloudMLTQParser.java | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java b/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java index 0f85feb13c9..0f46725eb27 100644 --- a/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java +++ b/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java @@ -69,26 +69,19 @@ public class CloudMLTQParser extends QParser { Map boostFields = new HashMap<>(); MoreLikeThis mlt = new MoreLikeThis(req.getSearcher().getIndexReader()); - if(localParams.getInt("mintf") != null) - mlt.setMinTermFreq(localParams.getInt("mintf")); + mlt.setMinTermFreq(localParams.getInt("mintf", MoreLikeThis.DEFAULT_MIN_TERM_FREQ)); mlt.setMinDocFreq(localParams.getInt("mindf", 0)); - if(localParams.get("minwl") != null) - mlt.setMinWordLen(localParams.getInt("minwl")); + mlt.setMinWordLen(localParams.getInt("minwl", MoreLikeThis.DEFAULT_MIN_WORD_LENGTH)); - if(localParams.get("maxwl") != null) - mlt.setMaxWordLen(localParams.getInt("maxwl")); + mlt.setMaxWordLen(localParams.getInt("maxwl", MoreLikeThis.DEFAULT_MAX_WORD_LENGTH)); - if(localParams.get("maxqt") != null) - mlt.setMaxQueryTerms(localParams.getInt("maxqt")); + mlt.setMaxQueryTerms(localParams.getInt("maxqt", MoreLikeThis.DEFAULT_MAX_QUERY_TERMS)); - if(localParams.get("maxntp") != null) - mlt.setMaxNumTokensParsed(localParams.getInt("maxntp")); + mlt.setMaxNumTokensParsed(localParams.getInt("maxntp", MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED)); - if(localParams.get("maxdf") != null) { - mlt.setMaxDocFreq(localParams.getInt("maxdf")); - } + mlt.setMaxDocFreq(localParams.getInt("maxdf", MoreLikeThis.DEFAULT_MAX_DOC_FREQ)); if(localParams.get("boost") != null) { mlt.setBoost(localParams.getBool("boost")); From 
02c687758e904ab92c2b766b2ec837bcb99f484f Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Mon, 28 Nov 2016 19:58:25 +0100 Subject: [PATCH 03/53] SOLR-9783: (Search|Top)Group[s]ShardResponseProcessor.process: turned sortWithinGroup null check into assert. Also sort.equals tweak in (grouping) QueryCommand.create method. --- solr/CHANGES.txt | 3 +++ .../search/grouping/distributed/command/QueryCommand.java | 2 +- .../responseprocessor/SearchGroupShardResponseProcessor.java | 4 +--- .../responseprocessor/TopGroupsShardResponseProcessor.java | 4 +--- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 422f1c658e4..cead9af537e 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -245,6 +245,9 @@ Other Changes * SOLR-9801: Upgrade jetty to 9.3.14.v20161028 (shalin) +* SOLR-9783: (Search|Top)Group[s]ShardResponseProcessor.process: turned sortWithinGroup null check into assert. + (Christine Poerschke) + ================== 6.3.0 ================== Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release. diff --git a/solr/core/src/java/org/apache/solr/search/grouping/distributed/command/QueryCommand.java b/solr/core/src/java/org/apache/solr/search/grouping/distributed/command/QueryCommand.java index 86fe729447e..afb8ba78a9c 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/distributed/command/QueryCommand.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/command/QueryCommand.java @@ -124,7 +124,7 @@ public class QueryCommand implements Command { @Override public List create() throws IOException { - if (sort == null || sort == Sort.RELEVANCE) { + if (sort == null || sort.equals(Sort.RELEVANCE)) { collector = TopScoreDocCollector.create(docsToCollect); } else { collector = TopFieldCollector.create(sort, docsToCollect, true, needScores, needScores); diff --git a/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/SearchGroupShardResponseProcessor.java b/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/SearchGroupShardResponseProcessor.java index 18b0de54a44..0acd6f90e27 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/SearchGroupShardResponseProcessor.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/SearchGroupShardResponseProcessor.java @@ -52,9 +52,7 @@ public class SearchGroupShardResponseProcessor implements ShardResponseProcessor Sort groupSort = rb.getGroupingSpec().getGroupSort(); final String[] fields = rb.getGroupingSpec().getFields(); Sort sortWithinGroup = rb.getGroupingSpec().getSortWithinGroup(); - if (sortWithinGroup == null) { // TODO prevent it from being null in the first place - sortWithinGroup = Sort.RELEVANCE; - } + assert sortWithinGroup != null; final Map>>> commandSearchGroups = new HashMap<>(fields.length, 1.0f); final Map, Set>> tempSearchGroupToShards = new HashMap<>(fields.length, 1.0f); diff --git a/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/TopGroupsShardResponseProcessor.java b/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/TopGroupsShardResponseProcessor.java index 688a6c37011..3610a383ccb 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/TopGroupsShardResponseProcessor.java +++ 
b/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/TopGroupsShardResponseProcessor.java @@ -61,9 +61,7 @@ public class TopGroupsShardResponseProcessor implements ShardResponseProcessor { String[] fields = rb.getGroupingSpec().getFields(); String[] queries = rb.getGroupingSpec().getQueries(); Sort sortWithinGroup = rb.getGroupingSpec().getSortWithinGroup(); - if (sortWithinGroup == null) { // TODO prevent it from being null in the first place - sortWithinGroup = Sort.RELEVANCE; - } + assert sortWithinGroup != null; // If group.format=simple group.offset doesn't make sense int groupOffsetDefault; From 590d31f311c092aa97bc64b1a28a9dbf934b0e52 Mon Sep 17 00:00:00 2001 From: Andrzej Bialecki Date: Tue, 29 Nov 2016 21:11:40 +0100 Subject: [PATCH 04/53] SOLR-9768 RecordingJsonParser produces incomplete json (Wojciech Stryszyk via ab) --- solr/CHANGES.txt | 2 ++ .../apache/solr/util/RecordingJSONParser.java | 17 +++++++-- .../common/util/TestJsonRecordReader.java | 35 +++++++++++++++---- 3 files changed, 45 insertions(+), 9 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index cead9af537e..448f2d7adbe 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -216,6 +216,8 @@ Bug Fixes * SOLR-5260: Facet search on a docvalue field in a multi shard collection (Trym Møller, Erick Erickson) +* SOLR-9768: RecordingJsonParser produces incomplete json (Wojciech Stryszyk via ab) + Other Changes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/util/RecordingJSONParser.java b/solr/core/src/java/org/apache/solr/util/RecordingJSONParser.java index 030913d349f..a85610b7cb2 100644 --- a/solr/core/src/java/org/apache/solr/util/RecordingJSONParser.java +++ b/solr/core/src/java/org/apache/solr/util/RecordingJSONParser.java @@ -29,7 +29,9 @@ public class RecordingJSONParser extends JSONParser { private StringBuilder sb = new StringBuilder(); private boolean objectStarted = false; - public long lastMarkedPosition = 0; + private long lastMarkedPosition = 0; + private long lastGlobalPosition = 0; + private static final int BUFFER_SIZE = 8192; public RecordingJSONParser(Reader in) { @@ -39,7 +41,7 @@ public class RecordingJSONParser extends JSONParser { } static char[] getChars() { - buf.set(new char[8192]); + buf.set(new char[BUFFER_SIZE]); return buf.get(); } @@ -68,11 +70,22 @@ public class RecordingJSONParser extends JSONParser { if(currPosition < 0){ System.out.println("ERROR"); } + if (currPosition > lastMarkedPosition) { for (long i = lastMarkedPosition; i < currPosition; i++) { recordChar(bufCopy[(int) i]); } + } else if (currPosition < lastMarkedPosition) { + for (long i = 0; i < currPosition; i++) { + recordChar(bufCopy[(int) i]); + } + } else if (currPosition == BUFFER_SIZE && lastGlobalPosition != globalPosition) { + for (long i = 0; i < currPosition; i++) { + recordChar(bufCopy[(int) i]); + } } + + lastGlobalPosition = globalPosition; lastMarkedPosition = currPosition; } diff --git a/solr/solrj/src/test/org/apache/solr/common/util/TestJsonRecordReader.java b/solr/solrj/src/test/org/apache/solr/common/util/TestJsonRecordReader.java index d59dea39146..da75a43cf15 100644 --- a/solr/solrj/src/test/org/apache/solr/common/util/TestJsonRecordReader.java +++ b/solr/solrj/src/test/org/apache/solr/common/util/TestJsonRecordReader.java @@ -16,11 +16,6 @@ */ package org.apache.solr.common.util; -import org.apache.solr.SolrTestCaseJ4; -import org.apache.solr.util.RecordingJSONParser; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import 
java.io.IOException; import java.io.StringReader; import java.lang.invoke.MethodHandles; @@ -31,6 +26,12 @@ import java.util.List; import java.util.Map; import java.util.concurrent.atomic.AtomicReference; +import org.apache.commons.lang.StringUtils; +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.util.RecordingJSONParser; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + public class TestJsonRecordReader extends SolrTestCaseJ4 { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); @@ -129,18 +130,32 @@ public class TestJsonRecordReader extends SolrTestCaseJ4 { " \"nested_inside\" : \"check check check 1\"\n" + " }\n" + "}"; + String json2 = " {\n" + " \"id\" : \"345\",\n" + + " \"payload\": \""+ StringUtils.repeat("0123456789", 819) + + "\",\n" + " \"description\": \"Testing /json/docs srcField 2\",\n" + "\n" + " \"nested_data\" : {\n" + " \"nested_inside\" : \"check check check 2\"\n" + " }\n" + "}"; - JsonRecordReader streamer = JsonRecordReader.getInst("/", Arrays.asList("id:/id")); - RecordingJSONParser parser = new RecordingJSONParser(new StringReader(json + json2)); + String json3 = + " {\n" + + " \"id\" : \"678\",\n" + + " \"description\": \"Testing /json/docs srcField 3\",\n" + + "\n" + + " \"nested_data\" : {\n" + + " \"nested_inside\" : \"check check check 3\"\n" + + " }\n" + + "}"; + + + JsonRecordReader streamer = JsonRecordReader.getInst("/", Arrays.asList("id:/id")); + RecordingJSONParser parser = new RecordingJSONParser(new StringReader(json + json2 + json3)); streamer.streamRecords(parser, new JsonRecordReader.Handler() { int count = 0; @@ -162,6 +177,12 @@ public class TestJsonRecordReader extends SolrTestCaseJ4 { assertEquals(m.get("description"), "Testing /json/docs srcField 2"); assertEquals(((Map) m.get("nested_data")).get("nested_inside"), "check check check 2"); } + if (count++ == 3) { + assertEquals(m.get("id"), "678"); + assertEquals(m.get("description"), "Testing /json/docs srcField 3"); + assertEquals(((Map) m.get("nested_data")).get("nested_inside"), "check check check 3"); + } + } }); From a7fa920b52febb80be70210caad7db1eeaf0f97a Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Tue, 29 Nov 2016 19:54:47 +0100 Subject: [PATCH 05/53] SOLR-9660: in GroupingSpecification factor [group](sort|offset|limit) into [group](sortSpec) (Judith Silverman, Christine Poerschke) --- solr/CHANGES.txt | 3 + .../handler/component/QueryComponent.java | 34 ++++----- .../component/QueryElevationComponent.java | 28 +++----- .../apache/solr/search/SolrIndexSearcher.java | 26 +++++++ .../java/org/apache/solr/search/SortSpec.java | 10 +++ .../grouping/GroupingSpecification.java | 70 +++++++++++-------- .../TopGroupsShardResponseProcessor.java | 4 +- .../GroupedEndResultTransformer.java | 4 +- 8 files changed, 112 insertions(+), 67 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 448f2d7adbe..1584647e787 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -250,6 +250,9 @@ Other Changes * SOLR-9783: (Search|Top)Group[s]ShardResponseProcessor.process: turned sortWithinGroup null check into assert. (Christine Poerschke) +* SOLR-9660: in GroupingSpecification factor [group](sort|offset|limit) into [group](sortSpec) + (Judith Silverman, Christine Poerschke) + ================== 6.3.0 ================== Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release. 
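As an aside on the SOLR-9660 change below: a minimal sketch of how a caller now wires sort, offset and limit through SortSpec objects instead of the separate GroupingSpecification fields. The class names, the four-argument SortSpec constructor and the setters are taken from the diff that follows; the searcher, params, requestSortSpec and groupingSpec variables are assumed to exist in the calling component, and this condenses the QueryComponent change rather than reproducing it.

    static void applyGroupSorts(SolrIndexSearcher searcher, SortSpec requestSortSpec,
                                SolrParams params, GroupingSpecification groupingSpec) throws IOException {
      // Weight the requested sort against the searcher; fall back to relevance when no sort was given.
      SortSpec groupSortSpec = searcher.weightSortSpec(requestSortSpec, Sort.RELEVANCE);

      // group.offset and group.limit now live on the within-group SortSpec,
      // not on GroupingSpecification itself.
      SortSpec withinGroupSortSpec = new SortSpec(
          groupSortSpec.getSort(),
          groupSortSpec.getSchemaFields(),
          params.getInt(GroupParams.GROUP_LIMIT, 1),    // count, formerly setGroupLimit
          params.getInt(GroupParams.GROUP_OFFSET, 0));  // offset, formerly setGroupOffset

      groupingSpec.setGroupSortSpec(groupSortSpec);
      groupingSpec.setSortSpecWithinGroup(withinGroupSortSpec);
    }
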
diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java index fb6fec94111..84ade43c7a5 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java @@ -252,21 +252,27 @@ public class QueryComponent extends SearchComponent final SortSpec sortSpec = rb.getSortSpec(); //TODO: move weighting of sort - Sort groupSort = searcher.weightSort(sortSpec.getSort()); - if (groupSort == null) { - groupSort = Sort.RELEVANCE; - } + final SortSpec groupSortSpec = searcher.weightSortSpec(sortSpec, Sort.RELEVANCE); // groupSort defaults to sort String sortWithinGroupStr = params.get(GroupParams.GROUP_SORT); //TODO: move weighting of sort - Sort sortWithinGroup = sortWithinGroupStr == null ? groupSort : searcher.weightSort(SortSpecParsing.parseSortSpec(sortWithinGroupStr, req).getSort()); - if (sortWithinGroup == null) { - sortWithinGroup = Sort.RELEVANCE; + final SortSpec sortSpecWithinGroup; + if (sortWithinGroupStr != null) { + SortSpec parsedSortSpecWithinGroup = SortSpecParsing.parseSortSpec(sortWithinGroupStr, req); + sortSpecWithinGroup = searcher.weightSortSpec(parsedSortSpecWithinGroup, Sort.RELEVANCE); + } else { + sortSpecWithinGroup = new SortSpec( + groupSortSpec.getSort(), + groupSortSpec.getSchemaFields(), + groupSortSpec.getCount(), + groupSortSpec.getOffset()); } + sortSpecWithinGroup.setOffset(params.getInt(GroupParams.GROUP_OFFSET, 0)); + sortSpecWithinGroup.setCount(params.getInt(GroupParams.GROUP_LIMIT, 1)); - groupingSpec.setSortWithinGroup(sortWithinGroup); - groupingSpec.setGroupSort(groupSort); + groupingSpec.setSortSpecWithinGroup(sortSpecWithinGroup); + groupingSpec.setGroupSortSpec(groupSortSpec); String formatStr = params.get(GroupParams.GROUP_FORMAT, Grouping.Format.grouped.name()); Grouping.Format responseFormat; @@ -280,10 +286,6 @@ public class QueryComponent extends SearchComponent groupingSpec.setFields(params.getParams(GroupParams.GROUP_FIELD)); groupingSpec.setQueries(params.getParams(GroupParams.GROUP_QUERY)); groupingSpec.setFunctions(params.getParams(GroupParams.GROUP_FUNC)); - groupingSpec.setGroupOffset(params.getInt(GroupParams.GROUP_OFFSET, 0)); - groupingSpec.setGroupLimit(params.getInt(GroupParams.GROUP_LIMIT, 1)); - groupingSpec.setOffset(sortSpec.getOffset()); - groupingSpec.setLimit(sortSpec.getCount()); groupingSpec.setIncludeGroupCount(params.getBool(GroupParams.GROUP_TOTAL_COUNT, false)); groupingSpec.setMain(params.getBool(GroupParams.GROUP_MAIN, false)); groupingSpec.setNeedScore((rb.getFieldFlags() & SolrIndexSearcher.GET_SCORES) != 0); @@ -415,7 +417,7 @@ public class QueryComponent extends SearchComponent .setTruncateGroups(groupingSpec.isTruncateGroups() && groupingSpec.getFields().length > 0) .setSearcher(searcher); - int docsToCollect = Grouping.getMax(groupingSpec.getGroupOffset(), groupingSpec.getGroupLimit(), searcher.maxDoc()); + int docsToCollect = Grouping.getMax(groupingSpec.getWithinGroupOffset(), groupingSpec.getWithinGroupLimit(), searcher.maxDoc()); docsToCollect = Math.max(docsToCollect, 1); for (String field : groupingSpec.getFields()) { @@ -477,8 +479,8 @@ public class QueryComponent extends SearchComponent .setDefaultFormat(groupingSpec.getResponseFormat()) .setLimitDefault(limitDefault) .setDefaultTotalCount(defaultTotalCount) - .setDocsPerGroupDefault(groupingSpec.getGroupLimit()) - 
.setGroupOffsetDefault(groupingSpec.getGroupOffset()) + .setDocsPerGroupDefault(groupingSpec.getWithinGroupLimit()) + .setGroupOffsetDefault(groupingSpec.getWithinGroupOffset()) .setGetGroupedDocSet(groupingSpec.isTruncateGroups()); if (groupingSpec.getFields() != null) { diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java index 4dde8ef539e..f72fc89a66a 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java @@ -460,15 +460,15 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore // alter the sorting in the grouping specification if there is one GroupingSpecification groupingSpec = rb.getGroupingSpec(); if(groupingSpec != null) { - SortField[] groupSort = groupingSpec.getGroupSort().getSort(); - Sort modGroupSort = this.modifySort(groupSort, force, comparator); - if(modGroupSort != null) { - groupingSpec.setGroupSort(modGroupSort); + SortSpec groupSortSpec = groupingSpec.getGroupSortSpec(); + SortSpec modGroupSortSpec = this.modifySortSpec(groupSortSpec, force, comparator); + if (modGroupSortSpec != null) { + groupingSpec.setGroupSortSpec(modGroupSortSpec); } - SortField[] withinGroupSort = groupingSpec.getSortWithinGroup().getSort(); - Sort modWithinGroupSort = this.modifySort(withinGroupSort, force, comparator); - if(modWithinGroupSort != null) { - groupingSpec.setSortWithinGroup(modWithinGroupSort); + SortSpec withinGroupSortSpec = groupingSpec.getSortSpecWithinGroup(); + SortSpec modWithinGroupSortSpec = this.modifySortSpec(withinGroupSortSpec, force, comparator); + if (modWithinGroupSortSpec != null) { + groupingSpec.setSortSpecWithinGroup(modWithinGroupSortSpec); } } } @@ -494,12 +494,6 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore } } - private Sort modifySort(SortField[] current, boolean force, ElevationComparatorSource comparator) { - SortSpec tmp = new SortSpec(new Sort(current), Arrays.asList(new SchemaField[current.length])); - tmp = modifySortSpec(tmp, force, comparator); - return null == tmp ? null : tmp.getSort(); - } - private SortSpec modifySortSpec(SortSpec current, boolean force, ElevationComparatorSource comparator) { boolean modify = false; SortField[] currentSorts = current.getSort().getSort(); @@ -526,9 +520,9 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore } if (modify) { SortSpec newSpec = new SortSpec(new Sort(sorts.toArray(new SortField[sorts.size()])), - fields); - newSpec.setOffset(current.getOffset()); - newSpec.setCount(current.getCount()); + fields, + current.getCount(), + current.getOffset()); return newSpec; } return null; diff --git a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java index bf381f4c332..6d13b515076 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java +++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java @@ -924,6 +924,32 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI return (sort != null) ? 
sort.rewrite(this) : null; } + /** Returns a weighted sort spec according to this searcher */ + public SortSpec weightSortSpec(SortSpec originalSortSpec, Sort nullEquivalent) throws IOException { + return implWeightSortSpec( + originalSortSpec.getSort(), + originalSortSpec.getCount(), + originalSortSpec.getOffset(), + nullEquivalent); + } + + /** Returns a weighted sort spec according to this searcher */ + private SortSpec implWeightSortSpec(Sort originalSort, int num, int offset, Sort nullEquivalent) throws IOException { + Sort rewrittenSort = weightSort(originalSort); + if (rewrittenSort == null) { + rewrittenSort = nullEquivalent; + } + + final SortField[] rewrittenSortFields = rewrittenSort.getSort(); + final SchemaField[] rewrittenSchemaFields = new SchemaField[rewrittenSortFields.length]; + for (int ii = 0; ii < rewrittenSortFields.length; ++ii) { + final String fieldName = rewrittenSortFields[ii].getField(); + rewrittenSchemaFields[ii] = (fieldName == null ? null : schema.getFieldOrNull(fieldName)); + } + + return new SortSpec(rewrittenSort, rewrittenSchemaFields, num, offset); + } + /** * Returns the first document number containing the term t Returns -1 if no document was found. This * method is primarily intended for clients that want to fetch documents using a unique identifier." diff --git a/solr/core/src/java/org/apache/solr/search/SortSpec.java b/solr/core/src/java/org/apache/solr/search/SortSpec.java index 8cd954c0354..b79ed0a094f 100644 --- a/solr/core/src/java/org/apache/solr/search/SortSpec.java +++ b/solr/core/src/java/org/apache/solr/search/SortSpec.java @@ -34,9 +34,19 @@ public class SortSpec private int num = 10; private int offset = 0; + public SortSpec(Sort sort, List fields, int num, int offset) { + setSortAndFields(sort, fields); + this.num = num; + this.offset = offset; + } public SortSpec(Sort sort, List fields) { setSortAndFields(sort, fields); } + public SortSpec(Sort sort, SchemaField[] fields, int num, int offset) { + setSortAndFields(sort, Arrays.asList(fields)); + this.num = num; + this.offset = offset; + } public SortSpec(Sort sort, SchemaField[] fields) { setSortAndFields(sort, Arrays.asList(fields)); } diff --git a/solr/core/src/java/org/apache/solr/search/grouping/GroupingSpecification.java b/solr/core/src/java/org/apache/solr/search/grouping/GroupingSpecification.java index fbe0aced053..4194dd087f8 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/GroupingSpecification.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/GroupingSpecification.java @@ -18,6 +18,7 @@ package org.apache.solr.search.grouping; import org.apache.lucene.search.Sort; import org.apache.solr.search.Grouping; +import org.apache.solr.search.SortSpec; /** * Encapsulates the grouping options like fields group sort and more specified by clients. 
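For illustration (not part of the patch): downstream code reads the same values back through the nested SortSpec objects introduced in the hunks below; the deprecated int/Sort getters merely delegate, so a hypothetical consumer with a populated groupingSpec migrates like this.

    Sort groupSort = groupingSpec.getGroupSortSpec().getSort();   // replaces getGroupSort()
    SortSpec withinGroup = groupingSpec.getSortSpecWithinGroup();
    int withinGroupLimit  = withinGroup.getCount();               // replaces getGroupLimit()
    int withinGroupOffset = withinGroup.getOffset();              // replaces getGroupOffset()
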
@@ -29,12 +30,8 @@ public class GroupingSpecification { private String[] fields = new String[]{}; private String[] queries = new String[]{}; private String[] functions = new String[]{}; - private int offset; - private int limit; - private int groupOffset; - private int groupLimit; - private Sort groupSort; - private Sort sortWithinGroup; + private SortSpec groupSortSpec; + private SortSpec sortSpecWithinGroup; private boolean includeGroupCount; private boolean main; private Grouping.Format responseFormat; @@ -77,53 +74,49 @@ public class GroupingSpecification { this.functions = functions; } + @Deprecated + public int getWithinGroupOffset() { + return sortSpecWithinGroup.getOffset(); + } + @Deprecated public int getGroupOffset() { - return groupOffset; + return getWithinGroupOffset(); } - public void setGroupOffset(int groupOffset) { - this.groupOffset = groupOffset; - } + @Deprecated + public int getWithinGroupLimit() { + return sortSpecWithinGroup.getCount(); + } + @Deprecated public int getGroupLimit() { - return groupLimit; + return getWithinGroupLimit(); } - public void setGroupLimit(int groupLimit) { - this.groupLimit = groupLimit; - } + @Deprecated public int getOffset() { - return offset; + return groupSortSpec.getOffset(); } - public void setOffset(int offset) { - this.offset = offset; - } + @Deprecated public int getLimit() { - return limit; + return groupSortSpec.getCount(); } - public void setLimit(int limit) { - this.limit = limit; - } + @Deprecated public Sort getGroupSort() { - return groupSort; + return groupSortSpec.getSort(); } - public void setGroupSort(Sort groupSort) { - this.groupSort = groupSort; - } + @Deprecated public Sort getSortWithinGroup() { - return sortWithinGroup; + return sortSpecWithinGroup.getSort(); } - public void setSortWithinGroup(Sort sortWithinGroup) { - this.sortWithinGroup = sortWithinGroup; - } public boolean isIncludeGroupCount() { return includeGroupCount; @@ -164,4 +157,21 @@ public class GroupingSpecification { public void setTruncateGroups(boolean truncateGroups) { this.truncateGroups = truncateGroups; } + + public SortSpec getGroupSortSpec() { + return groupSortSpec; + } + + public void setGroupSortSpec(SortSpec groupSortSpec) { + this.groupSortSpec = groupSortSpec; + } + + public SortSpec getSortSpecWithinGroup() { + return sortSpecWithinGroup; + } + + public void setSortSpecWithinGroup(SortSpec sortSpecWithinGroup) { + this.sortSpecWithinGroup = sortSpecWithinGroup; + } + } diff --git a/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/TopGroupsShardResponseProcessor.java b/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/TopGroupsShardResponseProcessor.java index 3610a383ccb..7e38e5dead2 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/TopGroupsShardResponseProcessor.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/TopGroupsShardResponseProcessor.java @@ -68,9 +68,9 @@ public class TopGroupsShardResponseProcessor implements ShardResponseProcessor { if (rb.getGroupingSpec().getResponseFormat() == Grouping.Format.simple || rb.getGroupingSpec().isMain()) { groupOffsetDefault = 0; } else { - groupOffsetDefault = rb.getGroupingSpec().getGroupOffset(); + groupOffsetDefault = rb.getGroupingSpec().getWithinGroupOffset(); } - int docsPerGroupDefault = rb.getGroupingSpec().getGroupLimit(); + int docsPerGroupDefault = rb.getGroupingSpec().getWithinGroupLimit(); Map>> commandTopGroups = new 
HashMap<>(); for (String field : fields) { diff --git a/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/GroupedEndResultTransformer.java b/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/GroupedEndResultTransformer.java index f8c9872a7a6..47b5276a1eb 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/GroupedEndResultTransformer.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/endresulttransformer/GroupedEndResultTransformer.java @@ -79,7 +79,7 @@ public class GroupedEndResultTransformer implements EndResultTransformer { if (!Float.isNaN(group.maxScore)) { docList.setMaxScore(group.maxScore); } - docList.setStart(rb.getGroupingSpec().getGroupOffset()); + docList.setStart(rb.getGroupingSpec().getWithinGroupOffset()); for (ScoreDoc scoreDoc : group.scoreDocs) { docList.add(solrDocumentSource.retrieve(scoreDoc)); } @@ -97,7 +97,7 @@ public class GroupedEndResultTransformer implements EndResultTransformer { if (!Float.isNaN(queryCommandResult.getTopDocs().getMaxScore())) { docList.setMaxScore(queryCommandResult.getTopDocs().getMaxScore()); } - docList.setStart(rb.getGroupingSpec().getGroupOffset()); + docList.setStart(rb.getGroupingSpec().getWithinGroupOffset()); for (ScoreDoc scoreDoc :queryCommandResult.getTopDocs().scoreDocs){ docList.add(solrDocumentSource.retrieve(scoreDoc)); } From 44cce6bc4c5f3452d188cf4e8905a3ed7ef3e247 Mon Sep 17 00:00:00 2001 From: Noble Paul Date: Wed, 30 Nov 2016 18:47:52 +0530 Subject: [PATCH 06/53] typo in javadocs --- solr/solrj/src/java/org/apache/solr/common/PushWriter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/solrj/src/java/org/apache/solr/common/PushWriter.java b/solr/solrj/src/java/org/apache/solr/common/PushWriter.java index ddfac3cca52..7829d3762c8 100644 --- a/solr/solrj/src/java/org/apache/solr/common/PushWriter.java +++ b/solr/solrj/src/java/org/apache/solr/common/PushWriter.java @@ -33,7 +33,7 @@ public interface PushWriter extends Closeable { void writeMap(MapWriter mw) throws IOException; /**Write an array. The array is opened at the beginning of this method - * and closed at the end. All array entries must be returned before this + * and closed at the end. 
All array entries must be written before this * method returns * */ From e64bcb37ffe9ccbe1c88cb451ff147de774aec8e Mon Sep 17 00:00:00 2001 From: Ishan Chattopadhyaya Date: Thu, 1 Dec 2016 00:46:58 +0530 Subject: [PATCH 07/53] SOLR-9616 Solr throws exception when expand=true on empty index --- solr/CHANGES.txt | 2 ++ .../solr/handler/component/ExpandComponent.java | 6 ++++++ .../handler/component/TestExpandComponent.java | 15 +++++++++++++++ 3 files changed, 23 insertions(+) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 1584647e787..d09ae3be13b 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -218,6 +218,8 @@ Bug Fixes * SOLR-9768: RecordingJsonParser produces incomplete json (Wojciech Stryszyk via ab) +* SOLR-9616: Solr throws exception when expand=true on empty index (Timo Hund via Ishan Chattopadhyaya) + Other Changes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/handler/component/ExpandComponent.java b/solr/core/src/java/org/apache/solr/handler/component/ExpandComponent.java index 8274d68aebd..366c4a9b7a8 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/ExpandComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/ExpandComponent.java @@ -265,6 +265,12 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia * This code gathers the group information for the current page. */ List contexts = searcher.getTopReaderContext().leaves(); + + if(contexts.size() == 0) { + //When no context is available we can skip the expanding + return; + } + int currentContext = 0; int currentDocBase = contexts.get(currentContext).docBase; int nextDocBase = (currentContext+1) Date: Wed, 30 Nov 2016 14:04:58 -0500 Subject: [PATCH 08/53] LUCENE-7542: Remove debug printing of parsed versions --- dev-tools/scripts/smokeTestRelease.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dev-tools/scripts/smokeTestRelease.py b/dev-tools/scripts/smokeTestRelease.py index 2b1ff193e4d..f9c34990418 100644 --- a/dev-tools/scripts/smokeTestRelease.py +++ b/dev-tools/scripts/smokeTestRelease.py @@ -497,7 +497,6 @@ def versionToTuple(version, name): versionTuple = versionTuple[:-2] + ('100',) elif versionTuple[-1].lower()[:2] == 'rc': versionTuple = versionTuple[:-2] + (versionTuple[-1][2:],) - print('%s: %s' % (version, versionTuple)) return versionTuple From c61268f7cd2c47884f98513febee6bb5f33ea6dc Mon Sep 17 00:00:00 2001 From: Anshum Gupta Date: Fri, 2 Dec 2016 12:09:10 -0800 Subject: [PATCH 09/53] SOLR-9819: Upgrade Apache commons-fileupload to 1.3.2, fixing a security vulnerability --- lucene/ivy-versions.properties | 2 +- solr/CHANGES.txt | 2 ++ solr/licenses/commons-fileupload-1.3.1.jar.sha1 | 1 - solr/licenses/commons-fileupload-1.3.2.jar.sha1 | 1 + 4 files changed, 4 insertions(+), 2 deletions(-) delete mode 100644 solr/licenses/commons-fileupload-1.3.1.jar.sha1 create mode 100644 solr/licenses/commons-fileupload-1.3.2.jar.sha1 diff --git a/lucene/ivy-versions.properties b/lucene/ivy-versions.properties index 85261052a06..ffc54a89f3b 100644 --- a/lucene/ivy-versions.properties +++ b/lucene/ivy-versions.properties @@ -64,7 +64,7 @@ com.sun.jersey.version = 1.9 /commons-collections/commons-collections = 3.2.2 /commons-configuration/commons-configuration = 1.6 /commons-digester/commons-digester = 2.1 -/commons-fileupload/commons-fileupload = 1.3.1 +/commons-fileupload/commons-fileupload = 1.3.2 /commons-io/commons-io = 2.5 /commons-lang/commons-lang = 2.6 /commons-logging/commons-logging = 1.1.3 diff --git 
a/solr/CHANGES.txt b/solr/CHANGES.txt index d09ae3be13b..e76616942be 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -255,6 +255,8 @@ Other Changes * SOLR-9660: in GroupingSpecification factor [group](sort|offset|limit) into [group](sortSpec) (Judith Silverman, Christine Poerschke) +* SOLR-9819: Upgrade commons-fileupload to 1.3.2, fixing a potential vulnerability CVE-2016-3092 (Anshum Gupta) + ================== 6.3.0 ================== Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release. diff --git a/solr/licenses/commons-fileupload-1.3.1.jar.sha1 b/solr/licenses/commons-fileupload-1.3.1.jar.sha1 deleted file mode 100644 index 32f48724c86..00000000000 --- a/solr/licenses/commons-fileupload-1.3.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -c621b54583719ac0310404463d6d99db27e1052c diff --git a/solr/licenses/commons-fileupload-1.3.2.jar.sha1 b/solr/licenses/commons-fileupload-1.3.2.jar.sha1 new file mode 100644 index 00000000000..747b509acc2 --- /dev/null +++ b/solr/licenses/commons-fileupload-1.3.2.jar.sha1 @@ -0,0 +1 @@ +5d7491ed6ebd02b6a8d2305f8e6b7fe5dbd95f72 \ No newline at end of file From fcccd317ddb44a742a0b3265fcf32923649f38cd Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Fri, 2 Dec 2016 15:26:04 -0500 Subject: [PATCH 10/53] LUCENE-7576: detect when special case automaton is passed to Terms.intersect --- lucene/CHANGES.txt | 4 ++++ .../lucene/codecs/blocktree/FieldReader.java | 3 +++ .../java/org/apache/lucene/index/Terms.java | 8 ++++++-- .../org/apache/lucene/index/TestTermsEnum.java | 18 ++++++++++++++++++ 4 files changed, 31 insertions(+), 2 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index e62a99d1eb0..4afc5078fa2 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -81,6 +81,10 @@ Bug Fixes * LUCENE-7536: ASCIIFoldingFilterFactory used to return an illegal multi-term component when preserveOriginal was set to true. (Adrien Grand) +* LUCENE-7576: Fix Terms.intersect in the default codec to detect when + the incoming automaton is a special case and throw a clearer + exception than NullPointerException (Tom Mortimer via Mike McCandless) + Improvements * LUCENE-6824: TermAutomatonQuery now rewrites to TermQuery, diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java index 7f13a3264ce..4ee38262403 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java @@ -182,6 +182,9 @@ public final class FieldReader extends Terms implements Accountable { //System.out.println("intersect: " + compiled.type + " a=" + compiled.automaton); // TODO: we could push "it's a range" or "it's a prefix" down into IntersectTermsEnum? // can we optimize knowing that...? 
+ if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) { + throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead"); + } return new IntersectTermsEnum(this, compiled.automaton, compiled.runAutomaton, compiled.commonSuffixRef, startTerm, compiled.sinkState); } diff --git a/lucene/core/src/java/org/apache/lucene/index/Terms.java b/lucene/core/src/java/org/apache/lucene/index/Terms.java index dd48ce9c189..7197e25e549 100644 --- a/lucene/core/src/java/org/apache/lucene/index/Terms.java +++ b/lucene/core/src/java/org/apache/lucene/index/Terms.java @@ -49,8 +49,12 @@ public abstract class Terms { * provided startTerm must be accepted by * the automaton. * - *

NOTE: the returned TermsEnum cannot - * seek. + * This is an expert low-level API and will only work + * for {@code NORMAL} compiled automata. To handle any + * compiled automata you should use + * {@link CompiledAutomaton#getTermsEnum} instead. + * + * NOTE: the returned TermsEnum cannot seek. * *
NOTE: the terms dictionary is free to * return arbitrary terms as long as the resulted visited diff --git a/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java b/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java index 3f15381e54c..a388d42ae30 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java @@ -998,4 +998,22 @@ public class TestTermsEnum extends LuceneTestCase { } dir.close(); } + + // LUCENE-7576 + public void testIntersectRegexp() throws Exception { + Directory d = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), d); + Document doc = new Document(); + doc.add(newStringField("field", "foobar", Field.Store.NO)); + w.addDocument(doc); + IndexReader r = w.getReader(); + Fields fields = MultiFields.getFields(r); + CompiledAutomaton automaton = new CompiledAutomaton(new RegExp("do_not_match_anything").toAutomaton()); + Terms terms = fields.terms("field"); + String message = expectThrows(IllegalArgumentException.class, () -> {terms.intersect(automaton, null);}).getMessage(); + assertEquals("please use CompiledAutomaton.getTermsEnum instead", message); + r.close(); + w.close(); + d.close(); + } } From 58476b1675befd88776c72fb7b178c294a39edae Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Fri, 2 Dec 2016 15:30:37 -0500 Subject: [PATCH 11/53] improve IW javadocs --- .../org/apache/lucene/index/IndexWriter.java | 31 +++++++++++-------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 68f3b3b6b2d..98687855231 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -133,19 +133,24 @@ import org.apache.lucene.util.Version;

Expert: IndexWriter allows an optional - {@link IndexDeletionPolicy} implementation to be - specified. You can use this to control when prior commits - are deleted from the index. The default policy is {@link - KeepOnlyLastCommitDeletionPolicy} which removes all prior - commits as soon as a new commit is done (this matches - behavior before 2.2). Creating your own policy can allow - you to explicitly keep previous "point in time" commits - alive in the index for some time, to allow readers to - refresh to the new commit without having the old commit - deleted out from under them. This is necessary on - filesystems like NFS that do not support "delete on last - close" semantics, which Lucene's "point in time" search - normally relies on.

+ {@link IndexDeletionPolicy} implementation to be specified. You + can use this to control when prior commits are deleted from + the index. The default policy is {@link KeepOnlyLastCommitDeletionPolicy} + which removes all prior commits as soon as a new commit is + done. Creating your own policy can allow you to explicitly + keep previous "point in time" commits alive in the index for + some time, either because this is useful for your application, + or to give readers enough time to refresh to the new commit + without having the old commit deleted out from under them. + The latter is necessary when multiple computers take turns opening + their own {@code IndexWriter} and {@code IndexReader}s + against a single shared index mounted via remote filesystems + like NFS which do not support "delete on last close" semantics. + A single computer accessing an index via NFS is fine with the + default deletion policy since NFS clients emulate "delete on + last close" locally. That said, accessing an index via NFS + will likely result in poor performance compared to a local IO + device.
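To make the paragraph above concrete (a sketch only, not part of this patch): an application that wants to keep a "point in time" commit alive while remote readers use it could install a SnapshotDeletionPolicy on top of the default policy. Only standard Lucene APIs are used; the analyzer and dir variables are assumed to already exist.

    // Wrap the default policy so commits can be pinned explicitly instead of
    // being deleted as soon as the next commit lands.
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    SnapshotDeletionPolicy sdp = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
    iwc.setIndexDeletionPolicy(sdp);
    IndexWriter writer = new IndexWriter(dir, iwc);

    writer.commit();                      // ensure there is a commit to pin
    IndexCommit pinned = sdp.snapshot();  // this commit will not be deleted while pinned
    try {
      // readers (possibly on other machines over NFS) can keep using the pinned commit,
      // e.g. via DirectoryReader.open(pinned), and refresh to newer commits at their own pace
    } finally {
      sdp.release(pinned);                // allow the commit to be cleaned up again
    }
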

Expert: IndexWriter allows you to separately change From 8cbcbc9d956754de1fab2c626705aa6d6ab9f910 Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Fri, 2 Dec 2016 17:42:27 -0500 Subject: [PATCH 12/53] LUCENE-7576: fix other codecs to detect when special case automaton is passed to Terms.intersect --- .../org/apache/lucene/codecs/memory/DirectPostingsFormat.java | 3 +++ .../org/apache/lucene/codecs/memory/FSTOrdTermsReader.java | 3 +++ .../java/org/apache/lucene/codecs/memory/FSTTermsReader.java | 3 +++ 3 files changed, 9 insertions(+) diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java index 3ce2abe4358..00f25cf189c 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java @@ -659,6 +659,9 @@ public final class DirectPostingsFormat extends PostingsFormat { @Override public TermsEnum intersect(CompiledAutomaton compiled, final BytesRef startTerm) { + if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) { + throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead"); + } return new DirectIntersectTermsEnum(compiled, startTerm); } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java index 305c4194381..97bbea3ddef 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java @@ -270,6 +270,9 @@ public class FSTOrdTermsReader extends FieldsProducer { @Override public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException { + if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) { + throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead"); + } return new IntersectTermsEnum(compiled, startTerm); } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java index 775f6929548..b120656688c 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java @@ -250,6 +250,9 @@ public class FSTTermsReader extends FieldsProducer { @Override public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException { + if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) { + throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead"); + } return new IntersectTermsEnum(compiled, startTerm); } From 39c2f3d80fd585c7ae4a4a559d53a19a3f100061 Mon Sep 17 00:00:00 2001 From: Anshum Gupta Date: Fri, 2 Dec 2016 16:42:35 -0800 Subject: [PATCH 13/53] SOLR-9819: Add new line to the end of SHA --- solr/licenses/commons-fileupload-1.3.2.jar.sha1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/licenses/commons-fileupload-1.3.2.jar.sha1 b/solr/licenses/commons-fileupload-1.3.2.jar.sha1 index 747b509acc2..80f80fb6def 100644 --- a/solr/licenses/commons-fileupload-1.3.2.jar.sha1 +++ b/solr/licenses/commons-fileupload-1.3.2.jar.sha1 @@ -1 +1 @@ -5d7491ed6ebd02b6a8d2305f8e6b7fe5dbd95f72 \ No newline at end of file +5d7491ed6ebd02b6a8d2305f8e6b7fe5dbd95f72 From 
5e8db2e068f2549b9619d5ac48a50c8032fc292b Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Sun, 4 Dec 2016 05:18:04 -0500 Subject: [PATCH 14/53] LUCENE-7563: use a compressed format for the in-heap BKD index --- lucene/CHANGES.txt | 4 + .../simpletext/SimpleTextBKDReader.java | 281 ++- .../simpletext/SimpleTextBKDWriter.java | 1661 +++++++++++++++++ .../simpletext/SimpleTextPointsReader.java | 5 +- .../simpletext/SimpleTextPointsWriter.java | 188 +- .../codecs/lucene60/Lucene60PointsFormat.java | 10 +- .../lucene/codecs/lucene60/package-info.java | 4 +- .../lucene/codecs/lucene62/package-info.java | 4 +- .../lucene/codecs/lucene70/package-info.java | 15 +- .../org/apache/lucene/index/CheckIndex.java | 314 ++-- .../org/apache/lucene/util/bkd/BKDReader.java | 717 ++++--- .../org/apache/lucene/util/bkd/BKDWriter.java | 293 ++- .../lucene/util/bkd/HeapPointReader.java | 7 +- .../lucene/util/bkd/HeapPointWriter.java | 22 +- .../util/bkd/MutablePointsReaderUtils.java | 21 +- .../lucene/util/bkd/OfflinePointReader.java | 8 +- .../lucene/util/bkd/OfflinePointWriter.java | 10 +- .../apache/lucene/util/bkd/PointReader.java | 14 +- .../apache/lucene/util/bkd/PointWriter.java | 6 +- .../lucene/search/TestPointQueries.java | 3 + .../lucene/util/bkd/Test2BBKDPoints.java | 11 +- .../org/apache/lucene/util/bkd/TestBKD.java | 54 + .../org/apache/lucene/util/fst/TestFSTs.java | 2 +- .../lucene/document/NearestNeighbor.java | 44 +- 24 files changed, 3030 insertions(+), 668 deletions(-) create mode 100644 lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDWriter.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 4afc5078fa2..79e44e112c8 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -126,6 +126,10 @@ Optimizations * LUCENE-7568: Optimize merging when index sorting is used but the index is already sorted (Jim Ferenczi via Mike McCandless) +* LUCENE-7563: The BKD in-memory index for dimensional points now uses + a compressed format, using substantially less RAM in some cases + (Adrien Grand, Mike McCandless) + Other * LUCENE-7546: Fixed references to benchmark wikipedia data and the Jenkins line-docs file diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java index a2b784afd27..488547b4dea 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java @@ -16,13 +16,17 @@ */ package org.apache.lucene.codecs.simpletext; - import java.io.IOException; import java.nio.charset.StandardCharsets; +import org.apache.lucene.codecs.simpletext.SimpleTextUtil; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.PointValues; import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.Accountable; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; +import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.bkd.BKDReader; @@ -30,15 +34,105 @@ import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.BLOCK_C import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.BLOCK_DOC_ID; import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.BLOCK_VALUE; -class SimpleTextBKDReader extends BKDReader { +/** Forked from {@link BKDReader} and 
simplified/specialized for SimpleText's usage */ - public SimpleTextBKDReader(IndexInput datIn, int numDims, int maxPointsInLeafNode, int bytesPerDim, long[] leafBlockFPs, byte[] splitPackedValues, +final class SimpleTextBKDReader extends PointValues implements Accountable { + // Packed array of byte[] holding all split values in the full binary tree: + final private byte[] splitPackedValues; + final long[] leafBlockFPs; + final private int leafNodeOffset; + final int numDims; + final int bytesPerDim; + final int bytesPerIndexEntry; + final IndexInput in; + final int maxPointsInLeafNode; + final byte[] minPackedValue; + final byte[] maxPackedValue; + final long pointCount; + final int docCount; + final int version; + protected final int packedBytesLength; + + public SimpleTextBKDReader(IndexInput in, int numDims, int maxPointsInLeafNode, int bytesPerDim, long[] leafBlockFPs, byte[] splitPackedValues, byte[] minPackedValue, byte[] maxPackedValue, long pointCount, int docCount) throws IOException { - super(datIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues, minPackedValue, maxPackedValue, pointCount, docCount); + this.in = in; + this.numDims = numDims; + this.maxPointsInLeafNode = maxPointsInLeafNode; + this.bytesPerDim = bytesPerDim; + // no version check here because callers of this API (SimpleText) have no back compat: + bytesPerIndexEntry = numDims == 1 ? bytesPerDim : bytesPerDim + 1; + packedBytesLength = numDims * bytesPerDim; + this.leafNodeOffset = leafBlockFPs.length; + this.leafBlockFPs = leafBlockFPs; + this.splitPackedValues = splitPackedValues; + this.minPackedValue = minPackedValue; + this.maxPackedValue = maxPackedValue; + this.pointCount = pointCount; + this.docCount = docCount; + this.version = SimpleTextBKDWriter.VERSION_CURRENT; + assert minPackedValue.length == packedBytesLength; + assert maxPackedValue.length == packedBytesLength; } - @Override - protected void visitDocIDs(IndexInput in, long blockFP, IntersectVisitor visitor) throws IOException { + /** Used to track all state for a single call to {@link #intersect}. */ + public static final class IntersectState { + final IndexInput in; + final int[] scratchDocIDs; + final byte[] scratchPackedValue; + final int[] commonPrefixLengths; + + final IntersectVisitor visitor; + + public IntersectState(IndexInput in, int numDims, + int packedBytesLength, + int maxPointsInLeafNode, + IntersectVisitor visitor) { + this.in = in; + this.visitor = visitor; + this.commonPrefixLengths = new int[numDims]; + this.scratchDocIDs = new int[maxPointsInLeafNode]; + this.scratchPackedValue = new byte[packedBytesLength]; + } + } + + public void intersect(IntersectVisitor visitor) throws IOException { + intersect(getIntersectState(visitor), 1, minPackedValue, maxPackedValue); + } + + /** Fast path: this is called when the query box fully encompasses all cells under this node. */ + private void addAll(IntersectState state, int nodeID) throws IOException { + //System.out.println("R: addAll nodeID=" + nodeID); + + if (nodeID >= leafNodeOffset) { + //System.out.println("ADDALL"); + visitDocIDs(state.in, leafBlockFPs[nodeID-leafNodeOffset], state.visitor); + // TODO: we can assert that the first value here in fact matches what the index claimed? 
+ } else { + addAll(state, 2*nodeID); + addAll(state, 2*nodeID+1); + } + } + + /** Create a new {@link IntersectState} */ + public IntersectState getIntersectState(IntersectVisitor visitor) { + return new IntersectState(in.clone(), numDims, + packedBytesLength, + maxPointsInLeafNode, + visitor); + } + + /** Visits all docIDs and packed values in a single leaf block */ + public void visitLeafBlockValues(int nodeID, IntersectState state) throws IOException { + int leafID = nodeID - leafNodeOffset; + + // Leaf node; scan and filter all points in this block: + int count = readDocIDs(state.in, leafBlockFPs[leafID], state.scratchDocIDs); + + // Again, this time reading values and checking with the visitor + visitDocValues(state.commonPrefixLengths, state.scratchPackedValue, state.in, state.scratchDocIDs, count, state.visitor); + } + + void visitDocIDs(IndexInput in, long blockFP, IntersectVisitor visitor) throws IOException { BytesRefBuilder scratch = new BytesRefBuilder(); in.seek(blockFP); readLine(in, scratch); @@ -50,8 +144,7 @@ class SimpleTextBKDReader extends BKDReader { } } - @Override - protected int readDocIDs(IndexInput in, long blockFP, int[] docIDs) throws IOException { + int readDocIDs(IndexInput in, long blockFP, int[] docIDs) throws IOException { BytesRefBuilder scratch = new BytesRefBuilder(); in.seek(blockFP); readLine(in, scratch); @@ -63,8 +156,7 @@ class SimpleTextBKDReader extends BKDReader { return count; } - @Override - protected void visitDocValues(int[] commonPrefixLengths, byte[] scratchPackedValue, IndexInput in, int[] docIDs, int count, IntersectVisitor visitor) throws IOException { + void visitDocValues(int[] commonPrefixLengths, byte[] scratchPackedValue, IndexInput in, int[] docIDs, int count, IntersectVisitor visitor) throws IOException { visitor.grow(count); // NOTE: we don't do prefix coding, so we ignore commonPrefixLengths assert scratchPackedValue.length == packedBytesLength; @@ -79,6 +171,175 @@ class SimpleTextBKDReader extends BKDReader { } } + private void visitCompressedDocValues(int[] commonPrefixLengths, byte[] scratchPackedValue, IndexInput in, int[] docIDs, int count, IntersectVisitor visitor, int compressedDim) throws IOException { + // the byte at `compressedByteOffset` is compressed using run-length compression, + // other suffix bytes are stored verbatim + final int compressedByteOffset = compressedDim * bytesPerDim + commonPrefixLengths[compressedDim]; + commonPrefixLengths[compressedDim]++; + int i; + for (i = 0; i < count; ) { + scratchPackedValue[compressedByteOffset] = in.readByte(); + final int runLen = Byte.toUnsignedInt(in.readByte()); + for (int j = 0; j < runLen; ++j) { + for(int dim=0;dim 1.1 MB with 128 points +// per leaf, and you can reduce that by putting more points per leaf +// - we could use threads while building; the higher nodes are very parallelizable + +/** Forked from {@link BKDWriter} and simplified/specialized for SimpleText's usage */ + +final class SimpleTextBKDWriter implements Closeable { + + public static final String CODEC_NAME = "BKD"; + public static final int VERSION_START = 0; + public static final int VERSION_COMPRESSED_DOC_IDS = 1; + public static final int VERSION_COMPRESSED_VALUES = 2; + public static final int VERSION_IMPLICIT_SPLIT_DIM_1D = 3; + public static final int VERSION_CURRENT = VERSION_IMPLICIT_SPLIT_DIM_1D; + + /** How many bytes each docs takes in the fixed-width offline format */ + private final int bytesPerDoc; + + /** Default maximum number of point in each leaf block */ + public static 
final int DEFAULT_MAX_POINTS_IN_LEAF_NODE = 1024; + + /** Default maximum heap to use, before spilling to (slower) disk */ + public static final float DEFAULT_MAX_MB_SORT_IN_HEAP = 16.0f; + + /** Maximum number of dimensions */ + public static final int MAX_DIMS = 8; + + /** How many dimensions we are indexing */ + protected final int numDims; + + /** How many bytes each value in each dimension takes. */ + protected final int bytesPerDim; + + /** numDims * bytesPerDim */ + protected final int packedBytesLength; + + final BytesRefBuilder scratch = new BytesRefBuilder(); + + final TrackingDirectoryWrapper tempDir; + final String tempFileNamePrefix; + final double maxMBSortInHeap; + + final byte[] scratchDiff; + final byte[] scratch1; + final byte[] scratch2; + final BytesRef scratchBytesRef1 = new BytesRef(); + final BytesRef scratchBytesRef2 = new BytesRef(); + final int[] commonPrefixLengths; + + protected final FixedBitSet docsSeen; + + private OfflinePointWriter offlinePointWriter; + private HeapPointWriter heapPointWriter; + + private IndexOutput tempInput; + protected final int maxPointsInLeafNode; + private final int maxPointsSortInHeap; + + /** Minimum per-dim values, packed */ + protected final byte[] minPackedValue; + + /** Maximum per-dim values, packed */ + protected final byte[] maxPackedValue; + + protected long pointCount; + + /** true if we have so many values that we must write ords using long (8 bytes) instead of int (4 bytes) */ + protected final boolean longOrds; + + /** An upper bound on how many points the caller will add (includes deletions) */ + private final long totalPointCount; + + /** True if every document has at most one value. We specialize this case by not bothering to store the ord since it's redundant with docID. */ + protected final boolean singleValuePerDoc; + + /** How much heap OfflineSorter is allowed to use */ + protected final OfflineSorter.BufferSize offlineSorterBufferMB; + + /** How much heap OfflineSorter is allowed to use */ + protected final int offlineSorterMaxTempFiles; + + private final int maxDoc; + + public SimpleTextBKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix, int numDims, int bytesPerDim, + int maxPointsInLeafNode, double maxMBSortInHeap, long totalPointCount, boolean singleValuePerDoc) throws IOException { + this(maxDoc, tempDir, tempFileNamePrefix, numDims, bytesPerDim, maxPointsInLeafNode, maxMBSortInHeap, totalPointCount, singleValuePerDoc, + totalPointCount > Integer.MAX_VALUE, Math.max(1, (long) maxMBSortInHeap), OfflineSorter.MAX_TEMPFILES); + } + + private SimpleTextBKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix, int numDims, int bytesPerDim, + int maxPointsInLeafNode, double maxMBSortInHeap, long totalPointCount, + boolean singleValuePerDoc, boolean longOrds, long offlineSorterBufferMB, int offlineSorterMaxTempFiles) throws IOException { + verifyParams(numDims, maxPointsInLeafNode, maxMBSortInHeap, totalPointCount); + // We use tracking dir to deal with removing files on exception, so each place that + // creates temp files doesn't need crazy try/finally/sucess logic: + this.tempDir = new TrackingDirectoryWrapper(tempDir); + this.tempFileNamePrefix = tempFileNamePrefix; + this.maxPointsInLeafNode = maxPointsInLeafNode; + this.numDims = numDims; + this.bytesPerDim = bytesPerDim; + this.totalPointCount = totalPointCount; + this.maxDoc = maxDoc; + this.offlineSorterBufferMB = OfflineSorter.BufferSize.megabytes(offlineSorterBufferMB); + this.offlineSorterMaxTempFiles = 
offlineSorterMaxTempFiles; + docsSeen = new FixedBitSet(maxDoc); + packedBytesLength = numDims * bytesPerDim; + + scratchDiff = new byte[bytesPerDim]; + scratch1 = new byte[packedBytesLength]; + scratch2 = new byte[packedBytesLength]; + commonPrefixLengths = new int[numDims]; + + minPackedValue = new byte[packedBytesLength]; + maxPackedValue = new byte[packedBytesLength]; + + // If we may have more than 1+Integer.MAX_VALUE values, then we must encode ords with long (8 bytes), else we can use int (4 bytes). + this.longOrds = longOrds; + + this.singleValuePerDoc = singleValuePerDoc; + + // dimensional values (numDims * bytesPerDim) + ord (int or long) + docID (int) + if (singleValuePerDoc) { + // Lucene only supports up to 2.1 docs, so we better not need longOrds in this case: + assert longOrds == false; + bytesPerDoc = packedBytesLength + Integer.BYTES; + } else if (longOrds) { + bytesPerDoc = packedBytesLength + Long.BYTES + Integer.BYTES; + } else { + bytesPerDoc = packedBytesLength + Integer.BYTES + Integer.BYTES; + } + + // As we recurse, we compute temporary partitions of the data, halving the + // number of points at each recursion. Once there are few enough points, + // we can switch to sorting in heap instead of offline (on disk). At any + // time in the recursion, we hold the number of points at that level, plus + // all recursive halves (i.e. 16 + 8 + 4 + 2) so the memory usage is 2X + // what that level would consume, so we multiply by 0.5 to convert from + // bytes to points here. Each dimension has its own sorted partition, so + // we must divide by numDims as wel. + + maxPointsSortInHeap = (int) (0.5 * (maxMBSortInHeap * 1024 * 1024) / (bytesPerDoc * numDims)); + + // Finally, we must be able to hold at least the leaf node in heap during build: + if (maxPointsSortInHeap < maxPointsInLeafNode) { + throw new IllegalArgumentException("maxMBSortInHeap=" + maxMBSortInHeap + " only allows for maxPointsSortInHeap=" + maxPointsSortInHeap + ", but this is less than maxPointsInLeafNode=" + maxPointsInLeafNode + "; either increase maxMBSortInHeap or decrease maxPointsInLeafNode"); + } + + // We write first maxPointsSortInHeap in heap, then cutover to offline for additional points: + heapPointWriter = new HeapPointWriter(16, maxPointsSortInHeap, packedBytesLength, longOrds, singleValuePerDoc); + + this.maxMBSortInHeap = maxMBSortInHeap; + } + + public static void verifyParams(int numDims, int maxPointsInLeafNode, double maxMBSortInHeap, long totalPointCount) { + // We encode dim in a single byte in the splitPackedValues, but we only expose 4 bits for it now, in case we want to use + // remaining 4 bits for another purpose later + if (numDims < 1 || numDims > MAX_DIMS) { + throw new IllegalArgumentException("numDims must be 1 .. 
" + MAX_DIMS + " (got: " + numDims + ")"); + } + if (maxPointsInLeafNode <= 0) { + throw new IllegalArgumentException("maxPointsInLeafNode must be > 0; got " + maxPointsInLeafNode); + } + if (maxPointsInLeafNode > ArrayUtil.MAX_ARRAY_LENGTH) { + throw new IllegalArgumentException("maxPointsInLeafNode must be <= ArrayUtil.MAX_ARRAY_LENGTH (= " + ArrayUtil.MAX_ARRAY_LENGTH + "); got " + maxPointsInLeafNode); + } + if (maxMBSortInHeap < 0.0) { + throw new IllegalArgumentException("maxMBSortInHeap must be >= 0.0 (got: " + maxMBSortInHeap + ")"); + } + if (totalPointCount < 0) { + throw new IllegalArgumentException("totalPointCount must be >=0 (got: " + totalPointCount + ")"); + } + } + + /** If the current segment has too many points then we spill over to temp files / offline sort. */ + private void spillToOffline() throws IOException { + + // For each .add we just append to this input file, then in .finish we sort this input and resursively build the tree: + offlinePointWriter = new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength, longOrds, "spill", 0, singleValuePerDoc); + tempInput = offlinePointWriter.out; + PointReader reader = heapPointWriter.getReader(0, pointCount); + for(int i=0;i= maxPointsSortInHeap) { + if (offlinePointWriter == null) { + spillToOffline(); + } + offlinePointWriter.append(packedValue, pointCount, docID); + } else { + // Not too many points added yet, continue using heap: + heapPointWriter.append(packedValue, pointCount, docID); + } + + // TODO: we could specialize for the 1D case: + if (pointCount == 0) { + System.arraycopy(packedValue, 0, minPackedValue, 0, packedBytesLength); + System.arraycopy(packedValue, 0, maxPackedValue, 0, packedBytesLength); + } else { + for(int dim=0;dim 0) { + System.arraycopy(packedValue, offset, maxPackedValue, offset, bytesPerDim); + } + } + } + + pointCount++; + if (pointCount > totalPointCount) { + throw new IllegalStateException("totalPointCount=" + totalPointCount + " was passed when we were created, but we just hit " + pointCount + " values"); + } + docsSeen.set(docID); + } + + /** How many points have been added so far */ + public long getPointCount() { + return pointCount; + } + + private static class MergeReader { + final SimpleTextBKDReader bkd; + final SimpleTextBKDReader.IntersectState state; + final MergeState.DocMap docMap; + + /** Current doc ID */ + public int docID; + + /** Which doc in this block we are up to */ + private int docBlockUpto; + + /** How many docs in the current block */ + private int docsInBlock; + + /** Which leaf block we are up to */ + private int blockID; + + private final byte[] packedValues; + + public MergeReader(SimpleTextBKDReader bkd, MergeState.DocMap docMap) throws IOException { + this.bkd = bkd; + state = new SimpleTextBKDReader.IntersectState(bkd.in.clone(), + bkd.numDims, + bkd.packedBytesLength, + bkd.maxPointsInLeafNode, + null); + this.docMap = docMap; + long minFP = Long.MAX_VALUE; + //System.out.println("MR.init " + this + " bkdreader=" + bkd + " leafBlockFPs.length=" + bkd.leafBlockFPs.length); + for(long fp : bkd.leafBlockFPs) { + minFP = Math.min(minFP, fp); + //System.out.println(" leaf fp=" + fp); + } + state.in.seek(minFP); + this.packedValues = new byte[bkd.maxPointsInLeafNode * bkd.packedBytesLength]; + } + + public boolean next() throws IOException { + //System.out.println("MR.next this=" + this); + while (true) { + if (docBlockUpto == docsInBlock) { + if (blockID == bkd.leafBlockFPs.length) { + //System.out.println(" done!"); + return false; + } + 
//System.out.println(" new block @ fp=" + state.in.getFilePointer()); + docsInBlock = bkd.readDocIDs(state.in, state.in.getFilePointer(), state.scratchDocIDs); + assert docsInBlock > 0; + docBlockUpto = 0; + bkd.visitDocValues(state.commonPrefixLengths, state.scratchPackedValue, state.in, state.scratchDocIDs, docsInBlock, new IntersectVisitor() { + int i = 0; + + @Override + public void visit(int docID) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public void visit(int docID, byte[] packedValue) throws IOException { + assert docID == state.scratchDocIDs[i]; + System.arraycopy(packedValue, 0, packedValues, i * bkd.packedBytesLength, bkd.packedBytesLength); + i++; + } + + @Override + public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { + throw new UnsupportedOperationException(); + } + + }); + + blockID++; + } + + final int index = docBlockUpto++; + int oldDocID = state.scratchDocIDs[index]; + + int mappedDocID; + if (docMap == null) { + mappedDocID = oldDocID; + } else { + mappedDocID = docMap.get(oldDocID); + } + + if (mappedDocID != -1) { + // Not deleted! + docID = mappedDocID; + System.arraycopy(packedValues, index * bkd.packedBytesLength, state.scratchPackedValue, 0, bkd.packedBytesLength); + return true; + } + } + } + } + + private static class BKDMergeQueue extends PriorityQueue { + private final int bytesPerDim; + + public BKDMergeQueue(int bytesPerDim, int maxSize) { + super(maxSize); + this.bytesPerDim = bytesPerDim; + } + + @Override + public boolean lessThan(MergeReader a, MergeReader b) { + assert a != b; + + int cmp = StringHelper.compare(bytesPerDim, a.state.scratchPackedValue, 0, b.state.scratchPackedValue, 0); + if (cmp < 0) { + return true; + } else if (cmp > 0) { + return false; + } + + // Tie break by sorting smaller docIDs earlier: + return a.docID < b.docID; + } + } + + /** Write a field from a {@link MutablePointValues}. This way of writing + * points is faster than regular writes with {@link BKDWriter#add} since + * there is opportunity for reordering points before writing them to + * disk. This method does not use transient disk in order to reorder points. + */ + public long writeField(IndexOutput out, String fieldName, MutablePointValues reader) throws IOException { + if (numDims == 1) { + return writeField1Dim(out, fieldName, reader); + } else { + return writeFieldNDims(out, fieldName, reader); + } + } + + + /* In the 2+D case, we recursively pick the split dimension, compute the + * median value and partition other values around it. 
*/ + private long writeFieldNDims(IndexOutput out, String fieldName, MutablePointValues values) throws IOException { + if (pointCount != 0) { + throw new IllegalStateException("cannot mix add and writeField"); + } + + // Catch user silliness: + if (heapPointWriter == null && tempInput == null) { + throw new IllegalStateException("already finished"); + } + + // Mark that we already finished: + heapPointWriter = null; + + long countPerLeaf = pointCount = values.size(); + long innerNodeCount = 1; + + while (countPerLeaf > maxPointsInLeafNode) { + countPerLeaf = (countPerLeaf+1)/2; + innerNodeCount *= 2; + } + + int numLeaves = Math.toIntExact(innerNodeCount); + + checkMaxLeafNodeCount(numLeaves); + + final byte[] splitPackedValues = new byte[numLeaves * (bytesPerDim + 1)]; + final long[] leafBlockFPs = new long[numLeaves]; + + // compute the min/max for this slice + Arrays.fill(minPackedValue, (byte) 0xff); + Arrays.fill(maxPackedValue, (byte) 0); + for (int i = 0; i < Math.toIntExact(pointCount); ++i) { + values.getValue(i, scratchBytesRef1); + for(int dim=0;dim 0) { + System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, maxPackedValue, offset, bytesPerDim); + } + } + + docsSeen.set(values.getDocID(i)); + } + + build(1, numLeaves, values, 0, Math.toIntExact(pointCount), out, + minPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs, + new int[maxPointsInLeafNode]); + + long indexFP = out.getFilePointer(); + writeIndex(out, leafBlockFPs, splitPackedValues); + return indexFP; + } + + + /* In the 1D case, we can simply sort points in ascending order and use the + * same writing logic as we use at merge time. */ + private long writeField1Dim(IndexOutput out, String fieldName, MutablePointValues reader) throws IOException { + MutablePointsReaderUtils.sort(maxDoc, packedBytesLength, reader, 0, Math.toIntExact(reader.size())); + + final OneDimensionBKDWriter oneDimWriter = new OneDimensionBKDWriter(out); + + reader.intersect(new IntersectVisitor() { + + @Override + public void visit(int docID, byte[] packedValue) throws IOException { + oneDimWriter.add(packedValue, docID); + } + + @Override + public void visit(int docID) throws IOException { + throw new IllegalStateException(); + } + + @Override + public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { + return Relation.CELL_CROSSES_QUERY; + } + }); + + return oneDimWriter.finish(); + } + + // TODO: remove this opto: SimpleText is supposed to be simple! + + /** More efficient bulk-add for incoming {@link SimpleTextBKDReader}s. This does a merge sort of the already + * sorted values and currently only works when numDims==1. This returns -1 if all documents containing + * dimensional values were deleted. */ + public long merge(IndexOutput out, List docMaps, List readers) throws IOException { + assert docMaps == null || readers.size() == docMaps.size(); + + BKDMergeQueue queue = new BKDMergeQueue(bytesPerDim, readers.size()); + + for(int i=0;i totalPointCount) { + throw new IllegalStateException("totalPointCount=" + totalPointCount + " was passed when we were created, but we just hit " + pointCount + " values"); + } + + if (leafCount == maxPointsInLeafNode) { + // We write a block once we hit exactly the max count ... 
this is different from + // when we flush a new segment, where we write between max/2 and max per leaf block, + // so merged segments will behave differently from newly flushed segments: + writeLeafBlock(); + leafCount = 0; + } + + assert (lastDocID = docID) >= 0; // only assign when asserts are enabled + } + + public long finish() throws IOException { + if (leafCount > 0) { + writeLeafBlock(); + leafCount = 0; + } + + if (valueCount == 0) { + return -1; + } + + pointCount = valueCount; + + long indexFP = out.getFilePointer(); + + int numInnerNodes = leafBlockStartValues.size(); + + //System.out.println("BKDW: now rotate numInnerNodes=" + numInnerNodes + " leafBlockStarts=" + leafBlockStartValues.size()); + + byte[] index = new byte[(1+numInnerNodes) * (1+bytesPerDim)]; + rotateToTree(1, 0, numInnerNodes, index, leafBlockStartValues); + long[] arr = new long[leafBlockFPs.size()]; + for(int i=0;i 0) { + // Save the first (minimum) value in each leaf block except the first, to build the split value index in the end: + leafBlockStartValues.add(Arrays.copyOf(leafValues, packedBytesLength)); + } + leafBlockFPs.add(out.getFilePointer()); + checkMaxLeafNodeCount(leafBlockFPs.size()); + + Arrays.fill(commonPrefixLengths, bytesPerDim); + // Find per-dim common prefix: + for(int dim=0;dim packedValues = new IntFunction() { + final BytesRef scratch = new BytesRef(); + + { + scratch.length = packedBytesLength; + scratch.bytes = leafValues; + } + + @Override + public BytesRef apply(int i) { + scratch.offset = packedBytesLength * i; + return scratch; + } + }; + assert valuesInOrderAndBounds(leafCount, 0, Arrays.copyOf(leafValues, packedBytesLength), + Arrays.copyOfRange(leafValues, (leafCount - 1) * packedBytesLength, leafCount * packedBytesLength), + packedValues, leafDocs, 0); + writeLeafBlockPackedValues(out, commonPrefixLengths, leafCount, 0, packedValues); + } + + } + + // TODO: there must be a simpler way? 
+ private void rotateToTree(int nodeID, int offset, int count, byte[] index, List leafBlockStartValues) { + //System.out.println("ROTATE: nodeID=" + nodeID + " offset=" + offset + " count=" + count + " bpd=" + bytesPerDim + " index.length=" + index.length); + if (count == 1) { + // Leaf index node + //System.out.println(" leaf index node"); + //System.out.println(" index[" + nodeID + "] = blockStartValues[" + offset + "]"); + System.arraycopy(leafBlockStartValues.get(offset), 0, index, nodeID*(1+bytesPerDim)+1, bytesPerDim); + } else if (count > 1) { + // Internal index node: binary partition of count + int countAtLevel = 1; + int totalCount = 0; + while (true) { + int countLeft = count - totalCount; + //System.out.println(" cycle countLeft=" + countLeft + " coutAtLevel=" + countAtLevel); + if (countLeft <= countAtLevel) { + // This is the last level, possibly partially filled: + int lastLeftCount = Math.min(countAtLevel/2, countLeft); + assert lastLeftCount >= 0; + int leftHalf = (totalCount-1)/2 + lastLeftCount; + + int rootOffset = offset + leftHalf; + /* + System.out.println(" last left count " + lastLeftCount); + System.out.println(" leftHalf " + leftHalf + " rightHalf=" + (count-leftHalf-1)); + System.out.println(" rootOffset=" + rootOffset); + */ + + System.arraycopy(leafBlockStartValues.get(rootOffset), 0, index, nodeID*(1+bytesPerDim)+1, bytesPerDim); + //System.out.println(" index[" + nodeID + "] = blockStartValues[" + rootOffset + "]"); + + // TODO: we could optimize/specialize, when we know it's simply fully balanced binary tree + // under here, to save this while loop on each recursion + + // Recurse left + rotateToTree(2*nodeID, offset, leftHalf, index, leafBlockStartValues); + + // Recurse right + rotateToTree(2*nodeID+1, rootOffset+1, count-leftHalf-1, index, leafBlockStartValues); + return; + } + totalCount += countAtLevel; + countAtLevel *= 2; + } + } else { + assert count == 0; + } + } + + // TODO: if we fixed each partition step to just record the file offset at the "split point", we could probably handle variable length + // encoding and not have our own ByteSequencesReader/Writer + + /** Sort the heap writer by the specified dim */ + private void sortHeapPointWriter(final HeapPointWriter writer, int dim) { + final int pointCount = Math.toIntExact(this.pointCount); + // Tie-break by docID: + + // No need to tie break on ord, for the case where the same doc has the same value in a given dimension indexed more than once: it + // can't matter at search time since we don't write ords into the index: + new MSBRadixSorter(bytesPerDim + Integer.BYTES) { + + @Override + protected int byteAt(int i, int k) { + assert k >= 0; + if (k < bytesPerDim) { + // dim bytes + int block = i / writer.valuesPerBlock; + int index = i % writer.valuesPerBlock; + return writer.blocks.get(block)[index * packedBytesLength + dim * bytesPerDim + k] & 0xff; + } else { + // doc id + int s = 3 - (k - bytesPerDim); + return (writer.docIDs[i] >>> (s * 8)) & 0xff; + } + } + + @Override + protected void swap(int i, int j) { + int docID = writer.docIDs[i]; + writer.docIDs[i] = writer.docIDs[j]; + writer.docIDs[j] = docID; + + if (singleValuePerDoc == false) { + if (longOrds) { + long ord = writer.ordsLong[i]; + writer.ordsLong[i] = writer.ordsLong[j]; + writer.ordsLong[j] = ord; + } else { + int ord = writer.ords[i]; + writer.ords[i] = writer.ords[j]; + writer.ords[j] = ord; + } + } + + byte[] blockI = writer.blocks.get(i / writer.valuesPerBlock); + int indexI = (i % writer.valuesPerBlock) * 
packedBytesLength; + byte[] blockJ = writer.blocks.get(j / writer.valuesPerBlock); + int indexJ = (j % writer.valuesPerBlock) * packedBytesLength; + + // scratch1 = values[i] + System.arraycopy(blockI, indexI, scratch1, 0, packedBytesLength); + // values[i] = values[j] + System.arraycopy(blockJ, indexJ, blockI, indexI, packedBytesLength); + // values[j] = scratch1 + System.arraycopy(scratch1, 0, blockJ, indexJ, packedBytesLength); + } + + }.sort(0, pointCount); + } + + private PointWriter sort(int dim) throws IOException { + assert dim >= 0 && dim < numDims; + + if (heapPointWriter != null) { + + assert tempInput == null; + + // We never spilled the incoming points to disk, so now we sort in heap: + HeapPointWriter sorted; + + if (dim == 0) { + // First dim can re-use the current heap writer + sorted = heapPointWriter; + } else { + // Subsequent dims need a private copy + sorted = new HeapPointWriter((int) pointCount, (int) pointCount, packedBytesLength, longOrds, singleValuePerDoc); + sorted.copyFrom(heapPointWriter); + } + + //long t0 = System.nanoTime(); + sortHeapPointWriter(sorted, dim); + //long t1 = System.nanoTime(); + //System.out.println("BKD: sort took " + ((t1-t0)/1000000.0) + " msec"); + + sorted.close(); + return sorted; + } else { + + // Offline sort: + assert tempInput != null; + + final int offset = bytesPerDim * dim; + + Comparator cmp; + if (dim == numDims - 1) { + // in that case the bytes for the dimension and for the doc id are contiguous, + // so we don't need a branch + cmp = new BytesRefComparator(bytesPerDim + Integer.BYTES) { + @Override + protected int byteAt(BytesRef ref, int i) { + return ref.bytes[ref.offset + offset + i] & 0xff; + } + }; + } else { + cmp = new BytesRefComparator(bytesPerDim + Integer.BYTES) { + @Override + protected int byteAt(BytesRef ref, int i) { + if (i < bytesPerDim) { + return ref.bytes[ref.offset + offset + i] & 0xff; + } else { + return ref.bytes[ref.offset + packedBytesLength + i - bytesPerDim] & 0xff; + } + } + }; + } + + OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix + "_bkd" + dim, cmp, offlineSorterBufferMB, offlineSorterMaxTempFiles, bytesPerDoc) { + + /** We write/read fixed-byte-width file that {@link OfflinePointReader} can read. */ + @Override + protected ByteSequencesWriter getWriter(IndexOutput out) { + return new ByteSequencesWriter(out) { + @Override + public void write(byte[] bytes, int off, int len) throws IOException { + assert len == bytesPerDoc: "len=" + len + " bytesPerDoc=" + bytesPerDoc; + out.writeBytes(bytes, off, len); + } + }; + } + + /** We write/read fixed-byte-width file that {@link OfflinePointReader} can read. 
*/ + @Override + protected ByteSequencesReader getReader(ChecksumIndexInput in, String name) throws IOException { + return new ByteSequencesReader(in, name) { + final BytesRef scratch = new BytesRef(new byte[bytesPerDoc]); + @Override + public BytesRef next() throws IOException { + if (in.getFilePointer() >= end) { + return null; + } + in.readBytes(scratch.bytes, 0, bytesPerDoc); + return scratch; + } + }; + } + }; + + String name = sorter.sort(tempInput.getName()); + + return new OfflinePointWriter(tempDir, name, packedBytesLength, pointCount, longOrds, singleValuePerDoc); + } + } + + private void checkMaxLeafNodeCount(int numLeaves) { + if ((1+bytesPerDim) * (long) numLeaves > ArrayUtil.MAX_ARRAY_LENGTH) { + throw new IllegalStateException("too many nodes; increase maxPointsInLeafNode (currently " + maxPointsInLeafNode + ") and reindex"); + } + } + + /** Writes the BKD tree to the provided {@link IndexOutput} and returns the file offset where index was written. */ + public long finish(IndexOutput out) throws IOException { + // System.out.println("\nBKDTreeWriter.finish pointCount=" + pointCount + " out=" + out + " heapWriter=" + heapPointWriter); + + // TODO: specialize the 1D case? it's much faster at indexing time (no partitioning on recurse...) + + // Catch user silliness: + if (heapPointWriter == null && tempInput == null) { + throw new IllegalStateException("already finished"); + } + + if (offlinePointWriter != null) { + offlinePointWriter.close(); + } + + if (pointCount == 0) { + throw new IllegalStateException("must index at least one point"); + } + + LongBitSet ordBitSet; + if (numDims > 1) { + if (singleValuePerDoc) { + ordBitSet = new LongBitSet(maxDoc); + } else { + ordBitSet = new LongBitSet(pointCount); + } + } else { + ordBitSet = null; + } + + long countPerLeaf = pointCount; + long innerNodeCount = 1; + + while (countPerLeaf > maxPointsInLeafNode) { + countPerLeaf = (countPerLeaf+1)/2; + innerNodeCount *= 2; + } + + int numLeaves = (int) innerNodeCount; + + checkMaxLeafNodeCount(numLeaves); + + // NOTE: we could save the 1+ here, to use a bit less heap at search time, but then we'd need a somewhat costly check at each + // step of the recursion to recompute the split dim: + + // Indexed by nodeID, but first (root) nodeID is 1. We do 1+ because the lead byte at each recursion says which dim we split on. + byte[] splitPackedValues = new byte[Math.toIntExact(numLeaves*(1+bytesPerDim))]; + + // +1 because leaf count is power of 2 (e.g. 8), and innerNodeCount is power of 2 minus 1 (e.g. 
7) + long[] leafBlockFPs = new long[numLeaves]; + + // Make sure the math above "worked": + assert pointCount / numLeaves <= maxPointsInLeafNode: "pointCount=" + pointCount + " numLeaves=" + numLeaves + " maxPointsInLeafNode=" + maxPointsInLeafNode; + + // Sort all docs once by each dimension: + PathSlice[] sortedPointWriters = new PathSlice[numDims]; + + // This is only used on exception; on normal code paths we close all files we opened: + List toCloseHeroically = new ArrayList<>(); + + boolean success = false; + try { + //long t0 = System.nanoTime(); + for(int dim=0;dim packedValues) throws IOException { + for (int i = 0; i < count; ++i) { + BytesRef packedValue = packedValues.apply(i); + // NOTE: we don't do prefix coding, so we ignore commonPrefixLengths + write(out, BLOCK_VALUE); + write(out, packedValue.toString()); + newline(out); + } + } + + private void writeLeafBlockPackedValuesRange(IndexOutput out, int[] commonPrefixLengths, int start, int end, IntFunction packedValues) throws IOException { + for (int i = start; i < end; ++i) { + BytesRef ref = packedValues.apply(i); + assert ref.length == packedBytesLength; + + for(int dim=0;dim packedValues, int start, int end, int byteOffset) { + BytesRef first = packedValues.apply(start); + byte b = first.bytes[first.offset + byteOffset]; + for (int i = start + 1; i < end; ++i) { + BytesRef ref = packedValues.apply(i); + byte b2 = ref.bytes[ref.offset + byteOffset]; + assert Byte.toUnsignedInt(b2) >= Byte.toUnsignedInt(b); + if (b != b2) { + return i - start; + } + } + return end - start; + } + + @Override + public void close() throws IOException { + if (tempInput != null) { + // NOTE: this should only happen on exception, e.g. caller calls close w/o calling finish: + try { + tempInput.close(); + } finally { + tempDir.deleteFile(tempInput.getName()); + tempInput = null; + } + } + } + + /** Sliced reference to points in an OfflineSorter.ByteSequencesWriter file. */ + private static final class PathSlice { + final PointWriter writer; + final long start; + final long count; + + public PathSlice(PointWriter writer, long start, long count) { + this.writer = writer; + this.start = start; + this.count = count; + } + + @Override + public String toString() { + return "PathSlice(start=" + start + " count=" + count + " writer=" + writer + ")"; + } + } + + /** Called on exception, to check whether the checksum is also corrupt in this source, and add that + * information (checksum matched or didn't) as a suppressed exception. */ + private void verifyChecksum(Throwable priorException, PointWriter writer) throws IOException { + // TODO: we could improve this, to always validate checksum as we recurse, if we shared left and + // right reader after recursing to children, and possibly within recursed children, + // since all together they make a single pass through the file. But this is a sizable re-org, + // and would mean leaving readers (IndexInputs) open for longer: + if (writer instanceof OfflinePointWriter) { + // We are reading from a temp file; go verify the checksum: + String tempFileName = ((OfflinePointWriter) writer).name; + try (ChecksumIndexInput in = tempDir.openChecksumInput(tempFileName, IOContext.READONCE)) { + CodecUtil.checkFooter(in, priorException); + } + } else { + // We are reading from heap; nothing to add: + IOUtils.reThrow(priorException); + } + } + + /** Marks bits for the ords (points) that belong in the right sub tree (those docs that have values >= the splitValue). 
*/ + private byte[] markRightTree(long rightCount, int splitDim, PathSlice source, LongBitSet ordBitSet) throws IOException { + + // Now we mark ords that fall into the right half, so we can partition on all other dims that are not the split dim: + + // Read the split value, then mark all ords in the right tree (larger than the split value): + + // TODO: find a way to also checksum this reader? If we changed to markLeftTree, and scanned the final chunk, it could work? + try (PointReader reader = source.writer.getReader(source.start + source.count - rightCount, rightCount)) { + boolean result = reader.next(); + assert result; + System.arraycopy(reader.packedValue(), splitDim*bytesPerDim, scratch1, 0, bytesPerDim); + if (numDims > 1) { + assert ordBitSet.get(reader.ord()) == false; + ordBitSet.set(reader.ord()); + // Subtract 1 from rightCount because we already did the first value above (so we could record the split value): + reader.markOrds(rightCount-1, ordBitSet); + } + } catch (Throwable t) { + verifyChecksum(t, source.writer); + } + + return scratch1; + } + + /** Called only in assert */ + private boolean valueInBounds(BytesRef packedValue, byte[] minPackedValue, byte[] maxPackedValue) { + for(int dim=0;dim 0) { + return false; + } + } + + return true; + } + + protected int split(byte[] minPackedValue, byte[] maxPackedValue) { + // Find which dim has the largest span so we can split on it: + int splitDim = -1; + for(int dim=0;dim 0) { + System.arraycopy(scratchDiff, 0, scratch1, 0, bytesPerDim); + splitDim = dim; + } + } + + //System.out.println("SPLIT: " + splitDim); + return splitDim; + } + + /** Pull a partition back into heap once the point count is low enough while recursing. */ + private PathSlice switchToHeap(PathSlice source, List toCloseHeroically) throws IOException { + int count = Math.toIntExact(source.count); + // Not inside the try because we don't want to close it here: + PointReader reader = source.writer.getSharedReader(source.start, source.count, toCloseHeroically); + try (PointWriter writer = new HeapPointWriter(count, count, packedBytesLength, longOrds, singleValuePerDoc)) { + for(int i=0;i= leafNodeOffset) { + // leaf node + final int count = to - from; + assert count <= maxPointsInLeafNode; + + // Compute common prefixes + Arrays.fill(commonPrefixLengths, bytesPerDim); + reader.getValue(from, scratchBytesRef1); + for (int i = from + 1; i < to; ++i) { + reader.getValue(i, scratchBytesRef2); + for (int dim=0;dim packedValues = new IntFunction() { + @Override + public BytesRef apply(int i) { + reader.getValue(from + i, scratchBytesRef1); + return scratchBytesRef1; + } + }; + assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues, + docIDs, 0); + writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues); + + } else { + // inner node + + // compute the split dimension and partition around it + final int splitDim = split(minPackedValue, maxPackedValue); + final int mid = (from + to + 1) >>> 1; + + int commonPrefixLen = bytesPerDim; + for (int i = 0; i < bytesPerDim; ++i) { + if (minPackedValue[splitDim * bytesPerDim + i] != maxPackedValue[splitDim * bytesPerDim + i]) { + commonPrefixLen = i; + break; + } + } + MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLen, + reader, from, to, mid, scratchBytesRef1, scratchBytesRef2); + + // set the split value + final int address = nodeID * (1+bytesPerDim); + splitPackedValues[address] = (byte) splitDim; + reader.getValue(mid, 
scratchBytesRef1); + System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim, splitPackedValues, address + 1, bytesPerDim); + + byte[] minSplitPackedValue = Arrays.copyOf(minPackedValue, packedBytesLength); + byte[] maxSplitPackedValue = Arrays.copyOf(maxPackedValue, packedBytesLength); + System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim, + minSplitPackedValue, splitDim * bytesPerDim, bytesPerDim); + System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim, + maxSplitPackedValue, splitDim * bytesPerDim, bytesPerDim); + + // recurse + build(nodeID * 2, leafNodeOffset, reader, from, mid, out, + minPackedValue, maxSplitPackedValue, splitPackedValues, leafBlockFPs, spareDocIds); + build(nodeID * 2 + 1, leafNodeOffset, reader, mid, to, out, + minSplitPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs, spareDocIds); + } + } + + /** The array (sized numDims) of PathSlice describe the cell we have currently recursed to. */ + private void build(int nodeID, int leafNodeOffset, + PathSlice[] slices, + LongBitSet ordBitSet, + IndexOutput out, + byte[] minPackedValue, byte[] maxPackedValue, + byte[] splitPackedValues, + long[] leafBlockFPs, + List toCloseHeroically) throws IOException { + + for(PathSlice slice : slices) { + assert slice.count == slices[0].count; + } + + if (numDims == 1 && slices[0].writer instanceof OfflinePointWriter && slices[0].count <= maxPointsSortInHeap) { + // Special case for 1D, to cutover to heap once we recurse deeply enough: + slices[0] = switchToHeap(slices[0], toCloseHeroically); + } + + if (nodeID >= leafNodeOffset) { + + // Leaf node: write block + // We can write the block in any order so by default we write it sorted by the dimension that has the + // least number of unique bytes at commonPrefixLengths[dim], which makes compression more efficient + int sortedDim = 0; + int sortedDimCardinality = Integer.MAX_VALUE; + + for (int dim=0;dim= maxPointsInLeafNode, so we better be in heap at this point: + HeapPointWriter heapSource = (HeapPointWriter) source.writer; + + // Save the block file pointer: + leafBlockFPs[nodeID - leafNodeOffset] = out.getFilePointer(); + //System.out.println(" write leaf block @ fp=" + out.getFilePointer()); + + // Write docIDs first, as their own chunk, so that at intersect time we can add all docIDs w/o + // loading the values: + int count = Math.toIntExact(source.count); + assert count > 0: "nodeID=" + nodeID + " leafNodeOffset=" + leafNodeOffset; + writeLeafBlockDocs(out, heapSource.docIDs, Math.toIntExact(source.start), count); + + // TODO: minor opto: we don't really have to write the actual common prefixes, because BKDReader on recursing can regenerate it for us + // from the index, much like how terms dict does so from the FST: + + // Write the full values: + IntFunction packedValues = new IntFunction() { + final BytesRef scratch = new BytesRef(); + + { + scratch.length = packedBytesLength; + } + + @Override + public BytesRef apply(int i) { + heapSource.getPackedValueSlice(Math.toIntExact(source.start + i), scratch); + return scratch; + } + }; + assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues, + heapSource.docIDs, Math.toIntExact(source.start)); + writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues); + + } else { + // Inner node: partition/recurse + + int splitDim; + if (numDims > 1) { + splitDim = split(minPackedValue, maxPackedValue); + } else { 
+ splitDim = 0; + } + + PathSlice source = slices[splitDim]; + + assert nodeID < splitPackedValues.length: "nodeID=" + nodeID + " splitValues.length=" + splitPackedValues.length; + + // How many points will be in the left tree: + long rightCount = source.count / 2; + long leftCount = source.count - rightCount; + + byte[] splitValue = markRightTree(rightCount, splitDim, source, ordBitSet); + int address = nodeID * (1+bytesPerDim); + splitPackedValues[address] = (byte) splitDim; + System.arraycopy(splitValue, 0, splitPackedValues, address + 1, bytesPerDim); + + // Partition all PathSlice that are not the split dim into sorted left and right sets, so we can recurse: + + PathSlice[] leftSlices = new PathSlice[numDims]; + PathSlice[] rightSlices = new PathSlice[numDims]; + + byte[] minSplitPackedValue = new byte[packedBytesLength]; + System.arraycopy(minPackedValue, 0, minSplitPackedValue, 0, packedBytesLength); + + byte[] maxSplitPackedValue = new byte[packedBytesLength]; + System.arraycopy(maxPackedValue, 0, maxSplitPackedValue, 0, packedBytesLength); + + // When we are on this dim, below, we clear the ordBitSet: + int dimToClear; + if (numDims - 1 == splitDim) { + dimToClear = numDims - 2; + } else { + dimToClear = numDims - 1; + } + + for(int dim=0;dim values, int[] docs, int docsOffset) throws IOException { + byte[] lastPackedValue = new byte[packedBytesLength]; + int lastDoc = -1; + for (int i=0;i 0) { + int cmp = StringHelper.compare(bytesPerDim, lastPackedValue, dimOffset, packedValue, packedValueOffset + dimOffset); + if (cmp > 0) { + throw new AssertionError("values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord); + } + if (cmp == 0 && doc < lastDoc) { + throw new AssertionError("docs out of order: last doc=" + lastDoc + " current doc=" + doc + " ord=" + ord); + } + } + System.arraycopy(packedValue, packedValueOffset, lastPackedValue, 0, packedBytesLength); + return true; + } + + PointWriter getPointWriter(long count, String desc) throws IOException { + if (count <= maxPointsSortInHeap) { + int size = Math.toIntExact(count); + return new HeapPointWriter(size, size, packedBytesLength, longOrds, singleValuePerDoc); + } else { + return new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength, longOrds, desc, count, singleValuePerDoc); + } + } + + private void write(IndexOutput out, String s) throws IOException { + SimpleTextUtil.write(out, s, scratch); + } + + private void writeInt(IndexOutput out, int x) throws IOException { + SimpleTextUtil.write(out, Integer.toString(x), scratch); + } + + private void writeLong(IndexOutput out, long x) throws IOException { + SimpleTextUtil.write(out, Long.toString(x), scratch); + } + + private void write(IndexOutput out, BytesRef b) throws IOException { + SimpleTextUtil.write(out, b); + } + + private void newline(IndexOutput out) throws IOException { + SimpleTextUtil.writeNewline(out); + } +} diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsReader.java index f7ff16ecbc2..453bd2384b2 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsReader.java @@ -36,7 +36,6 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; import 
org.apache.lucene.util.IOUtils; import org.apache.lucene.util.StringHelper; -import org.apache.lucene.util.bkd.BKDReader; import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.BLOCK_FP; import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.BYTES_PER_DIM; @@ -58,7 +57,7 @@ class SimpleTextPointsReader extends PointsReader { private final IndexInput dataIn; final SegmentReadState readState; - final Map readers = new HashMap<>(); + final Map readers = new HashMap<>(); final BytesRefBuilder scratch = new BytesRefBuilder(); public SimpleTextPointsReader(SegmentReadState readState) throws IOException { @@ -98,7 +97,7 @@ class SimpleTextPointsReader extends PointsReader { this.readState = readState; } - private BKDReader initReader(long fp) throws IOException { + private SimpleTextBKDReader initReader(long fp) throws IOException { // NOTE: matches what writeIndex does in SimpleTextPointsWriter dataIn.seek(fp); readLine(dataIn); diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java index c06c128d154..9d2db890fa0 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java @@ -20,7 +20,6 @@ package org.apache.lucene.codecs.simpletext; import java.io.IOException; import java.util.HashMap; import java.util.Map; -import java.util.function.IntFunction; import org.apache.lucene.codecs.PointsReader; import org.apache.lucene.codecs.PointsWriter; @@ -33,29 +32,28 @@ import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; -import org.apache.lucene.util.bkd.BKDWriter; class SimpleTextPointsWriter extends PointsWriter { - final static BytesRef NUM_DIMS = new BytesRef("num dims "); - final static BytesRef BYTES_PER_DIM = new BytesRef("bytes per dim "); - final static BytesRef MAX_LEAF_POINTS = new BytesRef("max leaf points "); - final static BytesRef INDEX_COUNT = new BytesRef("index count "); - final static BytesRef BLOCK_COUNT = new BytesRef("block count "); - final static BytesRef BLOCK_DOC_ID = new BytesRef(" doc "); - final static BytesRef BLOCK_FP = new BytesRef(" block fp "); - final static BytesRef BLOCK_VALUE = new BytesRef(" block value "); - final static BytesRef SPLIT_COUNT = new BytesRef("split count "); - final static BytesRef SPLIT_DIM = new BytesRef(" split dim "); - final static BytesRef SPLIT_VALUE = new BytesRef(" split value "); - final static BytesRef FIELD_COUNT = new BytesRef("field count "); - final static BytesRef FIELD_FP_NAME = new BytesRef(" field fp name "); - final static BytesRef FIELD_FP = new BytesRef(" field fp "); - final static BytesRef MIN_VALUE = new BytesRef("min value "); - final static BytesRef MAX_VALUE = new BytesRef("max value "); - final static BytesRef POINT_COUNT = new BytesRef("point count "); - final static BytesRef DOC_COUNT = new BytesRef("doc count "); - final static BytesRef END = new BytesRef("END"); + public final static BytesRef NUM_DIMS = new BytesRef("num dims "); + public final static BytesRef BYTES_PER_DIM = new BytesRef("bytes per dim "); + public final static BytesRef MAX_LEAF_POINTS = new BytesRef("max leaf points "); + public final static BytesRef INDEX_COUNT = new BytesRef("index count "); + public final static BytesRef 
BLOCK_COUNT = new BytesRef("block count "); + public final static BytesRef BLOCK_DOC_ID = new BytesRef(" doc "); + public final static BytesRef BLOCK_FP = new BytesRef(" block fp "); + public final static BytesRef BLOCK_VALUE = new BytesRef(" block value "); + public final static BytesRef SPLIT_COUNT = new BytesRef("split count "); + public final static BytesRef SPLIT_DIM = new BytesRef(" split dim "); + public final static BytesRef SPLIT_VALUE = new BytesRef(" split value "); + public final static BytesRef FIELD_COUNT = new BytesRef("field count "); + public final static BytesRef FIELD_FP_NAME = new BytesRef(" field fp name "); + public final static BytesRef FIELD_FP = new BytesRef(" field fp "); + public final static BytesRef MIN_VALUE = new BytesRef("min value "); + public final static BytesRef MAX_VALUE = new BytesRef("max value "); + public final static BytesRef POINT_COUNT = new BytesRef("point count "); + public final static BytesRef DOC_COUNT = new BytesRef("doc count "); + public final static BytesRef END = new BytesRef("END"); private IndexOutput dataOut; final BytesRefBuilder scratch = new BytesRefBuilder(); @@ -75,105 +73,15 @@ class SimpleTextPointsWriter extends PointsWriter { boolean singleValuePerDoc = values.size() == values.getDocCount(); // We use the normal BKDWriter, but subclass to customize how it writes the index and blocks to disk: - try (BKDWriter writer = new BKDWriter(writeState.segmentInfo.maxDoc(), - writeState.directory, - writeState.segmentInfo.name, - fieldInfo.getPointDimensionCount(), - fieldInfo.getPointNumBytes(), - BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE, - BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP, - values.size(), - singleValuePerDoc) { - - @Override - protected void writeIndex(IndexOutput out, long[] leafBlockFPs, byte[] splitPackedValues) throws IOException { - write(out, NUM_DIMS); - writeInt(out, numDims); - newline(out); - - write(out, BYTES_PER_DIM); - writeInt(out, bytesPerDim); - newline(out); - - write(out, MAX_LEAF_POINTS); - writeInt(out, maxPointsInLeafNode); - newline(out); - - write(out, INDEX_COUNT); - writeInt(out, leafBlockFPs.length); - newline(out); - - write(out, MIN_VALUE); - BytesRef br = new BytesRef(minPackedValue, 0, minPackedValue.length); - write(out, br.toString()); - newline(out); - - write(out, MAX_VALUE); - br = new BytesRef(maxPackedValue, 0, maxPackedValue.length); - write(out, br.toString()); - newline(out); - - write(out, POINT_COUNT); - writeLong(out, pointCount); - newline(out); - - write(out, DOC_COUNT); - writeInt(out, docsSeen.cardinality()); - newline(out); - - for(int i=0;i packedValues) throws IOException { - for (int i = 0; i < count; ++i) { - BytesRef packedValue = packedValues.apply(i); - // NOTE: we don't do prefix coding, so we ignore commonPrefixLengths - write(out, BLOCK_VALUE); - write(out, packedValue.toString()); - newline(out); - } - } - }) { + try (SimpleTextBKDWriter writer = new SimpleTextBKDWriter(writeState.segmentInfo.maxDoc(), + writeState.directory, + writeState.segmentInfo.name, + fieldInfo.getPointDimensionCount(), + fieldInfo.getPointNumBytes(), + SimpleTextBKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE, + SimpleTextBKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP, + values.size(), + singleValuePerDoc)) { values.intersect(new IntersectVisitor() { @Override @@ -198,26 +106,6 @@ class SimpleTextPointsWriter extends PointsWriter { } } - private void write(IndexOutput out, String s) throws IOException { - SimpleTextUtil.write(out, s, scratch); - } - - private void writeInt(IndexOutput out, int x) throws 
IOException { - SimpleTextUtil.write(out, Integer.toString(x), scratch); - } - - private void writeLong(IndexOutput out, long x) throws IOException { - SimpleTextUtil.write(out, Long.toString(x), scratch); - } - - private void write(IndexOutput out, BytesRef b) throws IOException { - SimpleTextUtil.write(out, b); - } - - private void newline(IndexOutput out) throws IOException { - SimpleTextUtil.writeNewline(out); - } - @Override public void finish() throws IOException { SimpleTextUtil.write(dataOut, END); @@ -250,4 +138,24 @@ class SimpleTextPointsWriter extends PointsWriter { } } } + + private void write(IndexOutput out, String s) throws IOException { + SimpleTextUtil.write(out, s, scratch); + } + + private void writeInt(IndexOutput out, int x) throws IOException { + SimpleTextUtil.write(out, Integer.toString(x), scratch); + } + + private void writeLong(IndexOutput out, long x) throws IOException { + SimpleTextUtil.write(out, Long.toString(x), scratch); + } + + private void write(IndexOutput out, BytesRef b) throws IOException { + SimpleTextUtil.write(out, b); + } + + private void newline(IndexOutput out) throws IOException { + SimpleTextUtil.writeNewline(out); + } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsFormat.java index e558d0d4fa8..1d2285c73b6 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsFormat.java @@ -28,7 +28,8 @@ import org.apache.lucene.index.SegmentWriteState; /** * Lucene 6.0 point format, which encodes dimensional values in a block KD-tree structure - * for fast shape intersection filtering. See this paper for details. + * for fast 1D range and N dimesional shape intersection filtering. + * See this paper for details. * *
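For orientation, a minimal, self-contained sketch of the kind of indexing and search this format ultimately serves is shown below; it goes through the standard IntPoint helpers rather than this codec class directly, and the class name PointsFormatDemo, the field name "price", and the sample values are placeholders.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class PointsFormatDemo {
  public static void main(String[] args) throws Exception {
    try (Directory dir = new RAMDirectory();
         IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      for (int value = 0; value < 100; value++) {
        Document doc = new Document();
        // Each 1D int value is encoded by the points format into the block KD-tree:
        doc.add(new IntPoint("price", value));
        writer.addDocument(doc);
      }
      writer.commit();
      try (DirectoryReader reader = DirectoryReader.open(dir)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        // The range query is answered by walking the tree and visiting matching leaf blocks:
        long hits = searcher.count(IntPoint.newRangeQuery("price", 10, 20));
        System.out.println("hits=" + hits);  // both bounds are inclusive, so 11 is expected
      }
    }
  }
}

The data structure that makes such queries cheap is described next.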

This data structure is written as a series of blocks on disk, with an in-memory perfectly balanced * binary tree of split values referencing those blocks at the leaves. @@ -50,10 +51,13 @@ import org.apache.lucene.index.SegmentWriteState; *

  • maxPointsInLeafNode (vInt) *
  • bytesPerDim (vInt) *
  • count (vInt) - *
  • byte[bytesPerDim]^count (packed byte[] all split values) - *
  • delta-blockFP (vLong)^count (delta-coded file pointers to the on-disk leaf blocks) + *
  • packed index (byte[]) * * + *

    The packed index uses hierarchical delta and prefix coding to compactly encode the file pointer for + * all leaf blocks, once the tree is traversed, as well as the split dimension and split value for each + * inner node of the tree. + * *
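To make the delta part of that description concrete, the stand-alone sketch below delta-codes a handful of made-up, already-sorted leaf file pointers and then restores them with a running sum; the real packed index is more elaborate, since it is hierarchical and also prefix-codes the split values.

public class DeltaCodingSketch {
  public static void main(String[] args) {
    long[] leafBlockFPs = {1024, 1536, 2048, 4096};  // hypothetical sorted leaf file pointers

    // Encode: keep only the difference to the previous pointer; small deltas need few vLong bytes.
    long[] deltas = new long[leafBlockFPs.length];
    long previous = 0;
    for (int i = 0; i < leafBlockFPs.length; i++) {
      deltas[i] = leafBlockFPs[i] - previous;
      previous = leafBlockFPs[i];
    }

    // Decode: a running sum restores the absolute file pointers.
    long fp = 0;
    for (int i = 0; i < deltas.length; i++) {
      fp += deltas[i];
      if (fp != leafBlockFPs[i]) {
        throw new AssertionError("decode mismatch at " + i);
      }
    }
    System.out.println(java.util.Arrays.toString(deltas));  // prints [1024, 512, 512, 2048]
  }
}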

    After all fields blocks + index data are written, {@link CodecUtil#writeFooter} writes the checksum. * *
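The footer convention referenced here is shared by all codec files; a minimal sketch of the write-then-verify pattern follows, where the in-memory directory, the file name demo.dat, and the payload are chosen only for illustration.

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;

public class FooterSketch {
  public static void main(String[] args) throws Exception {
    try (Directory dir = new RAMDirectory()) {
      try (IndexOutput out = dir.createOutput("demo.dat", IOContext.DEFAULT)) {
        out.writeVInt(42);           // some payload
        CodecUtil.writeFooter(out);  // appends footer magic, checksum algorithm id and checksum
      }
      try (IndexInput in = dir.openInput("demo.dat", IOContext.DEFAULT)) {
        CodecUtil.checksumEntireFile(in);  // throws CorruptIndexException if the bytes were damaged
      }
    }
  }
}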

    The .dii file records the file pointer in the .dim file where each field's diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/package-info.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/package-info.java index 8968a6d624c..a914001d9d2 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/package-info.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/package-info.java @@ -16,7 +16,7 @@ */ /** - * Components from the Lucene 6.0 index format. See {@link org.apache.lucene.codecs.lucene62} - * for an overview of the index format. + * Components from the Lucene 6.0 index format. See {@link org.apache.lucene.codecs.lucene70} + * for an overview of the current index format. */ package org.apache.lucene.codecs.lucene60; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene62/package-info.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene62/package-info.java index 2fe2dc74b4a..fb556732d08 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene62/package-info.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene62/package-info.java @@ -17,8 +17,8 @@ /** * Components from the Lucene 6.2 index format - * See {@link org.apache.lucene.codecs.lucene62} for an overview - * of the index format. + * See {@link org.apache.lucene.codecs.lucene70} for an overview + * of the current index format. */ package org.apache.lucene.codecs.lucene62; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/package-info.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/package-info.java index 9b432f7c4f4..cab2859766e 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/package-info.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/package-info.java @@ -185,6 +185,12 @@ * {@link org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat Live documents}. * An optional file indicating which documents are live. *

  • + *
  • + * {@link org.apache.lucene.codecs.lucene60.Lucene60PointsFormat Point values}. + * Optional pair of files, recording dimensionally indexed fields, to enable fast + * numeric range filtering, indexing of large numeric values like BigInteger and BigDecimal (1D), + * and geographic shape intersection (2D, 3D). + *
  • * *

    Details on each of these are provided in their linked pages.
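As a small illustration of the point values entry added above, the sketch below (class name, field name and coordinates are made up) indexes a single two-dimensional point and then reports, per leaf, which fields carry point data; the codec writes the corresponding .dim/.dii files behind the scenes.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class ListPointFieldsSketch {
  public static void main(String[] args) throws Exception {
    try (Directory dir = new RAMDirectory()) {
      try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
        Document doc = new Document();
        doc.add(new DoublePoint("location", 52.52, 13.40));  // a 2D point
        writer.addDocument(doc);
      }
      try (DirectoryReader reader = DirectoryReader.open(dir)) {
        for (LeafReaderContext ctx : reader.leaves()) {
          for (FieldInfo fi : ctx.reader().getFieldInfos()) {
            if (fi.getPointDimensionCount() > 0) {
              System.out.println(fi.name + ": dims=" + fi.getPointDimensionCount()
                  + ", bytesPerDim=" + fi.getPointNumBytes());
            }
          }
        }
      }
    }
  }
}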

    * @@ -300,7 +306,12 @@ * * {@link org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat Live Documents} * .liv - * Info about what files are live + * Info about what documents are live + * + * + * {@link org.apache.lucene.codecs.lucene60.Lucene60PointsFormat Point values} + * .dii, .dim + * Holds indexed points, if any * * * @@ -374,6 +385,8 @@ * that is suitable for faceting/sorting/analytics. *
  • In version 5.4, DocValues have been improved to store more information on disk: * addresses for binary fields and ord indexes for multi-valued fields. + *
  • In version 6.0, Points were added for multi-dimensional range/distance search. + *
  • In version 6.2, a new segment info format was introduced that reads/writes the index sort, to support index sorting. *
  • In version 7.0, DocValues have been improved to better support sparse doc values * thanks to an iterator API. *
  • diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java index 7bc08f3c4a8..fd8011d4d07 100644 --- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java @@ -1801,161 +1801,32 @@ public final class CheckIndex implements Closeable { } for (FieldInfo fieldInfo : fieldInfos) { if (fieldInfo.getPointDimensionCount() > 0) { - FixedBitSet docsSeen = new FixedBitSet(reader.maxDoc()); - status.totalValueFields++; - int dimCount = fieldInfo.getPointDimensionCount(); - int bytesPerDim = fieldInfo.getPointNumBytes(); - int packedBytesCount = dimCount * bytesPerDim; - byte[] lastMinPackedValue = new byte[packedBytesCount]; - byte[] lastMaxPackedValue = new byte[packedBytesCount]; - BytesRef scratch = new BytesRef(); - scratch.length = bytesPerDim; - byte[] lastPackedValue = new byte[packedBytesCount]; - - long[] pointCountSeen = new long[1]; - PointValues values = pointsReader.getValues(fieldInfo.name); if (values == null) { continue; } - byte[] globalMinPackedValue = values.getMinPackedValue(); + + status.totalValueFields++; + long size = values.size(); int docCount = values.getDocCount(); - if (docCount > size) { - throw new RuntimeException("point values for field \"" + fieldInfo.name + "\" claims to have size=" + size + " points and inconsistent docCount=" + docCount); + VerifyPointsVisitor visitor = new VerifyPointsVisitor(fieldInfo.name, reader.maxDoc(), values); + values.intersect(visitor); + + if (visitor.getPointCountSeen() != size) { + throw new RuntimeException("point values for field \"" + fieldInfo.name + "\" claims to have size=" + size + " points, but in fact has " + visitor.getPointCountSeen()); } - if (docCount > reader.maxDoc()) { - throw new RuntimeException("point values for field \"" + fieldInfo.name + "\" claims to have docCount=" + docCount + " but that's greater than maxDoc=" + reader.maxDoc()); + if (visitor.getDocCountSeen() != docCount) { + throw new RuntimeException("point values for field \"" + fieldInfo.name + "\" claims to have docCount=" + docCount + " but in fact has " + visitor.getDocCountSeen()); } - if (globalMinPackedValue == null) { - if (size != 0) { - throw new RuntimeException("getMinPackedValue is null points for field \"" + fieldInfo.name + "\" yet size=" + size); - } - } else if (globalMinPackedValue.length != packedBytesCount) { - throw new RuntimeException("getMinPackedValue for field \"" + fieldInfo.name + "\" return length=" + globalMinPackedValue.length + " array, but should be " + packedBytesCount); - } - byte[] globalMaxPackedValue = values.getMaxPackedValue(); - if (globalMaxPackedValue == null) { - if (size != 0) { - throw new RuntimeException("getMaxPackedValue is null points for field \"" + fieldInfo.name + "\" yet size=" + size); - } - } else if (globalMaxPackedValue.length != packedBytesCount) { - throw new RuntimeException("getMaxPackedValue for field \"" + fieldInfo.name + "\" return length=" + globalMaxPackedValue.length + " array, but should be " + packedBytesCount); - } - - values.intersect(new PointValues.IntersectVisitor() { - - private int lastDocID = -1; - - @Override - public void visit(int docID) { - throw new RuntimeException("codec called IntersectVisitor.visit without a packed value for docID=" + docID); - } - - @Override - public void visit(int docID, byte[] packedValue) { - checkPackedValue("packed value", packedValue, docID); - pointCountSeen[0]++; - docsSeen.set(docID); - 
- for(int dim=0;dim 0) { - throw new RuntimeException("packed points cell minPackedValue " + Arrays.toString(minPackedValue) + - " is out-of-bounds of the cell's maxPackedValue " + Arrays.toString(maxPackedValue) + " dim=" + dim + " field=\"" + fieldInfo.name + "\""); - } - - // Make sure this cell is not outside of the global min/max: - if (StringHelper.compare(bytesPerDim, minPackedValue, offset, globalMinPackedValue, offset) < 0) { - throw new RuntimeException("packed points cell minPackedValue " + Arrays.toString(minPackedValue) + - " is out-of-bounds of the global minimum " + Arrays.toString(globalMinPackedValue) + " dim=" + dim + " field=\"" + fieldInfo.name + "\""); - } - - if (StringHelper.compare(bytesPerDim, maxPackedValue, offset, globalMinPackedValue, offset) < 0) { - throw new RuntimeException("packed points cell maxPackedValue " + Arrays.toString(maxPackedValue) + - " is out-of-bounds of the global minimum " + Arrays.toString(globalMinPackedValue) + " dim=" + dim + " field=\"" + fieldInfo.name + "\""); - } - - if (StringHelper.compare(bytesPerDim, minPackedValue, offset, globalMaxPackedValue, offset) > 0) { - throw new RuntimeException("packed points cell minPackedValue " + Arrays.toString(minPackedValue) + - " is out-of-bounds of the global maximum " + Arrays.toString(globalMaxPackedValue) + " dim=" + dim + " field=\"" + fieldInfo.name + "\""); - } - if (StringHelper.compare(bytesPerDim, maxPackedValue, offset, globalMaxPackedValue, offset) > 0) { - throw new RuntimeException("packed points cell maxPackedValue " + Arrays.toString(maxPackedValue) + - " is out-of-bounds of the global maximum " + Arrays.toString(globalMaxPackedValue) + " dim=" + dim + " field=\"" + fieldInfo.name + "\""); - } - } - - // We always pretend the query shape is so complex that it crosses every cell, so - // that packedValue is passed for every document - return PointValues.Relation.CELL_CROSSES_QUERY; - } - - private void checkPackedValue(String desc, byte[] packedValue, int docID) { - if (packedValue == null) { - throw new RuntimeException(desc + " is null for docID=" + docID + " field=\"" + fieldInfo.name + "\""); - } - - if (packedValue.length != packedBytesCount) { - throw new RuntimeException(desc + " has incorrect length=" + packedValue.length + " vs expected=" + packedBytesCount + " for docID=" + docID + " field=\"" + fieldInfo.name + "\""); - } - } - }); - - if (pointCountSeen[0] != size) { - throw new RuntimeException("point values for field \"" + fieldInfo.name + "\" claims to have size=" + size + " points, but in fact has " + pointCountSeen[0]); - } - - if (docsSeen.cardinality() != docCount) { - throw new RuntimeException("point values for field \"" + fieldInfo.name + "\" claims to have docCount=" + docCount + " but in fact has " + docsSeen.cardinality()); - } + status.totalValuePoints += visitor.getPointCountSeen(); } } } + msg(infoStream, String.format(Locale.ROOT, "OK [%d fields, %d points] [took %.3f sec]", status.totalValueFields, status.totalValuePoints, nsToSec(System.nanoTime()-startNS))); } catch (Throwable e) { @@ -1972,6 +1843,167 @@ public final class CheckIndex implements Closeable { return status; } + /** Walks the entire N-dimensional points space, verifying that all points fall within the last cell's boundaries. 
+ * + * @lucene.internal */ + public static class VerifyPointsVisitor implements PointValues.IntersectVisitor { + private long pointCountSeen; + private int lastDocID = -1; + private final int maxDoc; + private final FixedBitSet docsSeen; + private final byte[] lastMinPackedValue; + private final byte[] lastMaxPackedValue; + private final byte[] lastPackedValue; + private final byte[] globalMinPackedValue; + private final byte[] globalMaxPackedValue; + private final int packedBytesCount; + private final int numDims; + private final int bytesPerDim; + private final String fieldName; + + /** Sole constructor */ + public VerifyPointsVisitor(String fieldName, int maxDoc, PointValues values) throws IOException { + this.maxDoc = maxDoc; + this.fieldName = fieldName; + numDims = values.getNumDimensions(); + bytesPerDim = values.getBytesPerDimension(); + packedBytesCount = numDims * bytesPerDim; + globalMinPackedValue = values.getMinPackedValue(); + globalMaxPackedValue = values.getMaxPackedValue(); + docsSeen = new FixedBitSet(maxDoc); + lastMinPackedValue = new byte[packedBytesCount]; + lastMaxPackedValue = new byte[packedBytesCount]; + lastPackedValue = new byte[packedBytesCount]; + + if (values.getDocCount() > values.size()) { + throw new RuntimeException("point values for field \"" + fieldName + "\" claims to have size=" + values.size() + " points and inconsistent docCount=" + values.getDocCount()); + } + + if (values.getDocCount() > maxDoc) { + throw new RuntimeException("point values for field \"" + fieldName + "\" claims to have docCount=" + values.getDocCount() + " but that's greater than maxDoc=" + maxDoc); + } + + if (globalMinPackedValue == null) { + if (values.size() != 0) { + throw new RuntimeException("getMinPackedValue is null points for field \"" + fieldName + "\" yet size=" + values.size()); + } + } else if (globalMinPackedValue.length != packedBytesCount) { + throw new RuntimeException("getMinPackedValue for field \"" + fieldName + "\" return length=" + globalMinPackedValue.length + " array, but should be " + packedBytesCount); + } + if (globalMaxPackedValue == null) { + if (values.size() != 0) { + throw new RuntimeException("getMaxPackedValue is null points for field \"" + fieldName + "\" yet size=" + values.size()); + } + } else if (globalMaxPackedValue.length != packedBytesCount) { + throw new RuntimeException("getMaxPackedValue for field \"" + fieldName + "\" return length=" + globalMaxPackedValue.length + " array, but should be " + packedBytesCount); + } + } + + /** Returns total number of points in this BKD tree */ + public long getPointCountSeen() { + return pointCountSeen; + } + + /** Returns total number of unique docIDs in this BKD tree */ + public long getDocCountSeen() { + return docsSeen.cardinality(); + } + + @Override + public void visit(int docID) { + throw new RuntimeException("codec called IntersectVisitor.visit without a packed value for docID=" + docID); + } + + @Override + public void visit(int docID, byte[] packedValue) { + checkPackedValue("packed value", packedValue, docID); + pointCountSeen++; + docsSeen.set(docID); + + for(int dim=0;dim 0) { + throw new RuntimeException("packed points cell minPackedValue " + Arrays.toString(minPackedValue) + + " is out-of-bounds of the cell's maxPackedValue " + Arrays.toString(maxPackedValue) + " dim=" + dim + " field=\"" + fieldName + "\""); + } + + // Make sure this cell is not outside of the global min/max: + if (StringHelper.compare(bytesPerDim, minPackedValue, offset, globalMinPackedValue, offset) < 0) { + throw 
new RuntimeException("packed points cell minPackedValue " + Arrays.toString(minPackedValue) + + " is out-of-bounds of the global minimum " + Arrays.toString(globalMinPackedValue) + " dim=" + dim + " field=\"" + fieldName + "\""); + } + + if (StringHelper.compare(bytesPerDim, maxPackedValue, offset, globalMinPackedValue, offset) < 0) { + throw new RuntimeException("packed points cell maxPackedValue " + Arrays.toString(maxPackedValue) + + " is out-of-bounds of the global minimum " + Arrays.toString(globalMinPackedValue) + " dim=" + dim + " field=\"" + fieldName + "\""); + } + + if (StringHelper.compare(bytesPerDim, minPackedValue, offset, globalMaxPackedValue, offset) > 0) { + throw new RuntimeException("packed points cell minPackedValue " + Arrays.toString(minPackedValue) + + " is out-of-bounds of the global maximum " + Arrays.toString(globalMaxPackedValue) + " dim=" + dim + " field=\"" + fieldName + "\""); + } + if (StringHelper.compare(bytesPerDim, maxPackedValue, offset, globalMaxPackedValue, offset) > 0) { + throw new RuntimeException("packed points cell maxPackedValue " + Arrays.toString(maxPackedValue) + + " is out-of-bounds of the global maximum " + Arrays.toString(globalMaxPackedValue) + " dim=" + dim + " field=\"" + fieldName + "\""); + } + } + + // We always pretend the query shape is so complex that it crosses every cell, so + // that packedValue is passed for every document + return PointValues.Relation.CELL_CROSSES_QUERY; + } + + private void checkPackedValue(String desc, byte[] packedValue, int docID) { + if (packedValue == null) { + throw new RuntimeException(desc + " is null for docID=" + docID + " field=\"" + fieldName + "\""); + } + + if (packedValue.length != packedBytesCount) { + throw new RuntimeException(desc + " has incorrect length=" + packedValue.length + " vs expected=" + packedBytesCount + " for docID=" + docID + " field=\"" + fieldName + "\""); + } + } + } + + /** * Test stored fields. 
* @lucene.experimental diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java index 6bf7dfc1a86..6cccf4cf1d1 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java @@ -17,14 +17,15 @@ package org.apache.lucene.util.bkd; import java.io.IOException; -import java.util.Arrays; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.PointValues; +import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.Accountable; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.MathUtil; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.StringHelper; @@ -32,14 +33,12 @@ import org.apache.lucene.util.StringHelper; * * @lucene.experimental */ -public class BKDReader extends PointValues implements Accountable { +public final class BKDReader extends PointValues implements Accountable { // Packed array of byte[] holding all split values in the full binary tree: - final private byte[] splitPackedValues; - final long[] leafBlockFPs; - final private int leafNodeOffset; + final int leafNodeOffset; final int numDims; final int bytesPerDim; - final int bytesPerIndexEntry; + final int numLeaves; final IndexInput in; final int maxPointsInLeafNode; final byte[] minPackedValue; @@ -49,6 +48,14 @@ public class BKDReader extends PointValues implements Accountable { final int version; protected final int packedBytesLength; + // Used for 6.4.0+ index format: + final byte[] packedIndex; + + // Used for Legacy (pre-6.4.0) index format, to hold a compact form of the index: + final private byte[] splitPackedValues; + final int bytesPerIndexEntry; + final long[] leafBlockFPs; + /** Caller must pre-seek the provided {@link IndexInput} to the index location that {@link BKDWriter#finish} returned */ public BKDReader(IndexInput in) throws IOException { version = CodecUtil.checkHeader(in, BKDWriter.CODEC_NAME, BKDWriter.VERSION_START, BKDWriter.VERSION_CURRENT); @@ -59,7 +66,7 @@ public class BKDReader extends PointValues implements Accountable { packedBytesLength = numDims * bytesPerDim; // Read index: - int numLeaves = in.readVInt(); + numLeaves = in.readVInt(); assert numLeaves > 0; leafNodeOffset = numLeaves; @@ -78,203 +85,378 @@ public class BKDReader extends PointValues implements Accountable { pointCount = in.readVLong(); docCount = in.readVInt(); - splitPackedValues = new byte[bytesPerIndexEntry*numLeaves]; - - // TODO: don't write split packed values[0]! - in.readBytes(splitPackedValues, 0, splitPackedValues.length); - - // Read the file pointers to the start of each leaf block: - long[] leafBlockFPs = new long[numLeaves]; - long lastFP = 0; - for(int i=0;i 1) { - //System.out.println("BKDR: numLeaves=" + numLeaves); - int levelCount = 2; - while (true) { - //System.out.println(" cycle levelCount=" + levelCount); - if (numLeaves >= levelCount && numLeaves <= 2*levelCount) { - int lastLevel = 2*(numLeaves - levelCount); - assert lastLevel >= 0; - /* - System.out.println("BKDR: lastLevel=" + lastLevel + " vs " + levelCount); - System.out.println("FPs before:"); - for(int i=0;i= maxDoc) { - throw new RuntimeException("docID=" + docID + " is out of bounds of 0.." 
+ maxDoc); - } - for(int dim=0;dim 0) { - throw new RuntimeException("value=" + new BytesRef(packedValue, dim*bytesPerDim, bytesPerDim) + " for docID=" + docID + " dim=" + dim + " is less than this leaf block's minimum=" + new BytesRef(cellMinPacked, dim*bytesPerDim, bytesPerDim)); - } - if (StringHelper.compare(bytesPerDim, cellMaxPacked, dim*bytesPerDim, packedValue, dim*bytesPerDim) < 0) { - throw new RuntimeException("value=" + new BytesRef(packedValue, dim*bytesPerDim, bytesPerDim) + " for docID=" + docID + " dim=" + dim + " is greater than this leaf block's maximum=" + new BytesRef(cellMaxPacked, dim*bytesPerDim, bytesPerDim)); - } - } - - if (numDims == 1) { - // With only 1D, all values should always be in sorted order - if (lastPackedValue == null) { - lastPackedValue = Arrays.copyOf(packedValue, packedValue.length); - } else if (StringHelper.compare(bytesPerDim, lastPackedValue, 0, packedValue, 0) > 0) { - throw new RuntimeException("value=" + new BytesRef(packedValue) + " for docID=" + docID + " dim=0" + " sorts before last value=" + new BytesRef(lastPackedValue)); - } else { - System.arraycopy(packedValue, 0, lastPackedValue, 0, bytesPerDim); - } - } - } - - @Override - public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { - throw new UnsupportedOperationException(); - } - } - - /** Only used for debugging, to make sure all values in each leaf block fall within the range expected by the index */ - // TODO: maybe we can get this into CheckIndex? - public void verify(int maxDoc) throws IOException { - //System.out.println("BKDR.verify this=" + this); - // Visits every doc in every leaf block and confirms that - // their values agree with the index: - byte[] rootMinPacked = new byte[packedBytesLength]; - byte[] rootMaxPacked = new byte[packedBytesLength]; - Arrays.fill(rootMaxPacked, (byte) 0xff); - verify(getIntersectState(new VerifyVisitor(numDims, bytesPerDim, maxDoc)), 1, rootMinPacked, rootMaxPacked); - } - - private void verify(IntersectState state, int nodeID, byte[] cellMinPacked, byte[] cellMaxPacked) throws IOException { - - if (nodeID >= leafNodeOffset) { - int leafID = nodeID - leafNodeOffset; - - // In the unbalanced case it's possible the left most node only has one child: - if (leafID < leafBlockFPs.length) { - //System.out.println("CHECK nodeID=" + nodeID + " leaf=" + (nodeID-leafNodeOffset) + " offset=" + leafNodeOffset + " fp=" + leafBlockFPs[leafID]); - //System.out.println("BKDR.verify leafID=" + leafID + " nodeID=" + nodeID + " fp=" + leafBlockFPs[leafID] + " min=" + new BytesRef(cellMinPacked) + " max=" + new BytesRef(cellMaxPacked)); - - // Leaf node: check that all values are in fact in bounds: - VerifyVisitor visitor = (VerifyVisitor) state.visitor; - visitor.cellMinPacked = cellMinPacked; - visitor.cellMaxPacked = cellMaxPacked; - - int count = readDocIDs(state.in, leafBlockFPs[leafID], state.scratchDocIDs); - visitDocValues(state.commonPrefixLengths, state.scratchPackedValue, state.in, state.scratchDocIDs, count, state.visitor); - } else { - //System.out.println("BKDR.verify skip leafID=" + leafID); - } + if (version >= BKDWriter.VERSION_PACKED_INDEX) { + int numBytes = in.readVInt(); + packedIndex = new byte[numBytes]; + in.readBytes(packedIndex, 0, numBytes); + leafBlockFPs = null; + splitPackedValues = null; } else { - // Non-leaf node: + // legacy un-packed index - int address = nodeID * bytesPerIndexEntry; - int splitDim; - if (numDims == 1) { - splitDim = 0; - if (version < BKDWriter.VERSION_IMPLICIT_SPLIT_DIM_1D) { - // skip over 
wastefully encoded 0 splitDim: - assert splitPackedValues[address] == 0; - address++; + splitPackedValues = new byte[bytesPerIndexEntry*numLeaves]; + + in.readBytes(splitPackedValues, 0, splitPackedValues.length); + + // Read the file pointers to the start of each leaf block: + long[] leafBlockFPs = new long[numLeaves]; + long lastFP = 0; + for(int i=0;i 1) { + int levelCount = 2; + while (true) { + if (numLeaves >= levelCount && numLeaves <= 2*levelCount) { + int lastLevel = 2*(numLeaves - levelCount); + assert lastLevel >= 0; + if (lastLevel != 0) { + // Last level is partially filled, so we must rotate the leaf FPs to match. We do this here, after loading + // at read-time, so that we can still delta code them on disk at write: + long[] newLeafBlockFPs = new long[numLeaves]; + System.arraycopy(leafBlockFPs, lastLevel, newLeafBlockFPs, 0, leafBlockFPs.length - lastLevel); + System.arraycopy(leafBlockFPs, 0, newLeafBlockFPs, leafBlockFPs.length - lastLevel, lastLevel); + leafBlockFPs = newLeafBlockFPs; + } + break; + } + + levelCount *= 2; } - } else { - splitDim = splitPackedValues[address++] & 0xff; } - assert splitDim < numDims; - - byte[] splitPackedValue = new byte[packedBytesLength]; - - // Recurse on left sub-tree: - System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, packedBytesLength); - System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim); - verify(state, - 2*nodeID, - cellMinPacked, splitPackedValue); - - // Recurse on right sub-tree: - System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, packedBytesLength); - System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim); - verify(state, - 2*nodeID+1, - splitPackedValue, cellMaxPacked); + this.leafBlockFPs = leafBlockFPs; + packedIndex = null; } + + this.in = in; + } + + long getMinLeafBlockFP() { + if (packedIndex != null) { + return new ByteArrayDataInput(packedIndex).readVLong(); + } else { + long minFP = Long.MAX_VALUE; + for(long fp : leafBlockFPs) { + minFP = Math.min(minFP, fp); + } + return minFP; + } + } + + /** Used to walk the in-heap index + * + * @lucene.internal */ + public abstract class IndexTree implements Cloneable { + protected int nodeID; + // level is 1-based so that we can do level-1 w/o checking each time: + protected int level; + protected int splitDim; + protected final byte[][] splitPackedValueStack; + + protected IndexTree() { + int treeDepth = getTreeDepth(); + splitPackedValueStack = new byte[treeDepth+1][]; + nodeID = 1; + level = 1; + splitPackedValueStack[level] = new byte[packedBytesLength]; + } + + public void pushLeft() { + nodeID *= 2; + level++; + if (splitPackedValueStack[level] == null) { + splitPackedValueStack[level] = new byte[packedBytesLength]; + } + } + + /** Clone, but you are not allowed to pop up past the point where the clone happened. 
*/ + public abstract IndexTree clone(); + + public void pushRight() { + nodeID = nodeID * 2 + 1; + level++; + if (splitPackedValueStack[level] == null) { + splitPackedValueStack[level] = new byte[packedBytesLength]; + } + } + + public void pop() { + nodeID /= 2; + level--; + splitDim = -1; + //System.out.println(" pop nodeID=" + nodeID); + } + + public boolean isLeafNode() { + return nodeID >= leafNodeOffset; + } + + public boolean nodeExists() { + return nodeID - leafNodeOffset < leafNodeOffset; + } + + public int getNodeID() { + return nodeID; + } + + public byte[] getSplitPackedValue() { + assert isLeafNode() == false; + assert splitPackedValueStack[level] != null: "level=" + level; + return splitPackedValueStack[level]; + } + + /** Only valid after pushLeft or pushRight, not pop! */ + public int getSplitDim() { + assert isLeafNode() == false; + return splitDim; + } + + /** Only valid after pushLeft or pushRight, not pop! */ + public abstract BytesRef getSplitDimValue(); + + /** Only valid after pushLeft or pushRight, not pop! */ + public abstract long getLeafBlockFP(); + } + + /** Reads the original simple yet heap-heavy index format */ + private final class LegacyIndexTree extends IndexTree { + + private long leafBlockFP; + private final byte[] splitDimValue = new byte[bytesPerDim]; + private final BytesRef scratch = new BytesRef(); + + public LegacyIndexTree() { + setNodeData(); + scratch.bytes = splitDimValue; + scratch.length = bytesPerDim; + } + + @Override + public LegacyIndexTree clone() { + LegacyIndexTree index = new LegacyIndexTree(); + index.nodeID = nodeID; + index.level = level; + index.splitDim = splitDim; + index.leafBlockFP = leafBlockFP; + index.splitPackedValueStack[index.level] = splitPackedValueStack[index.level].clone(); + + return index; + } + + @Override + public void pushLeft() { + super.pushLeft(); + setNodeData(); + } + + @Override + public void pushRight() { + super.pushRight(); + setNodeData(); + } + + private void setNodeData() { + if (isLeafNode()) { + leafBlockFP = leafBlockFPs[nodeID - leafNodeOffset]; + splitDim = -1; + } else { + leafBlockFP = -1; + int address = nodeID * bytesPerIndexEntry; + if (numDims == 1) { + splitDim = 0; + if (version < BKDWriter.VERSION_IMPLICIT_SPLIT_DIM_1D) { + // skip over wastefully encoded 0 splitDim: + assert splitPackedValues[address] == 0; + address++; + } + } else { + splitDim = splitPackedValues[address++] & 0xff; + } + System.arraycopy(splitPackedValues, address, splitDimValue, 0, bytesPerDim); + } + } + + @Override + public long getLeafBlockFP() { + assert isLeafNode(); + return leafBlockFP; + } + + @Override + public BytesRef getSplitDimValue() { + assert isLeafNode() == false; + return scratch; + } + + @Override + public void pop() { + super.pop(); + leafBlockFP = -1; + } + } + + /** Reads the new packed byte[] index format which can be up to ~63% smaller than the legacy index format on 20M NYC taxis tests. This + * format takes advantage of the limited access pattern to the BKD tree at search time, i.e. starting at the root node and recursing + * downwards one child at a time. 
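// ----------------------------------------------------------------------------
// Editor's sketch, not part of the patch: the "limited access pattern" described
// above, seen from the outside. The cursor starts at the root and moves one child
// at a time via pushLeft()/pushRight()/pop(); here it is used only to count leaf
// blocks. The IndexTree is assumed to come from getIntersectState(visitor).index,
// as in the intersect/addAll code further below.
static long countLeafBlocks(BKDReader.IndexTree index) {
  if (index.isLeafNode()) {
    // in the unbalanced case a leaf slot may not actually exist:
    return index.nodeExists() ? 1 : 0;
  }
  index.pushLeft();
  long leaves = countLeafBlocks(index);
  index.pop();
  index.pushRight();
  leaves += countLeafBlocks(index);
  index.pop();
  return leaves;
}
// ----------------------------------------------------------------------------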
*/ + private final class PackedIndexTree extends IndexTree { + // used to read the packed byte[] + private final ByteArrayDataInput in; + // holds the minimum (left most) leaf block file pointer for each level we've recursed to: + private final long[] leafBlockFPStack; + // holds the address, in the packed byte[] index, of the left-node of each level: + private final int[] leftNodePositions; + // holds the address, in the packed byte[] index, of the right-node of each level: + private final int[] rightNodePositions; + // holds the splitDim for each level: + private final int[] splitDims; + // true if the per-dim delta we read for the node at this level is a negative offset vs. the last split on this dim; this is a packed + // 2D array, i.e. to access array[level][dim] you read from negativeDeltas[level*numDims+dim]. this will be true if the last time we + // split on this dimension, we next pushed to the left sub-tree: + private final boolean[] negativeDeltas; + // holds the packed per-level split values; the intersect method uses this to save the cell min/max as it recurses: + private final byte[][] splitValuesStack; + // scratch value to return from getPackedValue: + private final BytesRef scratch; + + public PackedIndexTree() { + int treeDepth = getTreeDepth(); + leafBlockFPStack = new long[treeDepth+1]; + leftNodePositions = new int[treeDepth+1]; + rightNodePositions = new int[treeDepth+1]; + splitValuesStack = new byte[treeDepth+1][]; + splitDims = new int[treeDepth+1]; + negativeDeltas = new boolean[numDims*(treeDepth+1)]; + + in = new ByteArrayDataInput(packedIndex); + splitValuesStack[0] = new byte[packedBytesLength]; + readNodeData(false); + scratch = new BytesRef(); + scratch.length = bytesPerDim; + } + + @Override + public PackedIndexTree clone() { + PackedIndexTree index = new PackedIndexTree(); + index.nodeID = nodeID; + index.level = level; + index.splitDim = splitDim; + System.arraycopy(negativeDeltas, level*numDims, index.negativeDeltas, level*numDims, numDims); + index.leafBlockFPStack[level] = leafBlockFPStack[level]; + index.leftNodePositions[level] = leftNodePositions[level]; + index.rightNodePositions[level] = rightNodePositions[level]; + index.splitValuesStack[index.level] = splitValuesStack[index.level].clone(); + System.arraycopy(negativeDeltas, level*numDims, index.negativeDeltas, level*numDims, numDims); + index.splitDims[level] = splitDims[level]; + return index; + } + + @Override + public void pushLeft() { + int nodePosition = leftNodePositions[level]; + super.pushLeft(); + System.arraycopy(negativeDeltas, (level-1)*numDims, negativeDeltas, level*numDims, numDims); + assert splitDim != -1; + negativeDeltas[level*numDims+splitDim] = true; + in.setPosition(nodePosition); + readNodeData(true); + } + + @Override + public void pushRight() { + int nodePosition = rightNodePositions[level]; + super.pushRight(); + System.arraycopy(negativeDeltas, (level-1)*numDims, negativeDeltas, level*numDims, numDims); + assert splitDim != -1; + negativeDeltas[level*numDims+splitDim] = false; + in.setPosition(nodePosition); + readNodeData(false); + } + + @Override + public void pop() { + super.pop(); + splitDim = splitDims[level]; + } + + @Override + public long getLeafBlockFP() { + assert isLeafNode(): "nodeID=" + nodeID + " is not a leaf"; + return leafBlockFPStack[level]; + } + + @Override + public BytesRef getSplitDimValue() { + assert isLeafNode() == false; + scratch.bytes = splitValuesStack[level]; + scratch.offset = splitDim * bytesPerDim; + return scratch; + } + + private void 
readNodeData(boolean isLeft) { + + leafBlockFPStack[level] = leafBlockFPStack[level-1]; + + // read leaf block FP delta + if (isLeft == false) { + leafBlockFPStack[level] += in.readVLong(); + } + + if (isLeafNode()) { + splitDim = -1; + } else { + + // read split dim, prefix, firstDiffByteDelta encoded as int: + int code = in.readVInt(); + splitDim = code % numDims; + splitDims[level] = splitDim; + code /= numDims; + int prefix = code % (1+bytesPerDim); + int suffix = bytesPerDim - prefix; + + if (splitValuesStack[level] == null) { + splitValuesStack[level] = new byte[packedBytesLength]; + } + System.arraycopy(splitValuesStack[level-1], 0, splitValuesStack[level], 0, packedBytesLength); + if (suffix > 0) { + int firstDiffByteDelta = code / (1+bytesPerDim); + if (negativeDeltas[level*numDims + splitDim]) { + firstDiffByteDelta = -firstDiffByteDelta; + } + int oldByte = splitValuesStack[level][splitDim*bytesPerDim+prefix] & 0xFF; + splitValuesStack[level][splitDim*bytesPerDim+prefix] = (byte) (oldByte + firstDiffByteDelta); + in.readBytes(splitValuesStack[level], splitDim*bytesPerDim+prefix+1, suffix-1); + } else { + // our split value is == last split value in this dim, which can happen when there are many duplicate values + } + + int leftNumBytes; + if (nodeID * 2 < leafNodeOffset) { + leftNumBytes = in.readVInt(); + } else { + leftNumBytes = 0; + } + + leftNodePositions[level] = in.getPosition(); + rightNodePositions[level] = leftNodePositions[level] + leftNumBytes; + } + } + } + + private int getTreeDepth() { + // First +1 because all the non-leave nodes makes another power + // of 2; e.g. to have a fully balanced tree with 4 leaves you + // need a depth=3 tree: + + // Second +1 because MathUtil.log computes floor of the logarithm; e.g. + // with 5 leaves you need a depth=4 tree: + return MathUtil.log(numLeaves, 2) + 2; } /** Used to track all state for a single call to {@link #intersect}. */ @@ -285,57 +467,73 @@ public class BKDReader extends PointValues implements Accountable { final int[] commonPrefixLengths; final IntersectVisitor visitor; + public final IndexTree index; public IntersectState(IndexInput in, int numDims, int packedBytesLength, int maxPointsInLeafNode, - IntersectVisitor visitor) { + IntersectVisitor visitor, + IndexTree indexVisitor) { this.in = in; this.visitor = visitor; this.commonPrefixLengths = new int[numDims]; this.scratchDocIDs = new int[maxPointsInLeafNode]; this.scratchPackedValue = new byte[packedBytesLength]; + this.index = indexVisitor; } } public void intersect(IntersectVisitor visitor) throws IOException { - intersect(getIntersectState(visitor), 1, minPackedValue, maxPackedValue); + intersect(getIntersectState(visitor), minPackedValue, maxPackedValue); } /** Fast path: this is called when the query box fully encompasses all cells under this node. */ - private void addAll(IntersectState state, int nodeID) throws IOException { + private void addAll(IntersectState state) throws IOException { //System.out.println("R: addAll nodeID=" + nodeID); - if (nodeID >= leafNodeOffset) { + if (state.index.isLeafNode()) { //System.out.println("ADDALL"); - visitDocIDs(state.in, leafBlockFPs[nodeID-leafNodeOffset], state.visitor); + if (state.index.nodeExists()) { + visitDocIDs(state.in, state.index.getLeafBlockFP(), state.visitor); + } // TODO: we can assert that the first value here in fact matches what the index claimed? 
} else { - addAll(state, 2*nodeID); - addAll(state, 2*nodeID+1); + state.index.pushLeft(); + addAll(state); + state.index.pop(); + + state.index.pushRight(); + addAll(state); + state.index.pop(); } } /** Create a new {@link IntersectState} */ public IntersectState getIntersectState(IntersectVisitor visitor) { + IndexTree index; + if (packedIndex != null) { + index = new PackedIndexTree(); + } else { + index = new LegacyIndexTree(); + } return new IntersectState(in.clone(), numDims, packedBytesLength, maxPointsInLeafNode, - visitor); + visitor, + index); } /** Visits all docIDs and packed values in a single leaf block */ - public void visitLeafBlockValues(int nodeID, IntersectState state) throws IOException { - int leafID = nodeID - leafNodeOffset; + public void visitLeafBlockValues(IndexTree index, IntersectState state) throws IOException { // Leaf node; scan and filter all points in this block: - int count = readDocIDs(state.in, leafBlockFPs[leafID], state.scratchDocIDs); + int count = readDocIDs(state.in, index.getLeafBlockFP(), state.scratchDocIDs); // Again, this time reading values and checking with the visitor visitDocValues(state.commonPrefixLengths, state.scratchPackedValue, state.in, state.scratchDocIDs, count, state.visitor); } - protected void visitDocIDs(IndexInput in, long blockFP, IntersectVisitor visitor) throws IOException { + private void visitDocIDs(IndexInput in, long blockFP, IntersectVisitor visitor) throws IOException { // Leaf node in.seek(blockFP); @@ -350,7 +548,7 @@ public class BKDReader extends PointValues implements Accountable { } } - protected int readDocIDs(IndexInput in, long blockFP, int[] docIDs) throws IOException { + int readDocIDs(IndexInput in, long blockFP, int[] docIDs) throws IOException { in.seek(blockFP); // How many points are stored in this leaf cell: @@ -365,7 +563,7 @@ public class BKDReader extends PointValues implements Accountable { return count; } - protected void visitDocValues(int[] commonPrefixLengths, byte[] scratchPackedValue, IndexInput in, int[] docIDs, int count, IntersectVisitor visitor) throws IOException { + void visitDocValues(int[] commonPrefixLengths, byte[] scratchPackedValue, IndexInput in, int[] docIDs, int count, IntersectVisitor visitor) throws IOException { visitor.grow(count); readCommonPrefixes(commonPrefixLengths, scratchPackedValue, in); @@ -434,13 +632,10 @@ public class BKDReader extends PointValues implements Accountable { } } - private void intersect(IntersectState state, - int nodeID, - byte[] cellMinPacked, byte[] cellMaxPacked) - throws IOException { + private void intersect(IntersectState state, byte[] cellMinPacked, byte[] cellMaxPacked) throws IOException { /* - System.out.println("\nR: intersect nodeID=" + nodeID); + System.out.println("\nR: intersect nodeID=" + state.index.getNodeID()); for(int dim=0;dim= 0 && dim < numDims; @@ -1019,46 +1034,238 @@ public class BKDWriter implements Closeable { return indexFP; } - /** Subclass can change how it writes the index. */ - protected void writeIndex(IndexOutput out, long[] leafBlockFPs, byte[] splitPackedValues) throws IOException { + /** Packs the two arrays, representing a balanced binary tree, into a compact byte[] structure. */ + private byte[] packIndex(long[] leafBlockFPs, byte[] splitPackedValues) throws IOException { + + int numLeaves = leafBlockFPs.length; + + // Possibly rotate the leaf block FPs, if the index not fully balanced binary tree (only happens + // if it was created by OneDimensionBKDWriter). 
In this case the leaf nodes may straddle the two bottom + // levels of the binary tree: + if (numDims == 1 && numLeaves > 1) { + int levelCount = 2; + while (true) { + if (numLeaves >= levelCount && numLeaves <= 2*levelCount) { + int lastLevel = 2*(numLeaves - levelCount); + assert lastLevel >= 0; + if (lastLevel != 0) { + // Last level is partially filled, so we must rotate the leaf FPs to match. We do this here, after loading + // at read-time, so that we can still delta code them on disk at write: + long[] newLeafBlockFPs = new long[numLeaves]; + System.arraycopy(leafBlockFPs, lastLevel, newLeafBlockFPs, 0, leafBlockFPs.length - lastLevel); + System.arraycopy(leafBlockFPs, 0, newLeafBlockFPs, leafBlockFPs.length - lastLevel, lastLevel); + leafBlockFPs = newLeafBlockFPs; + } + break; + } + + levelCount *= 2; + } + } + + /** Reused while packing the index */ + RAMOutputStream writeBuffer = new RAMOutputStream(); + + // This is the "file" we append the byte[] to: + List blocks = new ArrayList<>(); + byte[] lastSplitValues = new byte[bytesPerDim * numDims]; + //System.out.println("\npack index"); + int totalSize = recursePackIndex(writeBuffer, leafBlockFPs, splitPackedValues, 0l, blocks, 1, lastSplitValues, new boolean[numDims], false); + + // Compact the byte[] blocks into single byte index: + byte[] index = new byte[totalSize]; + int upto = 0; + for(byte[] block : blocks) { + System.arraycopy(block, 0, index, upto, block.length); + upto += block.length; + } + assert upto == totalSize; + + return index; + } + + /** Appends the current contents of writeBuffer as another block on the growing in-memory file */ + private int appendBlock(RAMOutputStream writeBuffer, List blocks) throws IOException { + int pos = Math.toIntExact(writeBuffer.getFilePointer()); + byte[] bytes = new byte[pos]; + writeBuffer.writeTo(bytes, 0); + writeBuffer.reset(); + blocks.add(bytes); + return pos; + } + + /** + * lastSplitValues is per-dimension split value previously seen; we use this to prefix-code the split byte[] on each inner node + */ + private int recursePackIndex(RAMOutputStream writeBuffer, long[] leafBlockFPs, byte[] splitPackedValues, long minBlockFP, List blocks, + int nodeID, byte[] lastSplitValues, boolean[] negativeDeltas, boolean isLeft) throws IOException { + if (nodeID >= leafBlockFPs.length) { + int leafID = nodeID - leafBlockFPs.length; + //System.out.println("recursePack leaf nodeID=" + nodeID); + + // In the unbalanced case it's possible the left most node only has one child: + if (leafID < leafBlockFPs.length) { + long delta = leafBlockFPs[leafID] - minBlockFP; + if (isLeft) { + assert delta == 0; + return 0; + } else { + assert nodeID == 1 || delta > 0: "nodeID=" + nodeID; + writeBuffer.writeVLong(delta); + return appendBlock(writeBuffer, blocks); + } + } else { + return 0; + } + } else { + long leftBlockFP; + if (isLeft == false) { + leftBlockFP = getLeftMostLeafBlockFP(leafBlockFPs, nodeID); + long delta = leftBlockFP - minBlockFP; + assert nodeID == 1 || delta > 0; + writeBuffer.writeVLong(delta); + } else { + // The left tree's left most leaf block FP is always the minimal FP: + leftBlockFP = minBlockFP; + } + + int address = nodeID * (1+bytesPerDim); + int splitDim = splitPackedValues[address++] & 0xff; + + //System.out.println("recursePack inner nodeID=" + nodeID + " splitDim=" + splitDim + " splitValue=" + new BytesRef(splitPackedValues, address, bytesPerDim)); + + // find common prefix with last split value in this dim: + int prefix = 0; + for(;prefix 0; - 
out.writeVInt(leafBlockFPs.length); + assert numLeaves > 0; + out.writeVInt(numLeaves); out.writeBytes(minPackedValue, 0, packedBytesLength); out.writeBytes(maxPackedValue, 0, packedBytesLength); out.writeVLong(pointCount); out.writeVInt(docsSeen.cardinality()); - - // NOTE: splitPackedValues[0] is unused, because nodeID is 1-based: - if (numDims == 1) { - // write the index, skipping the byte used to store the split dim since it is always 0 - for (int i = 1; i < splitPackedValues.length; i += 1 + bytesPerDim) { - out.writeBytes(splitPackedValues, i, bytesPerDim); - } - } else { - out.writeBytes(splitPackedValues, 0, splitPackedValues.length); - } - - long lastFP = 0; - for (int i=0;i 0: "maxPointsInLeafNode=" + maxPointsInLeafNode; out.writeVInt(count); DocIdsWriter.writeDocIds(docIDs, start, count, out); } - protected void writeLeafBlockPackedValues(IndexOutput out, int[] commonPrefixLengths, int count, int sortedDim, IntFunction packedValues) throws IOException { + private void writeLeafBlockPackedValues(IndexOutput out, int[] commonPrefixLengths, int count, int sortedDim, IntFunction packedValues) throws IOException { int prefixLenSum = Arrays.stream(commonPrefixLengths).sum(); if (prefixLenSum == packedBytesLength) { // all values in this block are equal @@ -1109,7 +1316,7 @@ public class BKDWriter implements Closeable { return end - start; } - protected void writeCommonPrefixes(IndexOutput out, int[] commonPrefixes, byte[] packedValue) throws IOException { + private void writeCommonPrefixes(IndexOutput out, int[] commonPrefixes, byte[] packedValue) throws IOException { for(int dim=0;dim(terms[idx], - outputs.newPair((long) idx, value))); + outputs.newPair((long) idx, value))); } new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(true); } diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/NearestNeighbor.java b/lucene/sandbox/src/java/org/apache/lucene/document/NearestNeighbor.java index 3b9f302f5eb..587c63fb7a3 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/NearestNeighbor.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/NearestNeighbor.java @@ -26,7 +26,10 @@ import org.apache.lucene.geo.Rectangle; import org.apache.lucene.index.PointValues.IntersectVisitor; import org.apache.lucene.index.PointValues.Relation; import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.SloppyMath; +import org.apache.lucene.util.bkd.BKDReader.IndexTree; +import org.apache.lucene.util.bkd.BKDReader.IntersectState; import org.apache.lucene.util.bkd.BKDReader; import static org.apache.lucene.geo.GeoEncodingUtils.decodeLatitude; @@ -41,16 +44,16 @@ class NearestNeighbor { static class Cell implements Comparable { final int readerIndex; - final int nodeID; final byte[] minPacked; final byte[] maxPacked; + final IndexTree index; /** The closest possible distance of all points in this cell */ final double distanceMeters; - public Cell(int readerIndex, int nodeID, byte[] minPacked, byte[] maxPacked, double distanceMeters) { + public Cell(IndexTree index, int readerIndex, byte[] minPacked, byte[] maxPacked, double distanceMeters) { + this.index = index; this.readerIndex = readerIndex; - this.nodeID = nodeID; this.minPacked = minPacked.clone(); this.maxPacked = maxPacked.clone(); this.distanceMeters = distanceMeters; @@ -66,7 +69,7 @@ class NearestNeighbor { double minLon = decodeLongitude(minPacked, Integer.BYTES); double maxLat = decodeLatitude(maxPacked, 0); double maxLon = 
decodeLongitude(maxPacked, Integer.BYTES); - return "Cell(readerIndex=" + readerIndex + " lat=" + minLat + " TO " + maxLat + ", lon=" + minLon + " TO " + maxLon + "; distanceMeters=" + distanceMeters + ")"; + return "Cell(readerIndex=" + readerIndex + " nodeID=" + index.getNodeID() + " isLeaf=" + index.isLeafNode() + " lat=" + minLat + " TO " + maxLat + ", lon=" + minLon + " TO " + maxLon + "; distanceMeters=" + distanceMeters + ")"; } } @@ -219,13 +222,21 @@ class NearestNeighbor { List states = new ArrayList<>(); // Add root cell for each reader into the queue: + int bytesPerDim = -1; + for(int i=0;i Date: Mon, 5 Dec 2016 06:45:16 -0500 Subject: [PATCH 15/53] LUCENE-7563: remove redundant array copy in PackedIndexTree.clone --- lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java | 1 - 1 file changed, 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java index 6cccf4cf1d1..44744c181a3 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java @@ -347,7 +347,6 @@ public final class BKDReader extends PointValues implements Accountable { index.nodeID = nodeID; index.level = level; index.splitDim = splitDim; - System.arraycopy(negativeDeltas, level*numDims, index.negativeDeltas, level*numDims, numDims); index.leafBlockFPStack[level] = leafBlockFPStack[level]; index.leftNodePositions[level] = leftNodePositions[level]; index.rightNodePositions[level] = rightNodePositions[level]; From 2e948fea300f883b7dfb586e303d5720d09b3210 Mon Sep 17 00:00:00 2001 From: David Smiley Date: Mon, 5 Dec 2016 16:11:57 -0500 Subject: [PATCH 16/53] LUCENE-7575: Add UnifiedHighlighter field matcher predicate (AKA requireFieldMatch=false) --- lucene/CHANGES.txt | 4 + .../uhighlight/MemoryIndexOffsetStrategy.java | 10 +- .../uhighlight/MultiTermHighlighting.java | 37 +-- .../search/uhighlight/PhraseHelper.java | 158 +++++++--- .../search/uhighlight/UnifiedHighlighter.java | 64 ++-- .../uhighlight/TestUnifiedHighlighter.java | 275 ++++++++++++++++++ .../TestUnifiedHighlighterExtensibility.java | 3 +- 7 files changed, 467 insertions(+), 84 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 79e44e112c8..c6c39ac45fb 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -121,6 +121,10 @@ Improvements control how text is analyzed and converted into a query (Matt Weber via Mike McCandless) +* LUCENE-7575: UnifiedHighlighter can now highlight fields with queries that don't + necessarily refer to that field (AKA requireFieldMatch==false). Disabled by default. + See UH get/setFieldMatcher. 
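  A minimal usage sketch for this entry (editor's illustration, not from the patch;
  the searcher, analyzer, query, topDocs and the "body" field are assumed, and
  checked exceptions are omitted):

    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, analyzer);
    // accept query terms from any field while highlighting "body", i.e. requireFieldMatch == false:
    highlighter.setFieldMatcher(field -> true);
    String[] fragments = highlighter.highlight("body", query, topDocs);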
(Jim Ferenczi via David Smiley) + Optimizations * LUCENE-7568: Optimize merging when index sorting is used but the diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java index 4028912fcf0..0001a801f8c 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java @@ -23,6 +23,7 @@ import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.function.Function; +import java.util.function.Predicate; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.FilteringTokenFilter; @@ -49,7 +50,7 @@ public class MemoryIndexOffsetStrategy extends AnalysisOffsetStrategy { private final LeafReader leafReader; private final CharacterRunAutomaton preMemIndexFilterAutomaton; - public MemoryIndexOffsetStrategy(String field, BytesRef[] extractedTerms, PhraseHelper phraseHelper, + public MemoryIndexOffsetStrategy(String field, Predicate fieldMatcher, BytesRef[] extractedTerms, PhraseHelper phraseHelper, CharacterRunAutomaton[] automata, Analyzer analyzer, Function> multiTermQueryRewrite) { super(field, extractedTerms, phraseHelper, automata, analyzer); @@ -57,13 +58,14 @@ public class MemoryIndexOffsetStrategy extends AnalysisOffsetStrategy { memoryIndex = new MemoryIndex(true, storePayloads);//true==store offsets leafReader = (LeafReader) memoryIndex.createSearcher().getIndexReader(); // appears to be re-usable // preFilter for MemoryIndex - preMemIndexFilterAutomaton = buildCombinedAutomaton(field, terms, this.automata, phraseHelper, multiTermQueryRewrite); + preMemIndexFilterAutomaton = buildCombinedAutomaton(fieldMatcher, terms, this.automata, phraseHelper, multiTermQueryRewrite); } /** * Build one {@link CharacterRunAutomaton} matching any term the query might match. 
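// ----------------------------------------------------------------------------
// Editor's aside, not part of the patch: a tiny check of what such a per-query
// automaton accepts, using the same prefix construction that the PrefixQuery
// branch of MultiTermHighlighting.extractAutomata uses.
static boolean demoPrefixAutomaton() {
  CharacterRunAutomaton prefixRun = new CharacterRunAutomaton(
      Operations.concatenate(Automata.makeString("app"), Automata.makeAnyString()));
  return prefixRun.run("apple")            // anything starting with "app" matches
      && prefixRun.run("app")              // the bare prefix matches too
      && prefixRun.run("banana") == false; // other terms do not
}
// ----------------------------------------------------------------------------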
*/ - private static CharacterRunAutomaton buildCombinedAutomaton(String field, BytesRef[] terms, + private static CharacterRunAutomaton buildCombinedAutomaton(Predicate fieldMatcher, + BytesRef[] terms, CharacterRunAutomaton[] automata, PhraseHelper strictPhrases, Function> multiTermQueryRewrite) { @@ -74,7 +76,7 @@ public class MemoryIndexOffsetStrategy extends AnalysisOffsetStrategy { Collections.addAll(allAutomata, automata); for (SpanQuery spanQuery : strictPhrases.getSpanQueries()) { Collections.addAll(allAutomata, - MultiTermHighlighting.extractAutomata(spanQuery, field, true, multiTermQueryRewrite));//true==lookInSpan + MultiTermHighlighting.extractAutomata(spanQuery, fieldMatcher, true, multiTermQueryRewrite));//true==lookInSpan } if (allAutomata.size() == 1) { diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java index fd6a26a778f..267d6039d83 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java @@ -22,6 +22,7 @@ import java.util.Collection; import java.util.Comparator; import java.util.List; import java.util.function.Function; +import java.util.function.Predicate; import org.apache.lucene.index.Term; import org.apache.lucene.search.AutomatonQuery; @@ -56,50 +57,52 @@ class MultiTermHighlighting { } /** - * Extracts all MultiTermQueries for {@code field}, and returns equivalent - * automata that will match terms. + * Extracts MultiTermQueries that match the provided field predicate. + * Returns equivalent automata that will match terms. */ - public static CharacterRunAutomaton[] extractAutomata(Query query, String field, boolean lookInSpan, + public static CharacterRunAutomaton[] extractAutomata(Query query, + Predicate fieldMatcher, + boolean lookInSpan, Function> preRewriteFunc) { List list = new ArrayList<>(); Collection customSubQueries = preRewriteFunc.apply(query); if (customSubQueries != null) { for (Query sub : customSubQueries) { - list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc))); + list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc))); } } else if (query instanceof BooleanQuery) { for (BooleanClause clause : (BooleanQuery) query) { if (!clause.isProhibited()) { - list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), field, lookInSpan, preRewriteFunc))); + list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), fieldMatcher, lookInSpan, preRewriteFunc))); } } } else if (query instanceof ConstantScoreQuery) { - list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), field, lookInSpan, + list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), fieldMatcher, lookInSpan, preRewriteFunc))); } else if (query instanceof DisjunctionMaxQuery) { for (Query sub : ((DisjunctionMaxQuery) query).getDisjuncts()) { - list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc))); + list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc))); } } else if (lookInSpan && query instanceof SpanOrQuery) { for (Query sub : ((SpanOrQuery) query).getClauses()) { - list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc))); + list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, 
preRewriteFunc))); } } else if (lookInSpan && query instanceof SpanNearQuery) { for (Query sub : ((SpanNearQuery) query).getClauses()) { - list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc))); + list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc))); } } else if (lookInSpan && query instanceof SpanNotQuery) { - list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), field, lookInSpan, + list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), fieldMatcher, lookInSpan, preRewriteFunc))); } else if (lookInSpan && query instanceof SpanPositionCheckQuery) { - list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), field, lookInSpan, + list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), fieldMatcher, lookInSpan, preRewriteFunc))); } else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) { - list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper) query).getWrappedQuery(), field, - lookInSpan, preRewriteFunc))); + list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper) query).getWrappedQuery(), + fieldMatcher, lookInSpan, preRewriteFunc))); } else if (query instanceof AutomatonQuery) { final AutomatonQuery aq = (AutomatonQuery) query; - if (aq.getField().equals(field)) { + if (fieldMatcher.test(aq.getField())) { list.add(new CharacterRunAutomaton(aq.getAutomaton()) { @Override public String toString() { @@ -110,7 +113,7 @@ class MultiTermHighlighting { } else if (query instanceof PrefixQuery) { final PrefixQuery pq = (PrefixQuery) query; Term prefix = pq.getPrefix(); - if (prefix.field().equals(field)) { + if (fieldMatcher.test(prefix.field())) { list.add(new CharacterRunAutomaton(Operations.concatenate(Automata.makeString(prefix.text()), Automata.makeAnyString())) { @Override @@ -121,7 +124,7 @@ class MultiTermHighlighting { } } else if (query instanceof FuzzyQuery) { final FuzzyQuery fq = (FuzzyQuery) query; - if (fq.getField().equals(field)) { + if (fieldMatcher.test(fq.getField())) { String utf16 = fq.getTerm().text(); int termText[] = new int[utf16.codePointCount(0, utf16.length())]; for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp)) { @@ -142,7 +145,7 @@ class MultiTermHighlighting { } } else if (query instanceof TermRangeQuery) { final TermRangeQuery tq = (TermRangeQuery) query; - if (tq.getField().equals(field)) { + if (fieldMatcher.test(tq.getField())) { final CharsRef lowerBound; if (tq.getLowerTerm() == null) { lowerBound = null; diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PhraseHelper.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PhraseHelper.java index cde17baf87a..d7e8671c4c2 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PhraseHelper.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PhraseHelper.java @@ -16,17 +16,50 @@ */ package org.apache.lucene.search.uhighlight; -import org.apache.lucene.index.*; -import org.apache.lucene.search.*; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.PriorityQueue; +import java.util.Set; +import 
java.util.TreeSet; +import java.util.function.Function; +import java.util.function.Predicate; + +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.Fields; +import org.apache.lucene.index.FilterLeafReader; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.Terms; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TwoPhaseIterator; import org.apache.lucene.search.highlight.WeightedSpanTerm; import org.apache.lucene.search.highlight.WeightedSpanTermExtractor; -import org.apache.lucene.search.spans.*; +import org.apache.lucene.search.spans.SpanCollector; +import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; +import org.apache.lucene.search.spans.SpanQuery; +import org.apache.lucene.search.spans.SpanWeight; +import org.apache.lucene.search.spans.Spans; import org.apache.lucene.util.BytesRef; -import java.io.IOException; -import java.util.*; -import java.util.function.Function; - /** * Helps the {@link FieldOffsetStrategy} with strict position highlighting (e.g. highlight phrases correctly). * This is a stateful class holding information about the query, but it can (and is) re-used across highlighting @@ -40,7 +73,7 @@ import java.util.function.Function; public class PhraseHelper { public static final PhraseHelper NONE = new PhraseHelper(new MatchAllDocsQuery(), "_ignored_", - spanQuery -> null, query -> null, true); + (s) -> false, spanQuery -> null, query -> null, true); //TODO it seems this ought to be a general thing on Spans? private static final Comparator SPANS_COMPARATOR = (o1, o2) -> { @@ -59,10 +92,11 @@ public class PhraseHelper { } }; - private final String fieldName; // if non-null, only look at queries/terms for this field + private final String fieldName; private final Set positionInsensitiveTerms; // (TermQuery terms) private final Set spanQueries; private final boolean willRewrite; + private final Predicate fieldMatcher; /** * Constructor. @@ -73,14 +107,15 @@ public class PhraseHelper { * to be set before the {@link WeightedSpanTermExtractor}'s extraction is invoked. * {@code ignoreQueriesNeedingRewrite} effectively ignores any query clause that needs to be "rewritten", which is * usually limited to just a {@link SpanMultiTermQueryWrapper} but could be other custom ones. + * {@code fieldMatcher} The field name predicate to use for extracting the query part that must be highlighted. */ - public PhraseHelper(Query query, String field, Function rewriteQueryPred, + public PhraseHelper(Query query, String field, Predicate fieldMatcher, Function rewriteQueryPred, Function> preExtractRewriteFunction, boolean ignoreQueriesNeedingRewrite) { - this.fieldName = field; // if null then don't require field match + this.fieldName = field; + this.fieldMatcher = fieldMatcher; // filter terms to those we want - positionInsensitiveTerms = field != null ? 
new FieldFilteringTermHashSet(field) : new HashSet<>(); - // requireFieldMatch optional + positionInsensitiveTerms = new FieldFilteringTermSet(); spanQueries = new HashSet<>(); // TODO Have toSpanQuery(query) Function as an extension point for those with custom Query impls @@ -131,11 +166,11 @@ public class PhraseHelper { @Override protected void extractWeightedSpanTerms(Map terms, SpanQuery spanQuery, float boost) throws IOException { - if (field != null) { - // if this span query isn't for this field, skip it. - Set fieldNameSet = new HashSet<>();//TODO reuse. note: almost always size 1 - collectSpanQueryFields(spanQuery, fieldNameSet); - if (!fieldNameSet.contains(field)) { + // if this span query isn't for this field, skip it. + Set fieldNameSet = new HashSet<>();//TODO reuse. note: almost always size 1 + collectSpanQueryFields(spanQuery, fieldNameSet); + for (String spanField : fieldNameSet) { + if (!fieldMatcher.test(spanField)) { return; } } @@ -190,10 +225,11 @@ public class PhraseHelper { if (spanQueries.isEmpty()) { return Collections.emptyMap(); } + final LeafReader filteredReader = new SingleFieldFilterLeafReader(leafReader, fieldName); // for each SpanQuery, collect the member spans into a map. Map result = new HashMap<>(); for (SpanQuery spanQuery : spanQueries) { - getTermToSpans(spanQuery, leafReader.getContext(), doc, result); + getTermToSpans(spanQuery, filteredReader.getContext(), doc, result); } return result; } @@ -203,15 +239,14 @@ public class PhraseHelper { int doc, Map result) throws IOException { // note: in WSTE there was some field specific looping that seemed pointless so that isn't here. - final IndexSearcher searcher = new IndexSearcher(readerContext); + final IndexSearcher searcher = new IndexSearcher(readerContext.reader()); searcher.setQueryCache(null); if (willRewrite) { spanQuery = (SpanQuery) searcher.rewrite(spanQuery); // searcher.rewrite loops till done } // Get the underlying query terms - - TreeSet termSet = new TreeSet<>(); // sorted so we can loop over results in order shortly... + TreeSet termSet = new FieldFilteringTermSet(); // sorted so we can loop over results in order shortly... searcher.createWeight(spanQuery, false, 1.0f).extractTerms(termSet);//needsScores==false // Get Spans by running the query against the reader @@ -240,9 +275,6 @@ public class PhraseHelper { for (final Term queryTerm : termSet) { // note: we expect that at least one query term will pass these filters. This is because the collected // spanQuery list were already filtered by these conditions. - if (fieldName != null && fieldName.equals(queryTerm.field()) == false) { - continue; - } if (positionInsensitiveTerms.contains(queryTerm)) { continue; } @@ -375,19 +407,17 @@ public class PhraseHelper { } /** - * Simple HashSet that filters out Terms not matching a desired field on {@code add()}. + * Simple TreeSet that filters out Terms not matching the provided predicate on {@code add()}. 
*/ - private static class FieldFilteringTermHashSet extends HashSet { - private final String field; - - FieldFilteringTermHashSet(String field) { - this.field = field; - } - + private class FieldFilteringTermSet extends TreeSet { @Override public boolean add(Term term) { - if (term.field().equals(field)) { - return super.add(term); + if (fieldMatcher.test(term.field())) { + if (term.field().equals(fieldName)) { + return super.add(term); + } else { + return super.add(new Term(fieldName, term.bytes())); + } } else { return false; } @@ -499,6 +529,64 @@ public class PhraseHelper { } } + /** + * This reader will just delegate every call to a single field in the wrapped + * LeafReader. This way we ensure that all queries going through this reader target the same field. + */ + static final class SingleFieldFilterLeafReader extends FilterLeafReader { + final String fieldName; + SingleFieldFilterLeafReader(LeafReader in, String fieldName) { + super(in); + this.fieldName = fieldName; + } + + @Override + public FieldInfos getFieldInfos() { + throw new UnsupportedOperationException(); + } + + @Override + public Fields fields() throws IOException { + return new FilterFields(super.fields()) { + @Override + public Terms terms(String field) throws IOException { + return super.terms(fieldName); + } + + @Override + public Iterator iterator() { + return Collections.singletonList(fieldName).iterator(); + } + + @Override + public int size() { + return 1; + } + }; + } + + @Override + public NumericDocValues getNumericDocValues(String field) throws IOException { + return super.getNumericDocValues(fieldName); + } + + @Override + public BinaryDocValues getBinaryDocValues(String field) throws IOException { + return super.getBinaryDocValues(fieldName); + } + + @Override + public SortedDocValues getSortedDocValues(String field) throws IOException { + return super.getSortedDocValues(fieldName); + } + + @Override + public NumericDocValues getNormValues(String field) throws IOException { + return super.getNormValues(fieldName); + } + } + + /** * A Spans based on a list of cached spans for one doc. It is pre-positioned to this doc. 
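// ----------------------------------------------------------------------------
// Editor's aside, not part of the patch: the effect of the SingleFieldFilterLeafReader
// defined above, assuming calling code in the same package and a LeafReader that has
// a "title" field. Whatever field name is asked for, the lookup is redirected to the
// single configured field:
static Terms alwaysTitleTerms(LeafReader reader, String anyField) throws IOException {
  LeafReader filtered = new PhraseHelper.SingleFieldFilterLeafReader(reader, "title");
  return filtered.fields().terms(anyField); // returns the "title" terms regardless of anyField
}
// ----------------------------------------------------------------------------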
*/ diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java index ac5f0f69999..bbcfd5b0203 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java @@ -24,6 +24,7 @@ import java.util.Arrays; import java.util.Collection; import java.util.EnumSet; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Locale; import java.util.Map; @@ -31,6 +32,7 @@ import java.util.Objects; import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; +import java.util.function.Predicate; import java.util.function.Supplier; import org.apache.lucene.analysis.Analyzer; @@ -58,7 +60,6 @@ import org.apache.lucene.search.Weight; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.InPlaceMergeSorter; -import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.automaton.CharacterRunAutomaton; /** @@ -119,13 +120,13 @@ public class UnifiedHighlighter { private boolean defaultPassageRelevancyOverSpeed = true; //For analysis, prefer MemoryIndexOffsetStrategy - // private boolean defaultRequireFieldMatch = true; TODO - private int maxLength = DEFAULT_MAX_LENGTH; // BreakIterator is stateful so we use a Supplier factory method private Supplier defaultBreakIterator = () -> BreakIterator.getSentenceInstance(Locale.ROOT); + private Predicate defaultFieldMatcher; + private PassageScorer defaultScorer = new PassageScorer(); private PassageFormatter defaultFormatter = new DefaultPassageFormatter(); @@ -140,8 +141,8 @@ public class UnifiedHighlighter { /** * Calls {@link Weight#extractTerms(Set)} on an empty index for the query. */ - protected static SortedSet extractTerms(Query query) throws IOException { - SortedSet queryTerms = new TreeSet<>(); + protected static Set extractTerms(Query query) throws IOException { + Set queryTerms = new HashSet<>(); EMPTY_INDEXSEARCHER.createNormalizedWeight(query, false).extractTerms(queryTerms); return queryTerms; } @@ -197,6 +198,10 @@ public class UnifiedHighlighter { this.cacheFieldValCharsThreshold = cacheFieldValCharsThreshold; } + public void setFieldMatcher(Predicate predicate) { + this.defaultFieldMatcher = predicate; + } + /** * Returns whether {@link MultiTermQuery} derivatives will be highlighted. By default it's enabled. MTQ * highlighting can be expensive, particularly when using offsets in postings. @@ -220,6 +225,18 @@ public class UnifiedHighlighter { return defaultPassageRelevancyOverSpeed; } + /** + * Returns the predicate to use for extracting the query part that must be highlighted. + * By default only queries that target the current field are kept. (AKA requireFieldMatch) + */ + protected Predicate getFieldMatcher(String field) { + if (defaultFieldMatcher != null) { + return defaultFieldMatcher; + } else { + // requireFieldMatch = true + return (qf) -> field.equals(qf); + } + } /** * The maximum content size to process. Content will be truncated to this size before highlighting. 
Typically @@ -548,7 +565,7 @@ public class UnifiedHighlighter { copyAndSortFieldsWithMaxPassages(fieldsIn, maxPassagesIn, fields, maxPassages); // latter 2 are "out" params // Init field highlighters (where most of the highlight logic lives, and on a per field basis) - SortedSet queryTerms = extractTerms(query); + Set queryTerms = extractTerms(query); FieldHighlighter[] fieldHighlighters = new FieldHighlighter[fields.length]; int numTermVectors = 0; int numPostings = 0; @@ -718,13 +735,13 @@ public class UnifiedHighlighter { getClass().getSimpleName() + " without an IndexSearcher."); } Objects.requireNonNull(content, "content is required"); - SortedSet queryTerms = extractTerms(query); + Set queryTerms = extractTerms(query); return getFieldHighlighter(field, query, queryTerms, maxPassages) .highlightFieldForDoc(null, -1, content); } - protected FieldHighlighter getFieldHighlighter(String field, Query query, SortedSet allTerms, int maxPassages) { - BytesRef[] terms = filterExtractedTerms(field, allTerms); + protected FieldHighlighter getFieldHighlighter(String field, Query query, Set allTerms, int maxPassages) { + BytesRef[] terms = filterExtractedTerms(getFieldMatcher(field), allTerms); Set highlightFlags = getFlags(field); PhraseHelper phraseHelper = getPhraseHelper(field, query, highlightFlags); CharacterRunAutomaton[] automata = getAutomata(field, query, highlightFlags); @@ -738,19 +755,15 @@ public class UnifiedHighlighter { getFormatter(field)); } - protected static BytesRef[] filterExtractedTerms(String field, SortedSet queryTerms) { - // TODO consider requireFieldMatch - Term floor = new Term(field, ""); - Term ceiling = new Term(field, UnicodeUtil.BIG_TERM); - SortedSet fieldTerms = queryTerms.subSet(floor, ceiling); - - // Strip off the redundant field: - BytesRef[] terms = new BytesRef[fieldTerms.size()]; - int termUpto = 0; - for (Term term : fieldTerms) { - terms[termUpto++] = term.bytes(); + protected static BytesRef[] filterExtractedTerms(Predicate fieldMatcher, Set queryTerms) { + // Strip off the redundant field and sort the remaining terms + SortedSet filteredTerms = new TreeSet<>(); + for (Term term : queryTerms) { + if (fieldMatcher.test(term.field())) { + filteredTerms.add(term.bytes()); + } } - return terms; + return filteredTerms.toArray(new BytesRef[filteredTerms.size()]); } protected Set getFlags(String field) { @@ -771,14 +784,13 @@ public class UnifiedHighlighter { boolean highlightPhrasesStrictly = highlightFlags.contains(HighlightFlag.PHRASES); boolean handleMultiTermQuery = highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY); return highlightPhrasesStrictly ? - new PhraseHelper(query, field, this::requiresRewrite, this::preSpanQueryRewrite, !handleMultiTermQuery) : - PhraseHelper.NONE; + new PhraseHelper(query, field, getFieldMatcher(field), + this::requiresRewrite, this::preSpanQueryRewrite, !handleMultiTermQuery) : PhraseHelper.NONE; } protected CharacterRunAutomaton[] getAutomata(String field, Query query, Set highlightFlags) { return highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY) - ? MultiTermHighlighting.extractAutomata(query, field, !highlightFlags.contains(HighlightFlag.PHRASES), - this::preMultiTermQueryRewrite) + ? 
MultiTermHighlighting.extractAutomata(query, getFieldMatcher(field), !highlightFlags.contains(HighlightFlag.PHRASES), this::preMultiTermQueryRewrite) : ZERO_LEN_AUTOMATA_ARRAY; } @@ -826,7 +838,7 @@ public class UnifiedHighlighter { //skip using a memory index since it's pure term filtering return new TokenStreamOffsetStrategy(field, terms, phraseHelper, automata, getIndexAnalyzer()); } else { - return new MemoryIndexOffsetStrategy(field, terms, phraseHelper, automata, getIndexAnalyzer(), + return new MemoryIndexOffsetStrategy(field, getFieldMatcher(field), terms, phraseHelper, automata, getIndexAnalyzer(), this::preMultiTermQueryRewrite); } case NONE_NEEDED: diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java index 0fd7d3d0a23..ddf8a926ba0 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java @@ -25,6 +25,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.function.Predicate; import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; import org.apache.lucene.analysis.MockAnalyzer; @@ -32,14 +33,17 @@ import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; +import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Sort; @@ -959,4 +963,275 @@ public class TestUnifiedHighlighter extends LuceneTestCase { ir.close(); } + private IndexReader indexSomeFields() throws IOException { + RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer); + FieldType ft = new FieldType(); + ft.setIndexOptions(IndexOptions.NONE); + ft.setTokenized(false); + ft.setStored(true); + ft.freeze(); + + Field title = new Field("title", "", fieldType); + Field text = new Field("text", "", fieldType); + Field category = new Field("category", "", fieldType); + + Document doc = new Document(); + doc.add(title); + doc.add(text); + doc.add(category); + title.setStringValue("This is the title field."); + text.setStringValue("This is the text field. 
You can put some text if you want."); + category.setStringValue("This is the category field."); + iw.addDocument(doc); + + IndexReader ir = iw.getReader(); + iw.close(); + return ir; + } + + public void testFieldMatcherTermQuery() throws Exception { + IndexReader ir = indexSomeFields(); + IndexSearcher searcher = newSearcher(ir); + UnifiedHighlighter highlighterNoFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer) { + @Override + protected Predicate getFieldMatcher(String field) { + // requireFieldMatch=false + return (qf) -> true; + } + }; + UnifiedHighlighter highlighterFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer); + BooleanQuery.Builder queryBuilder = + new BooleanQuery.Builder() + .add(new TermQuery(new Term("text", "some")), BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term("text", "field")), BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term("text", "this")), BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term("title", "is")), BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term("title", "this")), BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term("category", "this")), BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term("category", "some")), BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term("category", "category")), BooleanClause.Occur.SHOULD); + Query query = queryBuilder.build(); + + // title + { + TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); + assertEquals(1, topDocs.totalHits); + String[] snippets = highlighterNoFieldMatch.highlight("title", query, topDocs, 10); + assertEquals(1, snippets.length); + assertEquals("This is the title field.", snippets[0]); + + snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10); + assertEquals(1, snippets.length); + assertEquals("This is the title field.", snippets[0]); + + highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq)); + snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10); + assertEquals(1, snippets.length); + assertEquals("This is the title field.", snippets[0]); + highlighterFieldMatch.setFieldMatcher(null); + } + + // text + { + TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); + assertEquals(1, topDocs.totalHits); + String[] snippets = highlighterNoFieldMatch.highlight("text", query, topDocs, 10); + assertEquals(1, snippets.length); + assertEquals("This is the text field. You can put some text if you want.", snippets[0]); + + snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10); + assertEquals(1, snippets.length); + assertEquals("This is the text field. You can put some text if you want.", snippets[0]); + + highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq)); + snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10); + assertEquals(1, snippets.length); + assertEquals("This is the text field. 
", snippets[0]); + highlighterFieldMatch.setFieldMatcher(null); + } + + // category + { + TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); + assertEquals(1, topDocs.totalHits); + String[] snippets = highlighterNoFieldMatch.highlight("category", query, topDocs, 10); + assertEquals(1, snippets.length); + assertEquals("This is the category field.", snippets[0]); + + snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10); + assertEquals(1, snippets.length); + assertEquals("This is the category field.", snippets[0]); + + + highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq)); + snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10); + assertEquals(1, snippets.length); + assertEquals("This is the category field.", snippets[0]); + highlighterFieldMatch.setFieldMatcher(null); + } + ir.close(); + } + + public void testFieldMatcherMultiTermQuery() throws Exception { + IndexReader ir = indexSomeFields(); + IndexSearcher searcher = newSearcher(ir); + UnifiedHighlighter highlighterNoFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer) { + @Override + protected Predicate getFieldMatcher(String field) { + // requireFieldMatch=false + return (qf) -> true; + } + }; + UnifiedHighlighter highlighterFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer); + BooleanQuery.Builder queryBuilder = + new BooleanQuery.Builder() + .add(new FuzzyQuery(new Term("text", "sime"), 1), BooleanClause.Occur.SHOULD) + .add(new PrefixQuery(new Term("text", "fie")), BooleanClause.Occur.SHOULD) + .add(new PrefixQuery(new Term("text", "thi")), BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term("title", "is")), BooleanClause.Occur.SHOULD) + .add(new PrefixQuery(new Term("title", "thi")), BooleanClause.Occur.SHOULD) + .add(new PrefixQuery(new Term("category", "thi")), BooleanClause.Occur.SHOULD) + .add(new FuzzyQuery(new Term("category", "sime"), 1), BooleanClause.Occur.SHOULD) + .add(new PrefixQuery(new Term("category", "categ")), BooleanClause.Occur.SHOULD); + Query query = queryBuilder.build(); + + // title + { + TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); + assertEquals(1, topDocs.totalHits); + String[] snippets = highlighterNoFieldMatch.highlight("title", query, topDocs, 10); + assertEquals(1, snippets.length); + assertEquals("This is the title field.", snippets[0]); + + snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10); + assertEquals(1, snippets.length); + assertEquals("This is the title field.", snippets[0]); + + highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq)); + snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10); + assertEquals(1, snippets.length); + assertEquals("This is the title field.", snippets[0]); + highlighterFieldMatch.setFieldMatcher(null); + } + + // text + { + TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); + assertEquals(1, topDocs.totalHits); + String[] snippets = highlighterNoFieldMatch.highlight("text", query, topDocs, 10); + assertEquals(1, snippets.length); + assertEquals("This is the text field. You can put some text if you want.", snippets[0]); + + snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10); + assertEquals(1, snippets.length); + assertEquals("This is the text field. 
You can put some text if you want.", snippets[0]); + + highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq)); + snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10); + assertEquals(1, snippets.length); + assertEquals("This is the text field. ", snippets[0]); + highlighterFieldMatch.setFieldMatcher(null); + } + + // category + { + TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); + assertEquals(1, topDocs.totalHits); + String[] snippets = highlighterNoFieldMatch.highlight("category", query, topDocs, 10); + assertEquals(1, snippets.length); + assertEquals("This is the category field.", snippets[0]); + + snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10); + assertEquals(1, snippets.length); + assertEquals("This is the category field.", snippets[0]); + + + highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq)); + snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10); + assertEquals(1, snippets.length); + assertEquals("This is the category field.", snippets[0]); + highlighterFieldMatch.setFieldMatcher(null); + } + ir.close(); + } + + public void testFieldMatcherPhraseQuery() throws Exception { + IndexReader ir = indexSomeFields(); + IndexSearcher searcher = newSearcher(ir); + UnifiedHighlighter highlighterNoFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer) { + @Override + protected Predicate getFieldMatcher(String field) { + // requireFieldMatch=false + return (qf) -> true; + } + }; + UnifiedHighlighter highlighterFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer); + BooleanQuery.Builder queryBuilder = + new BooleanQuery.Builder() + .add(new PhraseQuery("title", "this", "is", "the", "title"), BooleanClause.Occur.SHOULD) + .add(new PhraseQuery(2, "category", "this", "is", "the", "field"), BooleanClause.Occur.SHOULD) + .add(new PhraseQuery("text", "this", "is"), BooleanClause.Occur.SHOULD) + .add(new PhraseQuery("category", "this", "is"), BooleanClause.Occur.SHOULD) + .add(new PhraseQuery(1, "text", "you", "can", "put", "text"), BooleanClause.Occur.SHOULD); + Query query = queryBuilder.build(); + + // title + { + TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); + assertEquals(1, topDocs.totalHits); + String[] snippets = highlighterNoFieldMatch.highlight("title", query, topDocs, 10); + assertEquals(1, snippets.length); + assertEquals("This is the title field.", snippets[0]); + + snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10); + assertEquals(1, snippets.length); + assertEquals("This is the title field.", snippets[0]); + + highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq)); + snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10); + assertEquals(1, snippets.length); + assertEquals("This is the title field.", snippets[0]); + highlighterFieldMatch.setFieldMatcher(null); + } + + // text + { + TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); + assertEquals(1, topDocs.totalHits); + String[] snippets = highlighterNoFieldMatch.highlight("text", query, topDocs, 10); + assertEquals(1, snippets.length); + assertEquals("This is the text field. You can put some text if you want.", snippets[0]); + + snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10); + assertEquals(1, snippets.length); + assertEquals("This is the text field. 
You can put some text if you want.", snippets[0]); + + highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq)); + snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10); + assertEquals(1, snippets.length); + assertEquals("This is the text field. You can put some text if you want.", snippets[0]); + highlighterFieldMatch.setFieldMatcher(null); + } + + // category + { + TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); + assertEquals(1, topDocs.totalHits); + String[] snippets = highlighterNoFieldMatch.highlight("category", query, topDocs, 10); + assertEquals(1, snippets.length); + assertEquals("This is the category field.", snippets[0]); + + snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10); + assertEquals(1, snippets.length); + assertEquals("This is the category field.", snippets[0]); + + + highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq)); + snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10); + assertEquals(1, snippets.length); + assertEquals("This is the category field.", snippets[0]); + highlighterFieldMatch.setFieldMatcher(null); + } + ir.close(); + } } diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java index d15094000c3..10757a5b1e7 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java @@ -23,7 +23,6 @@ import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Set; -import java.util.SortedSet; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; @@ -144,7 +143,7 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase { } @Override - protected FieldHighlighter getFieldHighlighter(String field, Query query, SortedSet allTerms, int maxPassages) { + protected FieldHighlighter getFieldHighlighter(String field, Query query, Set allTerms, int maxPassages) { return super.getFieldHighlighter(field, query, allTerms, maxPassages); } From bf3a3137be8a70ceed884e87c3ada276e82b187b Mon Sep 17 00:00:00 2001 From: Steve Rowe Date: Tue, 6 Dec 2016 13:11:36 -0500 Subject: [PATCH 17/53] SOLR-9832: Schema modifications are not immediately visible on the coordinating node --- solr/CHANGES.txt | 2 + .../java/org/apache/solr/core/SolrCore.java | 8 -- .../schema/ManagedIndexSchemaFactory.java | 12 +++ .../org/apache/solr/schema/SchemaManager.java | 2 +- .../ManagedSchemaRoundRobinCloudTest.java | 98 +++++++++++++++++++ 5 files changed, 113 insertions(+), 9 deletions(-) create mode 100644 solr/core/src/test/org/apache/solr/schema/ManagedSchemaRoundRobinCloudTest.java diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index e76616942be..bac24e570d6 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -220,6 +220,8 @@ Bug Fixes * SOLR-9616: Solr throws exception when expand=true on empty index (Timo Hund via Ishan Chattopadhyaya) +* SOLR-9832: Schema modifications are not immediately visible on the coordinating node. 
(Steve Rowe) + Other Changes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/core/SolrCore.java b/solr/core/src/java/org/apache/solr/core/SolrCore.java index e5bc53da9bd..a459bf2e9b1 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrCore.java +++ b/solr/core/src/java/org/apache/solr/core/SolrCore.java @@ -119,7 +119,6 @@ import org.apache.solr.schema.FieldType; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.IndexSchemaFactory; import org.apache.solr.schema.ManagedIndexSchema; -import org.apache.solr.schema.SchemaManager; import org.apache.solr.schema.SimilarityFactory; import org.apache.solr.search.QParserPlugin; import org.apache.solr.search.SolrFieldCacheMBean; @@ -2720,13 +2719,6 @@ public final class SolrCore implements SolrInfoMBean, Closeable { if (checkStale(zkClient, overlayPath, solrConfigversion) || checkStale(zkClient, solrConfigPath, overlayVersion) || checkStale(zkClient, managedSchmaResourcePath, managedSchemaVersion)) { - - try (SolrCore solrCore = cc.solrCores.getCoreFromAnyList(coreName, true)) { - solrCore.setLatestSchema(SchemaManager.getFreshManagedSchema(solrCore)); - } catch (Exception e) { - log.warn("", SolrZkClient.checkInterrupted(e)); - } - log.info("core reload {}", coreName); try { cc.reload(coreName); diff --git a/solr/core/src/java/org/apache/solr/schema/ManagedIndexSchemaFactory.java b/solr/core/src/java/org/apache/solr/schema/ManagedIndexSchemaFactory.java index 66d947ea09b..d4a10bda5f0 100644 --- a/solr/core/src/java/org/apache/solr/schema/ManagedIndexSchemaFactory.java +++ b/solr/core/src/java/org/apache/solr/schema/ManagedIndexSchemaFactory.java @@ -377,6 +377,18 @@ public class ManagedIndexSchemaFactory extends IndexSchemaFactory implements Sol this.zkIndexSchemaReader = new ZkIndexSchemaReader(this, core); ZkSolrResourceLoader zkLoader = (ZkSolrResourceLoader)loader; zkLoader.setZkIndexSchemaReader(this.zkIndexSchemaReader); + try { + zkIndexSchemaReader.refreshSchemaFromZk(-1); // update immediately if newer is available + core.setLatestSchema(getSchema()); + } catch (KeeperException e) { + String msg = "Error attempting to access " + zkLoader.getConfigSetZkPath() + "/" + managedSchemaResourceName; + log.error(msg, e); + throw new SolrException(ErrorCode.SERVER_ERROR, msg, e); + } catch (InterruptedException e) { + // Restore the interrupted status + Thread.currentThread().interrupt(); + log.warn("", e); + } } else { this.zkIndexSchemaReader = null; } diff --git a/solr/core/src/java/org/apache/solr/schema/SchemaManager.java b/solr/core/src/java/org/apache/solr/schema/SchemaManager.java index 4b0ea546fe4..33406318158 100644 --- a/solr/core/src/java/org/apache/solr/schema/SchemaManager.java +++ b/solr/core/src/java/org/apache/solr/schema/SchemaManager.java @@ -133,8 +133,8 @@ public class SchemaManager { try { int latestVersion = ZkController.persistConfigResourceToZooKeeper(zkLoader, managedIndexSchema.getSchemaZkVersion(), managedIndexSchema.getResourceName(), sw.toString().getBytes(StandardCharsets.UTF_8), true); + req.getCore().getCoreDescriptor().getCoreContainer().reload(req.getCore().getName()); waitForOtherReplicasToUpdate(timeOut, latestVersion); - core.setLatestSchema(managedIndexSchema); return Collections.emptyList(); } catch (ZkController.ResourceModifiedInZkException e) { log.info("Schema was modified by another node. 
Retrying.."); diff --git a/solr/core/src/test/org/apache/solr/schema/ManagedSchemaRoundRobinCloudTest.java b/solr/core/src/test/org/apache/solr/schema/ManagedSchemaRoundRobinCloudTest.java new file mode 100644 index 00000000000..883ebfdb5e6 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/schema/ManagedSchemaRoundRobinCloudTest.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.schema; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +import org.apache.solr.client.solrj.SolrClient; +import org.apache.solr.client.solrj.impl.HttpSolrClient; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.client.solrj.request.schema.SchemaRequest; +import org.apache.solr.client.solrj.response.schema.SchemaResponse; +import org.apache.solr.cloud.SolrCloudTestCase; +import org.apache.solr.common.cloud.DocCollection; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +public class ManagedSchemaRoundRobinCloudTest extends SolrCloudTestCase { + private static final String COLLECTION = "managed_coll"; + private static final String CONFIG = "cloud-managed"; + private static final String FIELD_PREFIX = "NumberedField_"; + private static final int NUM_SHARDS = 2; + private static final int NUM_FIELDS_TO_ADD = 10; + + @BeforeClass + public static void setupCluster() throws Exception { + System.setProperty("managed.schema.mutable", "true"); + configureCluster(NUM_SHARDS).addConfig(CONFIG, configset(CONFIG)).configure(); + CollectionAdminRequest.createCollection(COLLECTION, CONFIG, NUM_SHARDS, 1) + .setMaxShardsPerNode(1) + .process(cluster.getSolrClient()); + cluster.getSolrClient().waitForState(COLLECTION, DEFAULT_TIMEOUT, TimeUnit.SECONDS, + (n, c) -> DocCollection.isFullyActive(n, c, NUM_SHARDS, 1)); + } + + @AfterClass + public static void clearSysProps() throws Exception { + System.clearProperty("managed.schema.mutable"); + } + + @Test + public void testAddFieldsRoundRobin() throws Exception { + List clients = new ArrayList<>(NUM_SHARDS); + try { + for (int shardNum = 0 ; shardNum < NUM_SHARDS ; ++shardNum) { + clients.add(getHttpSolrClient(cluster.getJettySolrRunners().get(shardNum).getBaseUrl().toString())); + } + int shardNum = 0; + for (int fieldNum = 0 ; fieldNum < NUM_FIELDS_TO_ADD ; ++fieldNum) { + addField(clients.get(shardNum), keyValueArrayToMap("name", FIELD_PREFIX + fieldNum, "type", "string")); + if (++shardNum == NUM_SHARDS) { + shardNum = 0; + } + } + } finally { + for (int shardNum = 0 ; shardNum < NUM_SHARDS ; ++shardNum) { + clients.get(shardNum).close(); + } + } + } + + private void addField(SolrClient client, Map field) 
throws Exception { + SchemaResponse.UpdateResponse addFieldResponse = new SchemaRequest.AddField(field).process(client, COLLECTION); + assertNotNull(addFieldResponse); + assertEquals(0, addFieldResponse.getStatus()); + assertNull(addFieldResponse.getResponse().get("errors")); + String fieldName = field.get("name").toString(); + SchemaResponse.FieldResponse fieldResponse = new SchemaRequest.Field(fieldName).process(client, COLLECTION); + assertNotNull(fieldResponse); + assertEquals(0, fieldResponse.getStatus()); + } + + private Map keyValueArrayToMap(String... alternatingKeysAndValues) { + Map map = new HashMap<>(); + for (int i = 0 ; i < alternatingKeysAndValues.length ; i += 2) + map.put(alternatingKeysAndValues[i], alternatingKeysAndValues[i + 1]); + return map; + } +} From c164f7e35e45d0bfa844cd450ffb4865c27fc4d5 Mon Sep 17 00:00:00 2001 From: Tomas Fernandez Lobbe Date: Tue, 6 Dec 2016 10:34:22 -0800 Subject: [PATCH 18/53] SOLR-9827: Make ConcurrentUpdateSolrClient create RemoteSolrExceptions in case of remote errors instead of SolrException --- solr/CHANGES.txt | 4 ++++ .../solrj/impl/ConcurrentUpdateSolrClient.java | 16 +++++++++++++--- .../solr/client/solrj/SolrExampleTests.java | 7 ++++++- 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index bac24e570d6..4f7377c5f94 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -259,6 +259,10 @@ Other Changes * SOLR-9819: Upgrade commons-fileupload to 1.3.2, fixing a potential vulnerability CVE-2016-3092 (Anshum Gupta) +* SOLR-9827: ConcurrentUpdateSolrClient creates a RemoteSolrException if the remote host responded with a non-ok + response (instead of a SolrException) and includes the remote error message as part of the exception message + (Tomás Fernández Löbbe) + ================== 6.3.0 ================== Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release. 
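For illustration only (not part of the patch): a minimal, hypothetical SolrJ sketch of how a caller could observe the SOLR-9827 change described in the CHANGES.txt entry above. It assumes the long-standing ConcurrentUpdateSolrClient(String, int, int) constructor and the public handleError(Throwable) hook; the URL, collection name and queue settings are made up. Errors reported by the background runner threads should now arrive as HttpSolrClient.RemoteSolrException instances whose message carries the remote error text.

    import java.io.IOException;
    import org.apache.solr.client.solrj.SolrServerException;
    import org.apache.solr.client.solrj.impl.ConcurrentUpdateSolrClient;
    import org.apache.solr.client.solrj.impl.HttpSolrClient;
    import org.apache.solr.common.SolrInputDocument;

    public class RemoteErrorDemo {
      public static void main(String[] args) throws IOException, SolrServerException {
        // Hypothetical endpoint and queue/thread sizes; override handleError to inspect
        // failures reported asynchronously by the client's background runner threads.
        ConcurrentUpdateSolrClient client =
            new ConcurrentUpdateSolrClient("http://localhost:8983/solr/techproducts", 10, 2) {
              @Override
              public void handleError(Throwable ex) {
                if (ex instanceof HttpSolrClient.RemoteSolrException) {
                  // The message now includes "Remote error message: ..." from the server response.
                  System.err.println("Remote indexing error: " + ex.getMessage());
                } else {
                  super.handleError(ex);
                }
              }
            };
        SolrInputDocument doc = new SolrInputDocument();
        doc.addField("id", "demo-doc");
        client.add(doc);              // errors surface via handleError, not as a thrown exception
        client.blockUntilFinished();
        client.close();
      }
    }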
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrClient.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrClient.java index b96cc2365c2..5c3f289c4f5 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrClient.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrClient.java @@ -46,7 +46,6 @@ import org.apache.solr.client.solrj.request.RequestWriter; import org.apache.solr.client.solrj.request.UpdateRequest; import org.apache.solr.client.solrj.util.ClientUtils; import org.apache.solr.common.SolrException; -import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SolrParams; @@ -330,7 +329,8 @@ public class ConcurrentUpdateSolrClient extends SolrClient { msg.append("\n\n\n\n"); msg.append("request: ").append(method.getURI()); - SolrException solrExc = new SolrException(ErrorCode.getErrorCode(statusCode), msg.toString()); + SolrException solrExc; + NamedList metadata = null; // parse out the metadata from the SolrException try { String encoding = "UTF-8"; // default @@ -343,11 +343,21 @@ public class ConcurrentUpdateSolrClient extends SolrClient { NamedList resp = client.parser.processResponse(rspBody, encoding); NamedList error = (NamedList) resp.get("error"); if (error != null) { - solrExc.setMetadata((NamedList) error.get("metadata")); + metadata = (NamedList) error.get("metadata"); + String remoteMsg = (String) error.get("msg"); + if (remoteMsg != null) { + msg.append("\nRemote error message: "); + msg.append(remoteMsg); + } } } catch (Exception exc) { // don't want to fail to report error if parsing the response fails log.warn("Failed to parse error response from " + client.getBaseURL() + " due to: " + exc); + } finally { + solrExc = new HttpSolrClient.RemoteSolrException(client.getBaseURL(), statusCode, msg.toString(), null); + if (metadata != null) { + solrExc.setMetadata(metadata); + } } handleError(solrExc); diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTests.java b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTests.java index f403f3f7b9e..d25280dfe26 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTests.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTests.java @@ -36,6 +36,7 @@ import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer; import org.apache.solr.client.solrj.embedded.SolrExampleStreamingTest.ErrorTrackingConcurrentUpdateSolrClient; import org.apache.solr.client.solrj.impl.BinaryResponseParser; import org.apache.solr.client.solrj.impl.HttpSolrClient; +import org.apache.solr.client.solrj.impl.HttpSolrClient.RemoteSolrException; import org.apache.solr.client.solrj.impl.NoOpResponseParser; import org.apache.solr.client.solrj.impl.XMLResponseParser; import org.apache.solr.client.solrj.request.AbstractUpdateRequest; @@ -463,7 +464,11 @@ abstract public class SolrExampleTests extends SolrExampleTestsBase concurrentClient.lastError = null; concurrentClient.add(doc); concurrentClient.blockUntilFinished(); - assertNotNull("Should throw exception!", concurrentClient.lastError); + assertNotNull("Should throw exception!", concurrentClient.lastError); + assertEquals("Unexpected exception type", + RemoteSolrException.class, concurrentClient.lastError.getClass()); + assertTrue("Unexpected exception message: " + 
concurrentClient.lastError.getMessage(), + concurrentClient.lastError.getMessage().contains("Remote error message: Document contains multiple values for uniqueKey")); } else { log.info("Ignoring update test for client:" + client.getClass().getName()); } From 8b98b158ff9cc2a71216e12c894ca14352d31f0e Mon Sep 17 00:00:00 2001 From: Chris Hostetter Date: Tue, 6 Dec 2016 14:47:03 -0700 Subject: [PATCH 19/53] SOLR-5043: New solr.dns.prevent.reverse.lookup system property that can be used to prevent long core (re)load delays on systems with missconfigured hostname/DNS --- solr/CHANGES.txt | 3 ++ .../solr/handler/admin/SystemInfoHandler.java | 51 ++++++++++++++++--- 2 files changed, 46 insertions(+), 8 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 4f7377c5f94..14dd2fae779 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -152,6 +152,9 @@ New Features * SOLR-9728: Ability to specify Key Store type in solr.in.sh file for SSL (Michael Suzuki, Kevin Risden) +* SOLR-5043: New solr.dns.prevent.reverse.lookup system property that can be used to prevent long core + (re)load delays on systems with missconfigured hostname/DNS (hossman) + Optimizations ---------------------- * SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have diff --git a/solr/core/src/java/org/apache/solr/handler/admin/SystemInfoHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/SystemInfoHandler.java index 35ef906d353..a873c09c984 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/SystemInfoHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/SystemInfoHandler.java @@ -31,7 +31,6 @@ import java.lang.management.PlatformManagedObject; import java.lang.management.RuntimeMXBean; import java.lang.reflect.InvocationTargetException; import java.net.InetAddress; -import java.net.UnknownHostException; import java.nio.charset.Charset; import java.text.DecimalFormat; import java.text.DecimalFormatSymbols; @@ -50,6 +49,8 @@ import org.apache.solr.handler.RequestHandlerBase; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.schema.IndexSchema; +import org.apache.solr.util.RTimer; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -64,8 +65,22 @@ import static org.apache.solr.common.params.CommonParams.NAME; public class SystemInfoHandler extends RequestHandlerBase { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - + /** + *

+   * <p>
+   * Undocumented expert level system property to prevent doing a reverse lookup of our hostname.
+   * This property will be logged as a suggested workaround if any problems are noticed when doing reverse
+   * lookup.
+   * </p>
+   *
+   * <p>
+   * TODO: should we refactor this (and the associated logic) into a helper method for any other places
+   * where DNS is used?
+   * </p>
    + * @see #initHostname + */ + private static final String PREVENT_REVERSE_DNS_OF_LOCALHOST_SYSPROP = "solr.dns.prevent.reverse.lookup"; + // on some platforms, resolving canonical hostname can cause the thread // to block for several seconds if nameservices aren't available // so resolve this once per handler instance @@ -75,22 +90,42 @@ public class SystemInfoHandler extends RequestHandlerBase private CoreContainer cc; public SystemInfoHandler() { - super(); - init(); + this(null); } public SystemInfoHandler(CoreContainer cc) { super(); this.cc = cc; - init(); + initHostname(); } - private void init() { + private void initHostname() { + if (null != System.getProperty(PREVENT_REVERSE_DNS_OF_LOCALHOST_SYSPROP, null)) { + log.info("Resolving canonical hostname for local host prevented due to '{}' sysprop", + PREVENT_REVERSE_DNS_OF_LOCALHOST_SYSPROP); + hostname = null; + return; + } + + RTimer timer = new RTimer(); try { InetAddress addr = InetAddress.getLocalHost(); hostname = addr.getCanonicalHostName(); - } catch (UnknownHostException e) { - //default to null + } catch (Exception e) { + log.warn("Unable to resolve canonical hostname for local host, possible DNS misconfiguration. " + + "Set the '"+PREVENT_REVERSE_DNS_OF_LOCALHOST_SYSPROP+"' sysprop to true on startup to " + + "prevent future lookups if DNS can not be fixed.", e); + hostname = null; + return; + } + timer.stop(); + + if (15000D < timer.getTime()) { + String readableTime = String.format(Locale.ROOT, "%.3f", (timer.getTime() / 1000)); + log.warn("Resolving canonical hostname for local host took {} seconds, possible DNS misconfiguration. " + + "Set the '{}' sysprop to true on startup to prevent future lookups if DNS can not be fixed.", + readableTime, PREVENT_REVERSE_DNS_OF_LOCALHOST_SYSPROP); + } } From 3f6164c76e2fc581abe4408066e08cf9fc817260 Mon Sep 17 00:00:00 2001 From: Noble Paul Date: Wed, 7 Dec 2016 18:42:07 +0530 Subject: [PATCH 20/53] added an extra testcase --- .../TestPlainTextEntityProcessor.java | 108 ++++++++++++++++++ 1 file changed, 108 insertions(+) diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java index 82b757e6db6..a286d841c91 100644 --- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java @@ -16,12 +16,23 @@ */ package org.apache.solr.handler.dataimport; +import java.io.ByteArrayInputStream; import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; import java.io.StringReader; +import java.nio.charset.StandardCharsets; +import java.sql.Blob; +import java.sql.SQLException; +import java.util.Arrays; +import java.util.Collections; import java.util.Properties; +import org.apache.solr.common.util.Utils; import org.junit.Test; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * Test for PlainTextEntityProcessor * @@ -42,6 +53,103 @@ public class TestPlainTextEntityProcessor extends AbstractDataImportHandlerTestC assertEquals(DS.s, sw.docs.get(0).getFieldValue("x")); } + static class BlobImpl implements Blob{ + private final byte[] bytes; + + BlobImpl(byte[] bytes) { + this.bytes = bytes; + } + + @Override + public long length() throws SQLException { + return 0; + } + + @Override + public byte[] getBytes(long pos, int 
length) throws SQLException { + return bytes; + } + + @Override + public InputStream getBinaryStream() throws SQLException { + return new ByteArrayInputStream(bytes); + } + + @Override + public long position(byte[] pattern, long start) throws SQLException { + return 0; + } + + @Override + public long position(Blob pattern, long start) throws SQLException { + return 0; + } + + @Override + public int setBytes(long pos, byte[] bytes) throws SQLException { + return 0; + } + + @Override + public int setBytes(long pos, byte[] bytes, int offset, int len) throws SQLException { + return 0; + } + + @Override + public OutputStream setBinaryStream(long pos) throws SQLException { + return null; + } + + @Override + public void truncate(long len) throws SQLException { + + } + + @Override + public void free() throws SQLException { + + } + + @Override + public InputStream getBinaryStream(long pos, long length) throws SQLException { + return new ByteArrayInputStream(bytes); + } + } + + @Test + public void testSimple2() throws IOException { + DataImporter di = new DataImporter(); + MockDataSource.setIterator("select id, name, blob_field from lw_table4", Collections.singletonList(Utils.makeMap("blob_field",new BlobImpl(DS.s.getBytes(UTF_8)) ) ).iterator()); + + String dc = + + " " + + "\n" + + " \n" + + " \n" + + "\n" + + " \n" + + " \n" + + " \n" + + "\n" + + " \n" + + " \n" + + " \n" + + "\n" + + " \n" + + " \n" + + ""; + System.out.println(dc); + di.loadAndInit(dc); + redirectTempProperties(di); + + TestDocBuilder.SolrWriterImpl sw = new TestDocBuilder.SolrWriterImpl(); + RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null); + di.runCmd(rp, sw); + assertEquals(DS.s, sw.docs.get(0).getFieldValue("plainText")); + } + + public static class DS extends DataSource { static String s = "hello world"; From ca5e736db1df0cdf35f1b039350bfd5a9cdfa102 Mon Sep 17 00:00:00 2001 From: yonik Date: Wed, 7 Dec 2016 11:08:33 -0500 Subject: [PATCH 21/53] SOLR-9822: speed up single-valued string fieldcache counting in dv facet processor --- solr/CHANGES.txt | 5 + .../facet/FacetFieldProcessorByArrayDV.java | 49 +++++- .../apache/solr/search/facet/FieldUtil.java | 147 ++++++++++++++++++ .../apache/solr/uninverting/FieldCache.java | 2 +- .../solr/uninverting/FieldCacheImpl.java | 142 +++++++++-------- 5 files changed, 270 insertions(+), 75 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 14dd2fae779..8dee8379901 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -62,6 +62,11 @@ Optimizations * SOLR-9579: Make Solr's SchemaField implement Lucene's IndexableFieldType, removing the creation of a Lucene FieldType every time a field is indexed. (John Call, yonik) +* SOLR-9822: JSON Facet API: Recover performance lost due to the DocValues transition to + an iterator API (LUCENE-7407). This only fixes calculating counts for single-valued + string fields from the FieldCache, resulting in up to 56% better throughput for those cases. 
+ (yonik) + ================== 6.4.0 ================== diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayDV.java b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayDV.java index 88adf6751d6..1481f187df5 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayDV.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayDV.java @@ -33,6 +33,7 @@ import org.apache.lucene.util.UnicodeUtil; import org.apache.solr.common.SolrException; import org.apache.solr.schema.SchemaField; import org.apache.solr.search.Filter; +import org.apache.solr.uninverting.FieldCacheImpl; /** * Grabs values from {@link DocValues}. @@ -184,15 +185,33 @@ class FacetFieldProcessorByArrayDV extends FacetFieldProcessorByArray { int segMax = singleDv.getValueCount() + 1; final int[] counts = getCountArr( segMax ); + /** alternate trial implementations + // ord + // FieldUtil.visitOrds(singleDv, disi, (doc,ord)->{counts[ord+1]++;} ); + + FieldUtil.OrdValues ordValues = FieldUtil.getOrdValues(singleDv, disi); + while (ordValues.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + counts[ ordValues.getOrd() + 1]++; + } + **/ + + + // calculate segment-local counts int doc; - while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { - if (singleDv.advanceExact(doc)) { - counts[ singleDv.ordValue() + 1 ]++; - } else { - counts[ 0 ]++; + if (singleDv instanceof FieldCacheImpl.SortedDocValuesImpl.Iter) { + FieldCacheImpl.SortedDocValuesImpl.Iter fc = (FieldCacheImpl.SortedDocValuesImpl.Iter) singleDv; + while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + counts[fc.getOrd(doc) + 1]++; + } + } else { + while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + if (singleDv.advanceExact(doc)) { + counts[singleDv.ordValue() + 1]++; + } } } + // convert segment-local counts to global counts for (int i=1; i 0) { @@ -250,12 +269,26 @@ class FacetFieldProcessorByArrayDV extends FacetFieldProcessorByArray { private void collectCounts(SortedDocValues singleDv, DocIdSetIterator disi, LongValues toGlobal) throws IOException { int doc; - while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { - if (singleDv.advanceExact(doc)) { - int segOrd = singleDv.ordValue(); + if (singleDv instanceof FieldCacheImpl.SortedDocValuesImpl.Iter) { + + FieldCacheImpl.SortedDocValuesImpl.Iter fc = (FieldCacheImpl.SortedDocValuesImpl.Iter)singleDv; + while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + int segOrd = fc.getOrd(doc); + if (segOrd < 0) continue; int ord = (int)toGlobal.get(segOrd); countAcc.incrementCount(ord, 1); } + + } else { + + while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + if (singleDv.advanceExact(doc)) { + int segOrd = singleDv.ordValue(); + int ord = (int) toGlobal.get(segOrd); + countAcc.incrementCount(ord, 1); + } + } + } } diff --git a/solr/core/src/java/org/apache/solr/search/facet/FieldUtil.java b/solr/core/src/java/org/apache/solr/search/facet/FieldUtil.java index 84255b9f9df..389b6d74045 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FieldUtil.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FieldUtil.java @@ -21,10 +21,13 @@ import java.io.IOException; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.BytesRef; import 
org.apache.solr.schema.SchemaField; import org.apache.solr.search.QParser; import org.apache.solr.search.QueryContext; import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.uninverting.FieldCacheImpl; /** @lucene.internal * Porting helper... may be removed if it offers no value in the future. @@ -52,4 +55,148 @@ public class FieldUtil { return si == null ? DocValues.emptySortedSet() : si; } + + /** The following ord visitors and wrappers are a work in progress and experimental + * @lucene.experimental */ + @FunctionalInterface + public interface OrdFunc { + void handleOrd(int docid, int ord); // TODO: throw exception? + } + + public static boolean isFieldCache(SortedDocValues singleDv) { + return singleDv instanceof FieldCacheImpl.SortedDocValuesImpl.Iter; + } + + public static void visitOrds(SortedDocValues singleDv, DocIdSetIterator disi, OrdFunc ordFunc) throws IOException { + int doc; + if (singleDv instanceof FieldCacheImpl.SortedDocValuesImpl.Iter) { + FieldCacheImpl.SortedDocValuesImpl.Iter fc = (FieldCacheImpl.SortedDocValuesImpl.Iter) singleDv; + while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + ordFunc.handleOrd(doc, fc.getOrd(doc)); + } + } else { + while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + if (singleDv.advanceExact(doc)) { + ordFunc.handleOrd(doc, singleDv.ordValue()); + } else { + // TODO: optionally pass in missingOrd? + } + } + } + } + + public static OrdValues getOrdValues(SortedDocValues singleDv, DocIdSetIterator disi) { + if (singleDv instanceof FieldCacheImpl.SortedDocValuesImpl.Iter) { + FieldCacheImpl.SortedDocValuesImpl.Iter fc = (FieldCacheImpl.SortedDocValuesImpl.Iter) singleDv; + return new FCOrdValues(fc, disi); + } + return new DVOrdValues(singleDv, disi); + } + + + public static abstract class OrdValues extends SortedDocValues { + int doc; + int ord; + + public int getOrd() { + return ord; + } + + @Override + public int docID() { + return doc; + } + + @Override + public abstract int nextDoc() throws IOException; + + @Override + public int advance(int target) throws IOException { + return 0; // TODO + } + + @Override + public long cost() { + return 0; + } + + @Override + public int getValueCount() { + throw new UnsupportedOperationException(); + } + } + + + public static class FCOrdValues extends OrdValues { + FieldCacheImpl.SortedDocValuesImpl.Iter vals; + DocIdSetIterator disi; + + public FCOrdValues(FieldCacheImpl.SortedDocValuesImpl.Iter iter, DocIdSetIterator disi) { + this.vals = iter; + this.disi = disi; + } + + @Override + public int nextDoc() throws IOException { + doc = disi.nextDoc(); + if (doc == NO_MORE_DOCS) return NO_MORE_DOCS; + ord = vals.getOrd(doc); // todo: loop until a hit? 
+ return doc; + } + + @Override + public boolean advanceExact(int target) throws IOException { + return false; + } + + @Override + public int ordValue() { + return 0; + } + + @Override + public BytesRef lookupOrd(int ord) throws IOException { + return null; + } + } + + public static class DVOrdValues extends OrdValues { + SortedDocValues vals; + DocIdSetIterator disi; + int valDoc; + + public DVOrdValues(SortedDocValues vals, DocIdSetIterator disi) { + this.vals = vals; + this.disi = disi; + } + + @Override + public int nextDoc() throws IOException { + for (;;) { + // todo - use skipping when appropriate + doc = disi.nextDoc(); + if (doc == NO_MORE_DOCS) return NO_MORE_DOCS; + boolean match = vals.advanceExact(doc); + if (match) { + ord = vals.ordValue(); + return doc; + } + } + } + + @Override + public boolean advanceExact(int target) throws IOException { + return false; + } + + @Override + public int ordValue() { + return 0; + } + + @Override + public BytesRef lookupOrd(int ord) throws IOException { + return null; + } + } } diff --git a/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java b/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java index ea8f6ea1d3c..32f56152c8c 100644 --- a/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java +++ b/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java @@ -45,7 +45,7 @@ import org.apache.lucene.util.RamUsageEstimator; * * @lucene.internal */ -interface FieldCache { +public interface FieldCache { /** * Placeholder indicating creation of this cache is currently in-progress. diff --git a/solr/core/src/java/org/apache/solr/uninverting/FieldCacheImpl.java b/solr/core/src/java/org/apache/solr/uninverting/FieldCacheImpl.java index 2224010a85f..0ca687f3952 100644 --- a/solr/core/src/java/org/apache/solr/uninverting/FieldCacheImpl.java +++ b/solr/core/src/java/org/apache/solr/uninverting/FieldCacheImpl.java @@ -58,9 +58,9 @@ import org.apache.lucene.util.packed.PackedLongValues; * Expert: The default cache implementation, storing all values in memory. * A WeakHashMap is used for storage. * - * @since lucene 1.4 + * @lucene.internal */ -class FieldCacheImpl implements FieldCache { +public class FieldCacheImpl implements FieldCache { private Map,Cache> caches; FieldCacheImpl() { @@ -786,79 +786,89 @@ class FieldCacheImpl implements FieldCache { this.termOrdToBytesOffset = termOrdToBytesOffset; this.numOrd = numOrd; } - + public SortedDocValues iterator() { - final BytesRef term = new BytesRef(); - return new SortedDocValues() { - private int docID = -1; + return new Iter(); + } - @Override - public int docID() { - return docID; - } + public class Iter extends SortedDocValues { + private int docID = -1; + private final BytesRef term = new BytesRef(); - @Override - public int nextDoc() { - while (true) { - docID++; - if (docID >= docToTermOrd.size()) { - docID = NO_MORE_DOCS; - return docID; - } - if (docToTermOrd.get(docID) != 0) { - return docID; - } - } - } + /** @lucene.internal Specific to this implementation and subject to change. For internal optimization only. 
*/ + public int getOrd(int docID) { + // Subtract 1, matching the 1+ord we did when + // storing, so that missing values, which are 0 in the + // packed ints, are returned as -1 ord: + return (int) docToTermOrd.get(docID)-1; + } - @Override - public int advance(int target) { - if (target < docToTermOrd.size()) { - docID = target; - if (docToTermOrd.get(docID) != 0) { - return docID; - } else{ - return nextDoc(); - } - } else { + @Override + public int docID() { + return docID; + } + + @Override + public int nextDoc() { + while (true) { + docID++; + if (docID >= docToTermOrd.size()) { docID = NO_MORE_DOCS; return docID; } - } - - @Override - public boolean advanceExact(int target) throws IOException { - docID = target; - return docToTermOrd.get(docID) != 0; - } - - @Override - public long cost() { - return 0; - } - - @Override - public int ordValue() { - // Subtract 1, matching the 1+ord we did when - // storing, so that missing values, which are 0 in the - // packed ints, are returned as -1 ord: - return (int) docToTermOrd.get(docID)-1; - } - - @Override - public int getValueCount() { - return numOrd; - } - - @Override - public BytesRef lookupOrd(int ord) { - if (ord < 0) { - throw new IllegalArgumentException("ord must be >=0 (got ord=" + ord + ")"); + if (docToTermOrd.get(docID) != 0) { + return docID; } - bytes.fill(term, termOrdToBytesOffset.get(ord)); - return term; } - }; + } + + @Override + public int advance(int target) { + if (target < docToTermOrd.size()) { + docID = target; + if (docToTermOrd.get(docID) != 0) { + return docID; + } else{ + return nextDoc(); + } + } else { + docID = NO_MORE_DOCS; + return docID; + } + } + + @Override + public boolean advanceExact(int target) throws IOException { + docID = target; + return docToTermOrd.get(docID) != 0; + } + + @Override + public long cost() { + return 0; + } + + @Override + public int ordValue() { + // Subtract 1, matching the 1+ord we did when + // storing, so that missing values, which are 0 in the + // packed ints, are returned as -1 ord: + return (int) docToTermOrd.get(docID)-1; + } + + @Override + public int getValueCount() { + return numOrd; + } + + @Override + public BytesRef lookupOrd(int ord) { + if (ord < 0) { + throw new IllegalArgumentException("ord must be >=0 (got ord=" + ord + ")"); + } + bytes.fill(term, termOrdToBytesOffset.get(ord)); + return term; + } } @Override From d2ed42b847b1114fe3d0befc788fba55255d4ee2 Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Wed, 7 Dec 2016 18:39:04 +0000 Subject: [PATCH 22/53] removed two unused imports in TestPlainTextEntityProcessor.java --- .../solr/handler/dataimport/TestPlainTextEntityProcessor.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java index a286d841c91..23854382dde 100644 --- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java @@ -21,10 +21,8 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.io.StringReader; -import java.nio.charset.StandardCharsets; import java.sql.Blob; import java.sql.SQLException; -import java.util.Arrays; import java.util.Collections; import java.util.Properties; From 
bfc3690d5203cee20550450bac3771e5c2b85cbf Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Wed, 7 Dec 2016 20:43:49 +0000 Subject: [PATCH 23/53] SOLR-8542: couple of tweaks (Michael Nilsson, Diego Ceccarelli, Christine Poerschke) * removed code triplication in ManagedModelStore * LTRScoringQuery.java tweaks * FeatureLogger.makeFeatureVector(...) can now safely be called repeatedly (though that doesn't happen at present) * make Feature.FeatureWeight.extractTerms a no-op; (OriginalScore|SolrFeature)Weight now implement extractTerms * LTRThreadModule javadocs and README.md tweaks * add TestFieldValueFeature.testBooleanValue test; replace "T"/"F" magic string use in FieldValueFeature * add TestOriginalScoreScorer test; add OriginalScoreScorer.freq() method * in TestMultipleAdditiveTreesModel revive dead explain test --- solr/contrib/ltr/README.md | 6 +-- .../org/apache/solr/ltr/FeatureLogger.java | 10 ++-- .../org/apache/solr/ltr/LTRScoringQuery.java | 22 ++++----- .../org/apache/solr/ltr/LTRThreadModule.java | 29 +++++++++++ .../org/apache/solr/ltr/feature/Feature.java | 3 +- .../solr/ltr/feature/FieldValueFeature.java | 18 ++++--- .../ltr/feature/OriginalScoreFeature.java | 12 ++++- .../apache/solr/ltr/feature/SolrFeature.java | 17 +++++-- .../ltr/store/rest/ManagedFeatureStore.java | 1 - .../ltr/store/rest/ManagedModelStore.java | 32 +++++-------- .../solr/collection1/conf/schema.xml | 2 + .../ltr/feature/TestFieldValueFeature.java | 48 ++++++++++++++++--- .../ltr/feature/TestOriginalScoreScorer.java | 47 ++++++++++++++++++ .../model/TestMultipleAdditiveTreesModel.java | 44 ++++++++--------- .../org/apache/solr/schema/BoolField.java | 4 +- 15 files changed, 212 insertions(+), 83 deletions(-) create mode 100644 solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestOriginalScoreScorer.java diff --git a/solr/contrib/ltr/README.md b/solr/contrib/ltr/README.md index 88e2f67b941..2033ffc3e7b 100644 --- a/solr/contrib/ltr/README.md +++ b/solr/contrib/ltr/README.md @@ -390,17 +390,17 @@ About half the time for ranking is spent in the creation of weights for each fea 10 - 5 + 5 10 - 5 + 5 ``` -The threadModule.totalPoolThreads option limits the total number of threads to be used across all query instances at any given time. threadModule.numThreadsPerRequest limits the number of threads used to process a single query. In the above example, 10 threads will be used to services all queries and a maximum of 5 threads to service a single query. If the solr instances is expected to receive no more than one query at a time, it is best to set both these numbers to the same value. If multiple queries need to serviced simultaneously, the numbers can be adjusted based on the expected response times. If the value of threadModule.numThreadsPerRequest is higher, the reponse time for a single query will be improved upto a point. If multiple queries are serviced simultaneously, the threadModule.totalPoolThreads imposes a contention between the queries if (threadModule.numThreadsPerRequest*total parallel queries > threadModule.totalPoolThreads). +The threadModule.totalPoolThreads option limits the total number of threads to be used across all query instances at any given time. threadModule.numThreadsPerRequest limits the number of threads used to process a single query. In the above example, 10 threads will be used to services all queries and a maximum of 5 threads to service a single query. 
If the solr instance is expected to receive no more than one query at a time, it is best to set both these numbers to the same value. If multiple queries need to be serviced simultaneously, the numbers can be adjusted based on the expected response times. If the value of threadModule.numThreadsPerRequest is higher, the response time for a single query will be improved upto a point. If multiple queries are serviced simultaneously, the threadModule.totalPoolThreads imposes a contention between the queries if (threadModule.numThreadsPerRequest*total parallel queries > threadModule.totalPoolThreads). diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/FeatureLogger.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/FeatureLogger.java index a5afd05952c..9c10c2c6917 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/FeatureLogger.java +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/FeatureLogger.java @@ -151,7 +151,6 @@ public abstract class FeatureLogger { } public static class CSVFeatureLogger extends FeatureLogger { - StringBuilder sb = new StringBuilder(500); char keyValueSep = ':'; char featureSep = ';'; @@ -171,6 +170,10 @@ public abstract class FeatureLogger { @Override public String makeFeatureVector(LTRScoringQuery.FeatureInfo[] featuresInfo) { + // Allocate the buffer to a size based on the number of features instead of the + // default 16. You need space for the name, value, and two separators per feature, + // but not all the features are expected to fire, so this is just a naive estimate. + StringBuilder sb = new StringBuilder(featuresInfo.length * 3); boolean isDense = featureFormat.equals(FeatureFormat.DENSE); for (LTRScoringQuery.FeatureInfo featInfo:featuresInfo) { if (featInfo.isUsed() || isDense){ @@ -181,9 +184,8 @@ public abstract class FeatureLogger { } } - final String features = (sb.length() > 0 ? sb.substring(0, - sb.length() - 1) : ""); - sb.setLength(0); + final String features = (sb.length() > 0 ? 
+ sb.substring(0, sb.length() - 1) : ""); return features; } diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/LTRScoringQuery.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/LTRScoringQuery.java index 991c1edf58f..d60ebf55bb0 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/LTRScoringQuery.java +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/LTRScoringQuery.java @@ -205,10 +205,10 @@ public class LTRScoringQuery extends Query { List featureWeights = new ArrayList<>(features.size()); if (querySemaphore == null) { - createWeights(searcher, needsScores, boost, featureWeights, features); + createWeights(searcher, needsScores, featureWeights, features); } else{ - createWeightsParallel(searcher, needsScores, boost, featureWeights, features); + createWeightsParallel(searcher, needsScores, featureWeights, features); } int i=0, j = 0; if (this.extractAllFeatures) { @@ -228,7 +228,7 @@ public class LTRScoringQuery extends Query { return new ModelWeight(modelFeaturesWeights, extractedFeatureWeights, allFeatures.size()); } - private void createWeights(IndexSearcher searcher, boolean needsScores, float boost, + private void createWeights(IndexSearcher searcher, boolean needsScores, List featureWeights, Collection features) throws IOException { final SolrQueryRequest req = getRequest(); // since the feature store is a linkedhashmap order is preserved @@ -271,7 +271,7 @@ public class LTRScoringQuery extends Query { } } // end of call CreateWeightCallable - private void createWeightsParallel(IndexSearcher searcher, boolean needsScores, float boost, + private void createWeightsParallel(IndexSearcher searcher, boolean needsScores, List featureWeights, Collection features) throws RuntimeException { final SolrQueryRequest req = getRequest(); @@ -401,8 +401,9 @@ public class LTRScoringQuery extends Query { /** * Goes through all the stored feature values, and calculates the normalized * values for all the features that will be used for scoring. + * Then calculate and return the model's score. 
*/ - private void makeNormalizedFeatures() { + private float makeNormalizedFeaturesAndScore() { int pos = 0; for (final Feature.FeatureWeight feature : modelFeatureWeights) { final int featureId = feature.getIndex(); @@ -415,6 +416,7 @@ public class LTRScoringQuery extends Query { pos++; } ltrScoringModel.normalizeFeaturesInPlace(modelFeatureValuesNormalized); + return ltrScoringModel.score(modelFeatureValuesNormalized); } @Override @@ -491,8 +493,8 @@ public class LTRScoringQuery extends Query { for (final Feature.FeatureWeight.FeatureScorer subSocer : featureScorers) { subSocer.setDocInfo(docInfo); } - if (featureScorers.size() <= 1) { // TODO: Allow the use of dense - // features in other cases + if (featureScorers.size() <= 1) { + // future enhancement: allow the use of dense features in other cases featureTraversalScorer = new DenseModelScorer(weight, featureScorers); } else { featureTraversalScorer = new SparseModelScorer(weight, featureScorers); @@ -570,8 +572,7 @@ public class LTRScoringQuery extends Query { featuresInfo[featureId].setUsed(true); } } - makeNormalizedFeatures(); - return ltrScoringModel.score(modelFeatureValuesNormalized); + return makeNormalizedFeaturesAndScore(); } @Override @@ -663,8 +664,7 @@ public class LTRScoringQuery extends Query { } } } - makeNormalizedFeatures(); - return ltrScoringModel.score(modelFeatureValuesNormalized); + return makeNormalizedFeaturesAndScore(); } @Override diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/LTRThreadModule.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/LTRThreadModule.java index 8e2563f1e08..b8d0bda3a46 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/LTRThreadModule.java +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/LTRThreadModule.java @@ -29,6 +29,35 @@ import org.apache.solr.util.DefaultSolrThreadFactory; import org.apache.solr.util.SolrPluginUtils; import org.apache.solr.util.plugin.NamedListInitializedPlugin; +/** + * The LTRThreadModule is optionally used by the {@link org.apache.solr.ltr.search.LTRQParserPlugin} and + * {@link org.apache.solr.ltr.response.transform.LTRFeatureLoggerTransformerFactory LTRFeatureLoggerTransformerFactory} + * classes to parallelize the creation of {@link org.apache.solr.ltr.feature.Feature.FeatureWeight Feature.FeatureWeight} + * objects. + *

    + * Example configuration: + *

    +  <queryParser name="ltr" class="org.apache.solr.ltr.search.LTRQParserPlugin">
    +     <int name="threadModule.totalPoolThreads">10</int>
    +     <int name="threadModule.numThreadsPerRequest">5</int>
    +  </queryParser>
    +
    +  <transformer name="features" class="org.apache.solr.ltr.response.transform.LTRFeatureLoggerTransformerFactory">
    +     <int name="threadModule.totalPoolThreads">10</int>
    +     <int name="threadModule.numThreadsPerRequest">5</int>
    +  </transformer>
    +
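    + * With the configuration above, at most ten threads serve all incoming queries and at most five
    + * threads serve any single query: two queries can be serviced fully in parallel (5 * 2 = 10), while
    + * a third concurrent query has to contend for threads (5 * 3 > 10).
    +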
    + * If an individual solr instance is expected to receive no more than one query at a time, it is best + * to set totalPoolThreads and numThreadsPerRequest to the same value. + * + * If multiple queries need to be serviced simultaneously then totalPoolThreads and + * numThreadsPerRequest can be adjusted based on the expected response times. + * + * If the value of numThreadsPerRequest is higher, the response time for a single query + * will be improved up to a point. If multiple queries are serviced simultaneously, the value of + * totalPoolThreads imposes a contention between the queries if + * (totalPoolThreads < numThreadsPerRequest * total parallel queries). + */ final public class LTRThreadModule implements NamedListInitializedPlugin { public static LTRThreadModule getInstance(NamedList args) { diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/Feature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/Feature.java index 228b964e6b9..48e89423ca1 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/Feature.java +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/Feature.java @@ -258,8 +258,7 @@ public abstract class Feature extends Query { @Override public void extractTerms(Set terms) { - // needs to be implemented by query subclasses - throw new UnsupportedOperationException(); + // no-op } /** diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java index 279adbc3ca3..5fcf144d89c 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java @@ -29,6 +29,7 @@ import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.schema.BoolField; /** * This feature returns the value of a field in the current document @@ -119,13 +120,16 @@ public class FieldValueFeature extends Feature { return number.floatValue(); } else { final String string = indexableField.stringValue(); - // boolean values in the index are encoded with the - // chars T/F - if (string.equals("T")) { - return 1; - } - if (string.equals("F")) { - return 0; + if (string.length() == 1) { + // boolean values in the index are encoded with the + // a single char contained in TRUE_TOKEN or FALSE_TOKEN + // (see BoolField) + if (string.charAt(0) == BoolField.TRUE_TOKEN[0]) { + return 1; + } + if (string.charAt(0) == BoolField.FALSE_TOKEN[0]) { + return 0; + } } } } catch (final IOException e) { diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/OriginalScoreFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/OriginalScoreFeature.java index 125615cbb4f..549880be06e 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/OriginalScoreFeature.java +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/OriginalScoreFeature.java @@ -19,8 +19,10 @@ package org.apache.solr.ltr.feature; import java.io.IOException; import java.util.LinkedHashMap; import java.util.Map; +import java.util.Set; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; @@ -76,7 +78,10 @@ public class OriginalScoreFeature extends Feature { 
return "OriginalScoreFeature [query:" + originalQuery.toString() + "]"; } - + @Override + public void extractTerms(Set terms) { + w.extractTerms(terms); + } @Override public FeatureScorer scorer(LeafReaderContext context) throws IOException { @@ -102,6 +107,11 @@ public class OriginalScoreFeature extends Feature { return (docInfo.hasOriginalDocScore() ? docInfo.getOriginalDocScore() : originalScorer.score()); } + @Override + public int freq() throws IOException { + return originalScorer.freq(); + } + @Override public int docID() { return originalScorer.docID(); diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/SolrFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/SolrFeature.java index cb7c1a0c81a..13eb96fee2a 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/SolrFeature.java +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/SolrFeature.java @@ -21,8 +21,10 @@ import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexSearcher; @@ -123,9 +125,9 @@ public class SolrFeature extends Feature { * Weight for a SolrFeature **/ public class SolrFeatureWeight extends FeatureWeight { - Weight solrQueryWeight; - Query query; - List queryAndFilters; + final private Weight solrQueryWeight; + final private Query query; + final private List queryAndFilters; public SolrFeatureWeight(IndexSearcher searcher, SolrQueryRequest request, Query originalQuery, Map efi) throws IOException { @@ -174,6 +176,8 @@ public class SolrFeature extends Feature { if (query != null) { queryAndFilters.add(query); solrQueryWeight = searcher.createNormalizedWeight(query, true); + } else { + solrQueryWeight = null; } } catch (final SyntaxError e) { throw new FeatureException("Failed to parse feature query.", e); @@ -201,6 +205,13 @@ public class SolrFeature extends Feature { } } + @Override + public void extractTerms(Set terms) { + if (solrQueryWeight != null) { + solrQueryWeight.extractTerms(terms); + } + } + @Override public FeatureScorer scorer(LeafReaderContext context) throws IOException { Scorer solrScorer = null; diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/store/rest/ManagedFeatureStore.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/store/rest/ManagedFeatureStore.java index beb217c5c37..2c7bce58156 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/store/rest/ManagedFeatureStore.java +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/store/rest/ManagedFeatureStore.java @@ -57,7 +57,6 @@ public class ManagedFeatureStore extends ManagedResource implements ManagedResou /** the feature store rest endpoint **/ public static final String REST_END_POINT = "/schema/feature-store"; - // TODO: reduce from public to package visibility (once tests no longer need public access) /** name of the attribute containing the feature class **/ static final String CLASS_KEY = "class"; diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/store/rest/ManagedModelStore.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/store/rest/ManagedModelStore.java index 97aaa4004ad..9c19b0a7c26 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/store/rest/ManagedModelStore.java +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/store/rest/ManagedModelStore.java @@ -61,7 
+61,6 @@ public class ManagedModelStore extends ManagedResource implements ManagedResourc /** the model store rest endpoint **/ public static final String REST_END_POINT = "/schema/model-store"; - // TODO: reduce from public to package visibility (once tests no longer need public access) /** * Managed model store: the name of the attribute containing all the models of @@ -124,16 +123,20 @@ public class ManagedModelStore extends ManagedResource implements ManagedResourc if ((managedData != null) && (managedData instanceof List)) { final List> up = (List>) managedData; for (final Map u : up) { - try { - final LTRScoringModel algo = fromLTRScoringModelMap(solrResourceLoader, u, managedFeatureStore); - addModel(algo); - } catch (final ModelException e) { - throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e); - } + addModelFromMap(u); } } } + private void addModelFromMap(Map modelMap) { + try { + final LTRScoringModel algo = fromLTRScoringModelMap(solrResourceLoader, modelMap, managedFeatureStore); + addModel(algo); + } catch (final ModelException e) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e); + } + } + public synchronized void addModel(LTRScoringModel ltrScoringModel) throws ModelException { try { log.info("adding model {}", ltrScoringModel.getName()); @@ -146,26 +149,17 @@ public class ManagedModelStore extends ManagedResource implements ManagedResourc @SuppressWarnings("unchecked") @Override protected Object applyUpdatesToManagedData(Object updates) { + if (updates instanceof List) { final List> up = (List>) updates; for (final Map u : up) { - try { - final LTRScoringModel algo = fromLTRScoringModelMap(solrResourceLoader, u, managedFeatureStore); - addModel(algo); - } catch (final ModelException e) { - throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e); - } + addModelFromMap(u); } } if (updates instanceof Map) { final Map map = (Map) updates; - try { - final LTRScoringModel algo = fromLTRScoringModelMap(solrResourceLoader, map, managedFeatureStore); - addModel(algo); - } catch (final ModelException e) { - throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e); - } + addModelFromMap(map); } return modelsAsManagedResources(store.getModels()); diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml index 15cf140cc09..0b958c0aca3 100644 --- a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml +++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml @@ -24,6 +24,8 @@ + + diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java index af150c060e4..95742733cc7 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java @@ -32,21 +32,21 @@ public class TestFieldValueFeature extends TestRerankBase { setuptest("solrconfig-ltr.xml", "schema.xml"); assertU(adoc("id", "1", "title", "w1", "description", "w1", "popularity", - "1")); + "1","isTrendy","true")); assertU(adoc("id", "2", "title", "w2 2asd asdd didid", "description", "w2 2asd asdd didid", "popularity", "2")); assertU(adoc("id", "3", "title", "w3", "description", "w3", "popularity", - "3")); + "3","isTrendy","true")); assertU(adoc("id", "4", "title", "w4", "description", "w4", "popularity", - "4")); + "4","isTrendy","false")); 
assertU(adoc("id", "5", "title", "w5", "description", "w5", "popularity", - "5")); + "5","isTrendy","true")); assertU(adoc("id", "6", "title", "w1 w2", "description", "w1 w2", - "popularity", "6")); + "popularity", "6","isTrendy","false")); assertU(adoc("id", "7", "title", "w1 w2 w3 w4 w5", "description", - "w1 w2 w3 w4 w5 w8", "popularity", "7")); + "w1 w2 w3 w4 w5 w8", "popularity", "7","isTrendy","true")); assertU(adoc("id", "8", "title", "w1 w1 w1 w2 w2 w8", "description", - "w1 w1 w1 w2 w2", "popularity", "8")); + "w1 w1 w1 w2 w2", "popularity", "8","isTrendy","false")); // a document without the popularity field assertU(adoc("id", "42", "title", "NO popularity", "description", "NO popularity")); @@ -169,5 +169,39 @@ public class TestFieldValueFeature extends TestRerankBase { } + @Test + public void testBooleanValue() throws Exception { + final String fstore = "test_boolean_store"; + loadFeature("trendy", FieldValueFeature.class.getCanonicalName(), fstore, + "{\"field\":\"isTrendy\"}"); + + loadModel("trendy-model", LinearModel.class.getCanonicalName(), + new String[] {"trendy"}, fstore, "{\"weights\":{\"trendy\":1.0}}"); + + SolrQuery query = new SolrQuery(); + query.setQuery("id:4"); + query.add("rq", "{!ltr model=trendy-model reRankDocs=4}"); + query.add("fl", "[fv]"); + assertJQ("/query" + query.toQueryString(), + "/response/docs/[0]/=={'[fv]':'trendy:0.0'}"); + + + query = new SolrQuery(); + query.setQuery("id:5"); + query.add("rq", "{!ltr model=trendy-model reRankDocs=4}"); + query.add("fl", "[fv]"); + assertJQ("/query" + query.toQueryString(), + "/response/docs/[0]/=={'[fv]':'trendy:1.0'}"); + + // check default value is false + query = new SolrQuery(); + query.setQuery("id:2"); + query.add("rq", "{!ltr model=trendy-model reRankDocs=4}"); + query.add("fl", "[fv]"); + assertJQ("/query" + query.toQueryString(), + "/response/docs/[0]/=={'[fv]':'trendy:0.0'}"); + + } + } diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestOriginalScoreScorer.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestOriginalScoreScorer.java new file mode 100644 index 00000000000..e85ebedf084 --- /dev/null +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestOriginalScoreScorer.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.ltr.feature; + +import java.lang.reflect.Method; +import java.lang.reflect.Modifier; + +import org.apache.lucene.search.Scorer; +import org.apache.lucene.util.LuceneTestCase; +import org.junit.Test; + +public class TestOriginalScoreScorer extends LuceneTestCase { + + @Test + public void testOverridesAbstractScorerMethods() { + final Class ossClass = OriginalScoreFeature.OriginalScoreWeight.OriginalScoreScorer.class; + for (final Method scorerClassMethod : Scorer.class.getDeclaredMethods()) { + final int modifiers = scorerClassMethod.getModifiers(); + if (!Modifier.isAbstract(modifiers)) continue; + + try { + final Method ossClassMethod = ossClass.getDeclaredMethod( + scorerClassMethod.getName(), + scorerClassMethod.getParameterTypes()); + assertEquals("getReturnType() difference", + scorerClassMethod.getReturnType(), + ossClassMethod.getReturnType()); + } catch (NoSuchMethodException e) { + fail(ossClass + " needs to override '" + scorerClassMethod + "'"); + } + } + } +} diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/model/TestMultipleAdditiveTreesModel.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/model/TestMultipleAdditiveTreesModel.java index 3748331a43e..560437078cb 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/model/TestMultipleAdditiveTreesModel.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/model/TestMultipleAdditiveTreesModel.java @@ -16,7 +16,7 @@ */ package org.apache.solr.ltr.model; -//import static org.junit.internal.matchers.StringContains.containsString; +import static org.junit.internal.matchers.StringContains.containsString; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.ltr.TestRerankBase; @@ -93,30 +93,28 @@ public class TestMultipleAdditiveTreesModel extends TestRerankBase { // test out the explain feature, make sure it returns something query.setParam("debugQuery", "on"); - String qryResult = JQ("/query" + query.toQueryString()); + String qryResult = JQ("/query" + query.toQueryString()); qryResult = qryResult.replaceAll("\n", " "); - // FIXME containsString doesn't exist. 
- // assertThat(qryResult, containsString("\"debug\":{")); - // qryResult = qryResult.substring(qryResult.indexOf("debug")); - // - // assertThat(qryResult, containsString("\"explain\":{")); - // qryResult = qryResult.substring(qryResult.indexOf("explain")); - // - // assertThat(qryResult, containsString("multipleadditivetreesmodel")); - // assertThat(qryResult, - // containsString(MultipleAdditiveTreesModel.class.getCanonicalName())); - // - // assertThat(qryResult, containsString("-100.0 = tree 0")); - // assertThat(qryResult, containsString("50.0 = tree 0")); - // assertThat(qryResult, containsString("-20.0 = tree 1")); - // assertThat(qryResult, containsString("'matchedTitle':1.0 > 0.5")); - // assertThat(qryResult, containsString("'matchedTitle':0.0 <= 0.5")); - // - // assertThat(qryResult, containsString(" Go Right ")); - // assertThat(qryResult, containsString(" Go Left ")); - // assertThat(qryResult, - // containsString("'this_feature_doesnt_exist' does not exist in FV")); + + assertThat(qryResult, containsString("\"debug\":{")); + qryResult = qryResult.substring(qryResult.indexOf("debug")); + + assertThat(qryResult, containsString("\"explain\":{")); + qryResult = qryResult.substring(qryResult.indexOf("explain")); + + assertThat(qryResult, containsString("multipleadditivetreesmodel")); + assertThat(qryResult, containsString(MultipleAdditiveTreesModel.class.getCanonicalName())); + + assertThat(qryResult, containsString("-100.0 = tree 0")); + assertThat(qryResult, containsString("50.0 = tree 0")); + assertThat(qryResult, containsString("-20.0 = tree 1")); + assertThat(qryResult, containsString("'matchedTitle':1.0 > 0.5")); + assertThat(qryResult, containsString("'matchedTitle':0.0 <= 0.5")); + + assertThat(qryResult, containsString(" Go Right ")); + assertThat(qryResult, containsString(" Go Left ")); + assertThat(qryResult, containsString("'this_feature_doesnt_exist' does not exist in FV")); } @Test diff --git a/solr/core/src/java/org/apache/solr/schema/BoolField.java b/solr/core/src/java/org/apache/solr/schema/BoolField.java index 210ea0ba103..1645ee6cbf6 100644 --- a/solr/core/src/java/org/apache/solr/schema/BoolField.java +++ b/solr/core/src/java/org/apache/solr/schema/BoolField.java @@ -71,8 +71,8 @@ public class BoolField extends PrimitiveFieldType { } // avoid instantiating every time... 
- protected final static char[] TRUE_TOKEN = {'T'}; - protected final static char[] FALSE_TOKEN = {'F'}; + public final static char[] TRUE_TOKEN = {'T'}; + public final static char[] FALSE_TOKEN = {'F'}; //////////////////////////////////////////////////////////////////////// // TODO: look into creating my own queryParser that can more efficiently From cacabc9a4edf299f1dd2e5d08cc66845bc52fe98 Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Wed, 7 Dec 2016 21:16:42 +0000 Subject: [PATCH 24/53] fix java warning in SolrQueryTest --- .../src/test/org/apache/solr/client/solrj/SolrQueryTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrQueryTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrQueryTest.java index d27847f6803..1c86c93db53 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrQueryTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrQueryTest.java @@ -441,7 +441,7 @@ public class SolrQueryTest extends LuceneTestCase { solrQuery.addMoreLikeThisField("mlt3"); solrQuery.addMoreLikeThisField("mlt4"); assertEquals(4, solrQuery.getMoreLikeThisFields().length); - solrQuery.setMoreLikeThisFields(null); + solrQuery.setMoreLikeThisFields((String[])null); assertTrue(null == solrQuery.getMoreLikeThisFields()); assertFalse(solrQuery.getMoreLikeThis()); From b97d9d7478f99660c1cfc91ef4461b7405254dea Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Wed, 7 Dec 2016 18:59:23 -0500 Subject: [PATCH 25/53] LUCENE-7583: buffer small leaf-block writes in BKDWriter --- lucene/CHANGES.txt | 4 + .../CompressingStoredFieldsWriter.java | 19 +++-- .../CompressingTermVectorsWriter.java | 11 +-- .../GrowableByteArrayDataOutput.java | 32 +++++-- .../org/apache/lucene/util/bkd/BKDWriter.java | 85 ++++++++++--------- .../apache/lucene/util/bkd/DocIdsWriter.java | 4 +- .../TestGrowableByteArrayDataOutput.java | 14 +-- 7 files changed, 101 insertions(+), 68 deletions(-) rename lucene/core/src/java/org/apache/lucene/{codecs/compressing => util}/GrowableByteArrayDataOutput.java (83%) rename lucene/core/src/test/org/apache/lucene/{codecs/compressing => store}/TestGrowableByteArrayDataOutput.java (89%) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index c6c39ac45fb..26a9dec0014 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -134,6 +134,10 @@ Optimizations a compressed format, using substantially less RAM in some cases (Adrien Grand, Mike McCandless) +* LUCENE-7583: BKD writing now buffers each leaf block in heap before + writing to disk, giving a small speedup in points-heavy use cases. 
+ (Mike McCandless) + Other * LUCENE-7546: Fixed references to benchmark wikipedia data and the Jenkins line-docs file diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java index 1956ab70683..cda855defcb 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java @@ -33,6 +33,7 @@ import org.apache.lucene.index.MergeState; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.GrowableByteArrayDataOutput; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; @@ -157,7 +158,7 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter { } this.numStoredFields[numBufferedDocs] = numStoredFieldsInDoc; numStoredFieldsInDoc = 0; - endOffsets[numBufferedDocs] = bufferedDocs.length; + endOffsets[numBufferedDocs] = bufferedDocs.getPosition(); ++numBufferedDocs; if (triggerFlush()) { flush(); @@ -210,7 +211,7 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter { } private boolean triggerFlush() { - return bufferedDocs.length >= chunkSize || // chunks of at least chunkSize bytes + return bufferedDocs.getPosition() >= chunkSize || // chunks of at least chunkSize bytes numBufferedDocs >= maxDocsPerChunk; } @@ -223,23 +224,23 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter { lengths[i] = endOffsets[i] - endOffsets[i - 1]; assert lengths[i] >= 0; } - final boolean sliced = bufferedDocs.length >= 2 * chunkSize; + final boolean sliced = bufferedDocs.getPosition() >= 2 * chunkSize; writeHeader(docBase, numBufferedDocs, numStoredFields, lengths, sliced); // compress stored fields to fieldsStream if (sliced) { // big chunk, slice it - for (int compressed = 0; compressed < bufferedDocs.length; compressed += chunkSize) { - compressor.compress(bufferedDocs.bytes, compressed, Math.min(chunkSize, bufferedDocs.length - compressed), fieldsStream); + for (int compressed = 0; compressed < bufferedDocs.getPosition(); compressed += chunkSize) { + compressor.compress(bufferedDocs.getBytes(), compressed, Math.min(chunkSize, bufferedDocs.getPosition() - compressed), fieldsStream); } } else { - compressor.compress(bufferedDocs.bytes, 0, bufferedDocs.length, fieldsStream); + compressor.compress(bufferedDocs.getBytes(), 0, bufferedDocs.getPosition(), fieldsStream); } // reset docBase += numBufferedDocs; numBufferedDocs = 0; - bufferedDocs.length = 0; + bufferedDocs.reset(); numChunks++; } @@ -459,7 +460,7 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter { flush(); numDirtyChunks++; // incomplete: we had to force this flush } else { - assert bufferedDocs.length == 0; + assert bufferedDocs.getPosition() == 0; } if (docBase != numDocs) { throw new RuntimeException("Wrote " + docBase + " docs, finish called with numDocs=" + numDocs); @@ -468,7 +469,7 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter { fieldsStream.writeVLong(numChunks); fieldsStream.writeVLong(numDirtyChunks); CodecUtil.writeFooter(fieldsStream); - assert bufferedDocs.length == 0; + assert bufferedDocs.getPosition() == 0; } // bulk merge is scary: its caused 
corruption bugs in the past. diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java index 46a289a97b5..9bd2483389e 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java @@ -37,6 +37,7 @@ import org.apache.lucene.index.MergeState; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.GrowableByteArrayDataOutput; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; @@ -269,8 +270,8 @@ public final class CompressingTermVectorsWriter extends TermVectorsWriter { @Override public void finishDocument() throws IOException { // append the payload bytes of the doc after its terms - termSuffixes.writeBytes(payloadBytes.bytes, payloadBytes.length); - payloadBytes.length = 0; + termSuffixes.writeBytes(payloadBytes.getBytes(), payloadBytes.getPosition()); + payloadBytes.reset(); ++numDocs; if (triggerFlush()) { flush(); @@ -316,7 +317,7 @@ public final class CompressingTermVectorsWriter extends TermVectorsWriter { } private boolean triggerFlush() { - return termSuffixes.length >= chunkSize + return termSuffixes.getPosition() >= chunkSize || pendingDocs.size() >= MAX_DOCUMENTS_PER_CHUNK; } @@ -355,14 +356,14 @@ public final class CompressingTermVectorsWriter extends TermVectorsWriter { flushPayloadLengths(); // compress terms and payloads and write them to the output - compressor.compress(termSuffixes.bytes, 0, termSuffixes.length, vectorsStream); + compressor.compress(termSuffixes.getBytes(), 0, termSuffixes.getPosition(), vectorsStream); } // reset pendingDocs.clear(); curDoc = null; curField = null; - termSuffixes.length = 0; + termSuffixes.reset(); numChunks++; } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/GrowableByteArrayDataOutput.java b/lucene/core/src/java/org/apache/lucene/util/GrowableByteArrayDataOutput.java similarity index 83% rename from lucene/core/src/java/org/apache/lucene/codecs/compressing/GrowableByteArrayDataOutput.java rename to lucene/core/src/java/org/apache/lucene/util/GrowableByteArrayDataOutput.java index ec551d14d1f..5f00d4a6ab0 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/GrowableByteArrayDataOutput.java +++ b/lucene/core/src/java/org/apache/lucene/util/GrowableByteArrayDataOutput.java @@ -14,8 +14,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.lucene.codecs.compressing; +package org.apache.lucene.store; import java.io.IOException; @@ -25,6 +25,7 @@ import org.apache.lucene.util.UnicodeUtil; /** * A {@link DataOutput} that can be used to build a byte[]. 
+ * * @lucene.internal */ public final class GrowableByteArrayDataOutput extends DataOutput { @@ -33,12 +34,13 @@ public final class GrowableByteArrayDataOutput extends DataOutput { static final int MIN_UTF8_SIZE_TO_ENABLE_DOUBLE_PASS_ENCODING = 65536; /** The bytes */ - public byte[] bytes; + private byte[] bytes; + /** The length */ - public int length; + private int length; // scratch for utf8 encoding of small strings - byte[] scratchBytes = new byte[16]; + private byte[] scratchBytes; /** Create a {@link GrowableByteArrayDataOutput} with the given initial capacity. */ public GrowableByteArrayDataOutput(int cp) { @@ -57,7 +59,9 @@ public final class GrowableByteArrayDataOutput extends DataOutput { @Override public void writeBytes(byte[] b, int off, int len) { final int newLength = length + len; - bytes = ArrayUtil.grow(bytes, newLength); + if (newLength > bytes.length) { + bytes = ArrayUtil.grow(bytes, newLength); + } System.arraycopy(b, off, bytes, length, len); length = newLength; } @@ -68,7 +72,11 @@ public final class GrowableByteArrayDataOutput extends DataOutput { if (maxLen <= MIN_UTF8_SIZE_TO_ENABLE_DOUBLE_PASS_ENCODING) { // string is small enough that we don't need to save memory by falling back to double-pass approach // this is just an optimized writeString() that re-uses scratchBytes. - scratchBytes = ArrayUtil.grow(scratchBytes, maxLen); + if (scratchBytes == null) { + scratchBytes = new byte[ArrayUtil.oversize(maxLen, Character.BYTES)]; + } else { + scratchBytes = ArrayUtil.grow(scratchBytes, maxLen); + } int len = UnicodeUtil.UTF16toUTF8(string, 0, string.length(), scratchBytes); writeVInt(len); writeBytes(scratchBytes, len); @@ -80,4 +88,16 @@ public final class GrowableByteArrayDataOutput extends DataOutput { length = UnicodeUtil.UTF16toUTF8(string, 0, string.length(), bytes, length); } } + + public byte[] getBytes() { + return bytes; + } + + public int getPosition() { + return length; + } + + public void reset() { + length = 0; + } } diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java index c82a0c8bf25..96575780b44 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java @@ -30,7 +30,9 @@ import org.apache.lucene.index.MergeState; import org.apache.lucene.index.PointValues.IntersectVisitor; import org.apache.lucene.index.PointValues.Relation; import org.apache.lucene.store.ChecksumIndexInput; +import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.GrowableByteArrayDataOutput; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.RAMOutputStream; @@ -478,8 +480,8 @@ public class BKDWriter implements Closeable { } build(1, numLeaves, values, 0, Math.toIntExact(pointCount), out, - minPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs, - new int[maxPointsInLeafNode]); + minPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs, + new int[maxPointsInLeafNode]); long indexFP = out.getFilePointer(); writeIndex(out, leafBlockFPs, splitPackedValues); @@ -556,6 +558,9 @@ public class BKDWriter implements Closeable { return oneDimWriter.finish(); } + // reused when writing leaf blocks + private final GrowableByteArrayDataOutput scratchOut = new GrowableByteArrayDataOutput(32*1024); + private class OneDimensionBKDWriter { final IndexOutput out; @@ -563,8 +568,8 @@ 
public class BKDWriter implements Closeable { final List leafBlockStartValues = new ArrayList<>(); final byte[] leafValues = new byte[maxPointsInLeafNode * packedBytesLength]; final int[] leafDocs = new int[maxPointsInLeafNode]; - long valueCount; - int leafCount; + private long valueCount; + private int leafCount; OneDimensionBKDWriter(IndexOutput out) { if (numDims != 1) { @@ -589,7 +594,7 @@ public class BKDWriter implements Closeable { // for asserts final byte[] lastPackedValue; - int lastDocID; + private int lastDocID; void add(byte[] packedValue, int docID) throws IOException { assert valueInOrder(valueCount + leafCount, @@ -606,8 +611,7 @@ public class BKDWriter implements Closeable { if (leafCount == maxPointsInLeafNode) { // We write a block once we hit exactly the max count ... this is different from - // when we flush a new segment, where we write between max/2 and max per leaf block, - // so merged segments will behave differently from newly flushed segments: + // when we write N > 1 dimensional points where we write between max/2 and max per leaf block writeLeafBlock(); leafCount = 0; } @@ -644,7 +648,6 @@ public class BKDWriter implements Closeable { } private void writeLeafBlock() throws IOException { - //System.out.println("writeLeafBlock pos=" + out.getFilePointer()); assert leafCount != 0; if (valueCount == 0) { System.arraycopy(leafValues, 0, minPackedValue, 0, packedBytesLength); @@ -660,42 +663,39 @@ public class BKDWriter implements Closeable { leafBlockFPs.add(out.getFilePointer()); checkMaxLeafNodeCount(leafBlockFPs.size()); - Arrays.fill(commonPrefixLengths, bytesPerDim); // Find per-dim common prefix: - for(int dim=0;dim packedValues = new IntFunction() { - final BytesRef scratch = new BytesRef(); - - { - scratch.length = packedBytesLength; - scratch.bytes = leafValues; - } - @Override public BytesRef apply(int i) { - scratch.offset = packedBytesLength * i; - return scratch; + scratchBytesRef1.offset = packedBytesLength * i; + return scratchBytesRef1; } }; assert valuesInOrderAndBounds(leafCount, 0, Arrays.copyOf(leafValues, packedBytesLength), Arrays.copyOfRange(leafValues, (leafCount - 1) * packedBytesLength, leafCount * packedBytesLength), packedValues, leafDocs, 0); - writeLeafBlockPackedValues(out, commonPrefixLengths, leafCount, 0, packedValues); + writeLeafBlockPackedValues(scratchOut, commonPrefixLengths, leafCount, 0, packedValues); + out.writeBytes(scratchOut.getBytes(), 0, scratchOut.getPosition()); + scratchOut.reset(); } - } // TODO: there must be a simpler way? 
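
// A minimal sketch of the buffer-then-flush pattern used above: small per-value writes go to a
// reusable in-heap GrowableByteArrayDataOutput, and the finished leaf block is copied to the on-disk
// output with a single writeBytes call. The class and member names here (LeafBlockBufferingSketch,
// scratch, writeBlock) are illustrative only; getBytes(), getPosition() and reset() are the accessors
// added by this change.
import java.io.IOException;

import org.apache.lucene.store.GrowableByteArrayDataOutput;
import org.apache.lucene.store.IndexOutput;

class LeafBlockBufferingSketch {
  // reused across leaf blocks so the backing byte[] is allocated once and only grown as needed
  private final GrowableByteArrayDataOutput scratch = new GrowableByteArrayDataOutput(32 * 1024);

  void writeBlock(IndexOutput out, int[] docIDs, int count) throws IOException {
    for (int i = 0; i < count; i++) {
      scratch.writeVInt(docIDs[i]);   // many small, cheap writes into the heap buffer
    }
    // one large sequential write to the index output, then reuse the buffer for the next block
    out.writeBytes(scratch.getBytes(), 0, scratch.getPosition());
    scratch.reset();
  }
}
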
@@ -1259,13 +1259,13 @@ public class BKDWriter implements Closeable { out.writeBytes(packedIndex, 0, packedIndex.length); } - private void writeLeafBlockDocs(IndexOutput out, int[] docIDs, int start, int count) throws IOException { + private void writeLeafBlockDocs(DataOutput out, int[] docIDs, int start, int count) throws IOException { assert count > 0: "maxPointsInLeafNode=" + maxPointsInLeafNode; out.writeVInt(count); DocIdsWriter.writeDocIds(docIDs, start, count, out); } - private void writeLeafBlockPackedValues(IndexOutput out, int[] commonPrefixLengths, int count, int sortedDim, IntFunction packedValues) throws IOException { + private void writeLeafBlockPackedValues(DataOutput out, int[] commonPrefixLengths, int count, int sortedDim, IntFunction packedValues) throws IOException { int prefixLenSum = Arrays.stream(commonPrefixLengths).sum(); if (prefixLenSum == packedBytesLength) { // all values in this block are equal @@ -1290,7 +1290,7 @@ public class BKDWriter implements Closeable { } } - private void writeLeafBlockPackedValuesRange(IndexOutput out, int[] commonPrefixLengths, int start, int end, IntFunction packedValues) throws IOException { + private void writeLeafBlockPackedValuesRange(DataOutput out, int[] commonPrefixLengths, int start, int end, IntFunction packedValues) throws IOException { for (int i = start; i < end; ++i) { BytesRef ref = packedValues.apply(i); assert ref.length == packedBytesLength; @@ -1316,7 +1316,7 @@ public class BKDWriter implements Closeable { return end - start; } - private void writeCommonPrefixes(IndexOutput out, int[] commonPrefixes, byte[] packedValue) throws IOException { + private void writeCommonPrefixes(DataOutput out, int[] commonPrefixes, byte[] packedValue) throws IOException { for(int dim=0;dim Date: Thu, 8 Dec 2016 12:03:55 -0500 Subject: [PATCH 26/53] SOLR-9834: A variety of spots in the code can create a collection zk node after the collection has been removed. --- solr/CHANGES.txt | 3 + .../solr/cloud/CloudConfigSetService.java | 24 ++- .../apache/solr/cloud/CloudDescriptor.java | 2 +- .../solr/cloud/CreateCollectionCmd.java | 146 +++++++++++++++++- .../apache/solr/cloud/ElectionContext.java | 20 +-- .../org/apache/solr/cloud/LeaderElector.java | 9 +- .../org/apache/solr/cloud/ZkController.java | 128 +-------------- .../solr/cloud/ZkSolrResourceLoader.java | 2 +- .../apache/solr/core/ConfigSetService.java | 7 +- .../handler/admin/CollectionsHandler.java | 2 + .../apache/solr/cloud/LeaderElectionTest.java | 2 + ...rseerCollectionConfigSetProcessorTest.java | 33 ++-- .../apache/solr/cloud/ZkSolrClientTest.java | 54 +++++++ .../solr/common/cloud/SolrZkClient.java | 31 +++- .../solr/common/cloud/ZkCmdExecutor.java | 15 +- 15 files changed, 308 insertions(+), 170 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 8dee8379901..abd99978022 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -230,6 +230,9 @@ Bug Fixes * SOLR-9832: Schema modifications are not immediately visible on the coordinating node. (Steve Rowe) +* SOLR-9834: A variety of spots in the code can create a collection zk node after the collection has been + removed. 
(Mark Miller) + Other Changes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/cloud/CloudConfigSetService.java b/solr/core/src/java/org/apache/solr/cloud/CloudConfigSetService.java index bf11e921850..6e0583f2ca1 100644 --- a/solr/core/src/java/org/apache/solr/cloud/CloudConfigSetService.java +++ b/solr/core/src/java/org/apache/solr/cloud/CloudConfigSetService.java @@ -16,12 +16,20 @@ */ package org.apache.solr.cloud; +import java.lang.invoke.MethodHandles; + +import org.apache.solr.common.SolrException; +import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.core.ConfigSetService; import org.apache.solr.core.CoreDescriptor; import org.apache.solr.core.SolrResourceLoader; +import org.apache.zookeeper.KeeperException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class CloudConfigSetService extends ConfigSetService { - + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + private final ZkController zkController; public CloudConfigSetService(SolrResourceLoader loader, ZkController zkController) { @@ -31,8 +39,18 @@ public class CloudConfigSetService extends ConfigSetService { @Override public SolrResourceLoader createCoreResourceLoader(CoreDescriptor cd) { - // TODO: Shouldn't the collection node be created by the Collections API? - zkController.createCollectionZkNode(cd.getCloudDescriptor()); + try { + // for back compat with cores that can create collections without the collections API + if (!zkController.getZkClient().exists(ZkStateReader.COLLECTIONS_ZKNODE + "/" + cd.getCollectionName(), true)) { + CreateCollectionCmd.createCollectionZkNode(zkController.getZkClient(), cd.getCollectionName(), cd.getCloudDescriptor().getParams()); + } + } catch (KeeperException e) { + SolrException.log(log, null, e); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + SolrException.log(log, null, e); + } + String configName = zkController.getZkStateReader().readConfigName(cd.getCollectionName()); return new ZkSolrResourceLoader(cd.getInstanceDir(), configName, parentLoader.getClassLoader(), cd.getSubstitutableProperties(), zkController); diff --git a/solr/core/src/java/org/apache/solr/cloud/CloudDescriptor.java b/solr/core/src/java/org/apache/solr/cloud/CloudDescriptor.java index 4dd1527b0c1..fdc7b02dae5 100644 --- a/solr/core/src/java/org/apache/solr/cloud/CloudDescriptor.java +++ b/solr/core/src/java/org/apache/solr/cloud/CloudDescriptor.java @@ -33,7 +33,7 @@ public class CloudDescriptor { private String roles = null; private Integer numShards; private String nodeName = null; - private Map collectionParams = new HashMap<>(); + private Map collectionParams = new HashMap<>(); private volatile boolean isLeader = false; diff --git a/solr/core/src/java/org/apache/solr/cloud/CreateCollectionCmd.java b/solr/core/src/java/org/apache/solr/cloud/CreateCollectionCmd.java index a067b4ae65f..a1bb70e36ab 100644 --- a/solr/core/src/java/org/apache/solr/cloud/CreateCollectionCmd.java +++ b/solr/core/src/java/org/apache/solr/cloud/CreateCollectionCmd.java @@ -25,19 +25,23 @@ import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Properties; import java.util.concurrent.TimeUnit; import org.apache.solr.cloud.OverseerCollectionMessageHandler.Cmd; import org.apache.solr.cloud.overseer.ClusterStateMutator; import org.apache.solr.cloud.rule.ReplicaAssigner; import org.apache.solr.common.SolrException; +import 
org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.cloud.ClusterState; import org.apache.solr.common.cloud.DocRouter; import org.apache.solr.common.cloud.ImplicitDocRouter; import org.apache.solr.common.cloud.Replica; +import org.apache.solr.common.cloud.SolrZkClient; import org.apache.solr.common.cloud.ZkConfigManager; import org.apache.solr.common.cloud.ZkNodeProps; import org.apache.solr.common.cloud.ZkStateReader; +import org.apache.solr.common.cloud.ZooKeeperException; import org.apache.solr.common.params.CoreAdminParams; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.util.NamedList; @@ -46,7 +50,9 @@ import org.apache.solr.common.util.Utils; import org.apache.solr.handler.component.ShardHandler; import org.apache.solr.handler.component.ShardRequest; import org.apache.solr.util.TimeOut; +import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.KeeperException.NoNodeException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -64,9 +70,11 @@ import static org.apache.solr.common.util.StrUtils.formatString; public class CreateCollectionCmd implements Cmd { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private final OverseerCollectionMessageHandler ocmh; + private SolrZkClient zkClient; public CreateCollectionCmd(OverseerCollectionMessageHandler ocmh) { this.ocmh = ocmh; + this.zkClient = ocmh.zkStateReader.getZkClient(); } @Override @@ -84,7 +92,6 @@ public class CreateCollectionCmd implements Cmd { ocmh.validateConfigOrThrowSolrException(configName); - try { // look at the replication factor and see if it matches reality // if it does not, find best nodes to create more cores @@ -157,10 +164,20 @@ public class CreateCollectionCmd implements Cmd { } ZkStateReader zkStateReader = ocmh.zkStateReader; - boolean isLegacyCloud = Overseer.isLegacy(zkStateReader); + boolean isLegacyCloud = Overseer.isLegacy(zkStateReader); ocmh.createConfNode(configName, collectionName, isLegacyCloud); + Map collectionParams = new HashMap<>(); + Map collectionProps = message.getProperties(); + for (String propName : collectionProps.keySet()) { + if (propName.startsWith(ZkController.COLLECTION_PARAM_PREFIX)) { + collectionParams.put(propName.substring(ZkController.COLLECTION_PARAM_PREFIX.length()), (String) collectionProps.get(propName)); + } + } + + createCollectionZkNode(zkClient, collectionName, collectionParams); + Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(message)); // wait for a while until we don't see the collection @@ -288,4 +305,129 @@ public class CreateCollectionCmd implements Cmd { } return configName; } + + public static void createCollectionZkNode(SolrZkClient zkClient, String collection, Map params) { + log.debug("Check for collection zkNode:" + collection); + String collectionPath = ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection; + + try { + if (!zkClient.exists(collectionPath, true)) { + log.debug("Creating collection in ZooKeeper:" + collection); + + try { + Map collectionProps = new HashMap<>(); + + // TODO: if collection.configName isn't set, and there isn't already a conf in zk, just use that? 
+ String defaultConfigName = System.getProperty(ZkController.COLLECTION_PARAM_PREFIX + ZkController.CONFIGNAME_PROP, collection); + + if (params.size() > 0) { + collectionProps.putAll(params); + // if the config name wasn't passed in, use the default + if (!collectionProps.containsKey(ZkController.CONFIGNAME_PROP)) { + // users can create the collection node and conf link ahead of time, or this may return another option + getConfName(zkClient, collection, collectionPath, collectionProps); + } + + } else if (System.getProperty("bootstrap_confdir") != null) { + // if we are bootstrapping a collection, default the config for + // a new collection to the collection we are bootstrapping + log.info("Setting config for collection:" + collection + " to " + defaultConfigName); + + Properties sysProps = System.getProperties(); + for (String sprop : System.getProperties().stringPropertyNames()) { + if (sprop.startsWith(ZkController.COLLECTION_PARAM_PREFIX)) { + collectionProps.put(sprop.substring(ZkController.COLLECTION_PARAM_PREFIX.length()), sysProps.getProperty(sprop)); + } + } + + // if the config name wasn't passed in, use the default + if (!collectionProps.containsKey(ZkController.CONFIGNAME_PROP)) + collectionProps.put(ZkController.CONFIGNAME_PROP, defaultConfigName); + + } else if (Boolean.getBoolean("bootstrap_conf")) { + // the conf name should should be the collection name of this core + collectionProps.put(ZkController.CONFIGNAME_PROP, collection); + } else { + getConfName(zkClient, collection, collectionPath, collectionProps); + } + + collectionProps.remove(ZkStateReader.NUM_SHARDS_PROP); // we don't put numShards in the collections properties + + ZkNodeProps zkProps = new ZkNodeProps(collectionProps); + zkClient.makePath(collectionPath, Utils.toJSON(zkProps), CreateMode.PERSISTENT, null, true); + + } catch (KeeperException e) { + // it's okay if the node already exists + if (e.code() != KeeperException.Code.NODEEXISTS) { + throw e; + } + } + } else { + log.debug("Collection zkNode exists"); + } + + } catch (KeeperException e) { + // it's okay if another beats us creating the node + if (e.code() == KeeperException.Code.NODEEXISTS) { + return; + } + throw new SolrException(ErrorCode.SERVER_ERROR, "Error creating collection node in Zookeeper", e); + } catch (InterruptedException e) { + Thread.interrupted(); + throw new SolrException(ErrorCode.SERVER_ERROR, "Error creating collection node in Zookeeper", e); + } + + } + + private static void getConfName(SolrZkClient zkClient, String collection, String collectionPath, Map collectionProps) throws KeeperException, + InterruptedException { + // check for configName + log.debug("Looking for collection configName"); + if (collectionProps.containsKey("configName")) { + log.info("configName was passed as a param {}", collectionProps.get("configName")); + return; + } + + List configNames = null; + int retry = 1; + int retryLimt = 6; + for (; retry < retryLimt; retry++) { + if (zkClient.exists(collectionPath, true)) { + ZkNodeProps cProps = ZkNodeProps.load(zkClient.getData(collectionPath, null, null, true)); + if (cProps.containsKey(ZkController.CONFIGNAME_PROP)) { + break; + } + } + + // if there is only one conf, use that + try { + configNames = zkClient.getChildren(ZkConfigManager.CONFIGS_ZKNODE, null, + true); + } catch (NoNodeException e) { + // just keep trying + } + if (configNames != null && configNames.size() == 1) { + // no config set named, but there is only 1 - use it + log.info("Only one config set found in zk - using it:" + 
configNames.get(0)); + collectionProps.put(ZkController.CONFIGNAME_PROP, configNames.get(0)); + break; + } + + if (configNames != null && configNames.contains(collection)) { + log.info( + "Could not find explicit collection configName, but found config name matching collection name - using that set."); + collectionProps.put(ZkController.CONFIGNAME_PROP, collection); + break; + } + + log.info("Could not find collection configName - pausing for 3 seconds and trying again - try: " + retry); + Thread.sleep(3000); + } + if (retry == retryLimt) { + log.error("Could not find configName for collection " + collection); + throw new ZooKeeperException( + SolrException.ErrorCode.SERVER_ERROR, + "Could not find configName for collection " + collection + " found:" + configNames); + } + } } diff --git a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java index 183f1774ee7..b3cd58566d9 100644 --- a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java +++ b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java @@ -125,17 +125,6 @@ class ShardLeaderElectionContextBase extends ElectionContext { this.zkClient = zkStateReader.getZkClient(); this.shardId = shardId; this.collection = collection; - - try { - new ZkCmdExecutor(zkStateReader.getZkClient().getZkClientTimeout()) - .ensureExists(ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection, - zkClient); - } catch (KeeperException e) { - throw new SolrException(ErrorCode.SERVER_ERROR, e); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new SolrException(ErrorCode.SERVER_ERROR, e); - } } @Override @@ -175,9 +164,16 @@ class ShardLeaderElectionContextBase extends ElectionContext { void runLeaderProcess(boolean weAreReplacement, int pauseBeforeStartMs) throws KeeperException, InterruptedException, IOException { // register as leader - if an ephemeral is already there, wait to see if it goes away + + if (!zkClient.exists(ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection, true)) { + log.info("Will not register as leader because collection appears to be gone."); + return; + } + String parent = new Path(leaderPath).getParent().toString(); ZkCmdExecutor zcmd = new ZkCmdExecutor(30000); - zcmd.ensureExists(parent, zkClient); + // only if /collections/{collection} exists already do we succeed in creating this path + zcmd.ensureExists(parent, (byte[])null, CreateMode.PERSISTENT, zkClient, 2); try { RetryUtil.retryOnThrowable(NodeExistsException.class, 60000, 5000, () -> { diff --git a/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java b/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java index 71fdcfd8088..aa8943d68b1 100644 --- a/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java +++ b/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java @@ -360,8 +360,13 @@ public class LeaderElector { public void setup(final ElectionContext context) throws InterruptedException, KeeperException { String electZKPath = context.electionPath + LeaderElector.ELECTION_NODE; - - zkCmdExecutor.ensureExists(electZKPath, zkClient); + if (context instanceof OverseerElectionContext) { + zkCmdExecutor.ensureExists(electZKPath, zkClient); + } else { + // we use 2 param so that replica won't create /collection/{collection} if it doesn't exist + zkCmdExecutor.ensureExists(electZKPath, (byte[])null, CreateMode.PERSISTENT, zkClient, 2); + } + this.context = context; } diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java 
b/solr/core/src/java/org/apache/solr/cloud/ZkController.java index c0a8d555000..eba7067a90a 100644 --- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java +++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java @@ -34,7 +34,6 @@ import java.util.HashSet; import java.util.List; import java.util.Locale; import java.util.Map; -import java.util.Properties; import java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentHashMap; @@ -1273,130 +1272,6 @@ public class ZkController { zkClient.printLayoutToStdOut(); } - public void createCollectionZkNode(CloudDescriptor cd) { - String collection = cd.getCollectionName(); - - log.debug("Check for collection zkNode:" + collection); - String collectionPath = ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection; - - try { - if (!zkClient.exists(collectionPath, true)) { - log.debug("Creating collection in ZooKeeper:" + collection); - - try { - Map collectionProps = new HashMap<>(); - - // TODO: if collection.configName isn't set, and there isn't already a conf in zk, just use that? - String defaultConfigName = System.getProperty(COLLECTION_PARAM_PREFIX + CONFIGNAME_PROP, collection); - - // params passed in - currently only done via core admin (create core commmand). - if (cd.getParams().size() > 0) { - collectionProps.putAll(cd.getParams()); - // if the config name wasn't passed in, use the default - if (!collectionProps.containsKey(CONFIGNAME_PROP)) { - // TODO: getting the configName from the collectionPath should fail since we already know it doesn't exist? - getConfName(collection, collectionPath, collectionProps); - } - - } else if (System.getProperty("bootstrap_confdir") != null) { - // if we are bootstrapping a collection, default the config for - // a new collection to the collection we are bootstrapping - log.info("Setting config for collection:" + collection + " to " + defaultConfigName); - - Properties sysProps = System.getProperties(); - for (String sprop : System.getProperties().stringPropertyNames()) { - if (sprop.startsWith(COLLECTION_PARAM_PREFIX)) { - collectionProps.put(sprop.substring(COLLECTION_PARAM_PREFIX.length()), sysProps.getProperty(sprop)); - } - } - - // if the config name wasn't passed in, use the default - if (!collectionProps.containsKey(CONFIGNAME_PROP)) - collectionProps.put(CONFIGNAME_PROP, defaultConfigName); - - } else if (Boolean.getBoolean("bootstrap_conf")) { - // the conf name should should be the collection name of this core - collectionProps.put(CONFIGNAME_PROP, cd.getCollectionName()); - } else { - getConfName(collection, collectionPath, collectionProps); - } - - collectionProps.remove(ZkStateReader.NUM_SHARDS_PROP); // we don't put numShards in the collections properties - - ZkNodeProps zkProps = new ZkNodeProps(collectionProps); - zkClient.makePath(collectionPath, Utils.toJSON(zkProps), CreateMode.PERSISTENT, null, true); - - } catch (KeeperException e) { - // it's okay if the node already exists - if (e.code() != KeeperException.Code.NODEEXISTS) { - throw e; - } - } - } else { - log.debug("Collection zkNode exists"); - } - - } catch (KeeperException e) { - // it's okay if another beats us creating the node - if (e.code() == KeeperException.Code.NODEEXISTS) { - return; - } - throw new SolrException(ErrorCode.SERVER_ERROR, "Error creating collection node in Zookeeper", e); - } catch (InterruptedException e) { - Thread.interrupted(); - throw new SolrException(ErrorCode.SERVER_ERROR, "Error creating collection node in Zookeeper", e); - } - - } - - - private void 
getConfName(String collection, String collectionPath, - Map collectionProps) throws KeeperException, - InterruptedException { - // check for configName - log.debug("Looking for collection configName"); - List configNames = null; - int retry = 1; - int retryLimt = 6; - for (; retry < retryLimt; retry++) { - if (zkClient.exists(collectionPath, true)) { - ZkNodeProps cProps = ZkNodeProps.load(zkClient.getData(collectionPath, null, null, true)); - if (cProps.containsKey(CONFIGNAME_PROP)) { - break; - } - } - - // if there is only one conf, use that - try { - configNames = zkClient.getChildren(ZkConfigManager.CONFIGS_ZKNODE, null, - true); - } catch (NoNodeException e) { - // just keep trying - } - if (configNames != null && configNames.size() == 1) { - // no config set named, but there is only 1 - use it - log.info("Only one config set found in zk - using it:" + configNames.get(0)); - collectionProps.put(CONFIGNAME_PROP, configNames.get(0)); - break; - } - - if (configNames != null && configNames.contains(collection)) { - log.info("Could not find explicit collection configName, but found config name matching collection name - using that set."); - collectionProps.put(CONFIGNAME_PROP, collection); - break; - } - - log.info("Could not find collection configName - pausing for 3 seconds and trying again - try: " + retry); - Thread.sleep(3000); - } - if (retry == retryLimt) { - log.error("Could not find configName for collection " + collection); - throw new ZooKeeperException( - SolrException.ErrorCode.SERVER_ERROR, - "Could not find configName for collection " + collection + " found:" + configNames); - } - } - public ZkStateReader getZkStateReader() { return zkStateReader; } @@ -2175,7 +2050,8 @@ public class ZkController { } else { String parentZNodePath = getLeaderInitiatedRecoveryZnodePath(collection, shardId); try { - zkClient.makePath(parentZNodePath, retryOnConnLoss); + // make sure we don't create /collections/{collection} if they do not exist with 2 param + zkClient.makePath(parentZNodePath, (byte[]) null, CreateMode.PERSISTENT, (Watcher) null, true, retryOnConnLoss, 2); } catch (KeeperException.NodeExistsException nee) { // if it exists, that's great! 
} diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkSolrResourceLoader.java b/solr/core/src/java/org/apache/solr/cloud/ZkSolrResourceLoader.java index 209ca68c566..b4137b31d6c 100644 --- a/solr/core/src/java/org/apache/solr/cloud/ZkSolrResourceLoader.java +++ b/solr/core/src/java/org/apache/solr/cloud/ZkSolrResourceLoader.java @@ -109,7 +109,7 @@ public class ZkSolrResourceLoader extends SolrResourceLoader { } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new IOException("Error opening " + file, e); - } catch (KeeperException e) { + } catch (Exception e) { throw new IOException("Error opening " + file, e); } } diff --git a/solr/core/src/java/org/apache/solr/core/ConfigSetService.java b/solr/core/src/java/org/apache/solr/core/ConfigSetService.java index 3f47f467a14..e4a135e4c65 100644 --- a/solr/core/src/java/org/apache/solr/core/ConfigSetService.java +++ b/solr/core/src/java/org/apache/solr/core/ConfigSetService.java @@ -78,11 +78,10 @@ public abstract class ConfigSetService { IndexSchema schema = createIndexSchema(dcore, solrConfig); NamedList properties = createConfigSetProperties(dcore, coreLoader); return new ConfigSet(configName(dcore), solrConfig, schema, properties); - } - catch (Exception e) { + } catch (Exception e) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, - "Could not load conf for core " + dcore.getName() + - ": " + e.getMessage(), e); + "Could not load conf for core " + dcore.getName() + + ": " + e.getMessage(), e); } } diff --git a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java index 01095a1143b..1915176070d 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java @@ -346,9 +346,11 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission try { String path = ZkStateReader.CONFIGS_ZKNODE + "/" + SYSTEM_COLL + "/schema.xml"; byte[] data = IOUtils.toByteArray(Thread.currentThread().getContextClassLoader().getResourceAsStream("SystemCollectionSchema.xml")); + assert data != null && data.length > 0; cmdExecutor.ensureExists(path, data, CreateMode.PERSISTENT, zk); path = ZkStateReader.CONFIGS_ZKNODE + "/" + SYSTEM_COLL + "/solrconfig.xml"; data = IOUtils.toByteArray(Thread.currentThread().getContextClassLoader().getResourceAsStream("SystemCollectionSolrConfig.xml")); + assert data != null && data.length > 0; cmdExecutor.ensureExists(path, data, CreateMode.PERSISTENT, zk); } catch (IOException e) { throw new SolrException(ErrorCode.SERVER_ERROR, e); diff --git a/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java b/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java index 8e1be10959a..2582872259a 100644 --- a/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java @@ -80,6 +80,8 @@ public class LeaderElectionTest extends SolrTestCaseJ4 { zkClient = new SolrZkClient(server.getZkAddress(), TIMEOUT); zkStateReader = new ZkStateReader(zkClient); seqToThread = Collections.synchronizedMap(new HashMap()); + zkClient.makePath("/collections/collection1", true); + zkClient.makePath("/collections/collection2", true); } class TestLeaderElectionContext extends ShardLeaderElectionContextBase { diff --git a/solr/core/src/test/org/apache/solr/cloud/OverseerCollectionConfigSetProcessorTest.java 
b/solr/core/src/test/org/apache/solr/cloud/OverseerCollectionConfigSetProcessorTest.java index 239afa1f60d..6a7906db907 100644 --- a/solr/core/src/test/org/apache/solr/cloud/OverseerCollectionConfigSetProcessorTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/OverseerCollectionConfigSetProcessorTest.java @@ -44,6 +44,7 @@ import org.apache.solr.handler.component.ShardRequest; import org.apache.solr.handler.component.ShardResponse; import org.apache.solr.util.TimeOut; import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.Watcher; import org.easymock.Capture; import org.easymock.EasyMock; import org.junit.After; @@ -114,7 +115,6 @@ public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 { zkStateReaderMock = createMock(ZkStateReader.class); clusterStateMock = createMock(ClusterState.class); solrZkClientMock = createMock(SolrZkClient.class); - } @AfterClass @@ -143,9 +143,7 @@ public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 { reset(zkStateReaderMock); reset(clusterStateMock); reset(solrZkClientMock); - underTest = new OverseerCollectionConfigSetProcessorToBeTested(zkStateReaderMock, - "1234", shardHandlerFactoryMock, ADMIN_PATH, workQueueMock, runningMapMock, - completedMapMock, failureMapMock); + zkMap.clear(); collectionsSet.clear(); } @@ -157,12 +155,12 @@ public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 { } protected Set commonMocks(int liveNodesCount) throws Exception { - shardHandlerFactoryMock.getShardHandler(); expectLastCall().andAnswer(() -> { log.info("SHARDHANDLER"); return shardHandlerMock; }).anyTimes(); + workQueueMock.peekTopN(EasyMock.anyInt(), anyObject(Predicate.class), EasyMock.anyLong()); expectLastCall().andAnswer(() -> { Object result; @@ -203,12 +201,12 @@ public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 { workQueueMock.poll(); expectLastCall().andAnswer(() -> queue.poll()).anyTimes(); - - zkStateReaderMock.getClusterState(); - expectLastCall().andAnswer(() -> clusterStateMock).anyTimes(); zkStateReaderMock.getZkClient(); expectLastCall().andAnswer(() -> solrZkClientMock).anyTimes(); + + zkStateReaderMock.getClusterState(); + expectLastCall().andAnswer(() -> clusterStateMock).anyTimes(); zkStateReaderMock.updateClusterState(); @@ -262,6 +260,18 @@ public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 { String key = (String) getCurrentArguments()[0]; return key; }).anyTimes(); + + solrZkClientMock.makePath(anyObject(String.class), anyObject(byte[].class), anyObject(CreateMode.class), anyObject(Watcher.class), anyBoolean()); + expectLastCall().andAnswer(() -> { + String key = (String) getCurrentArguments()[0]; + return key; + }).anyTimes(); + + solrZkClientMock.makePath(anyObject(String.class), anyObject(byte[].class), anyObject(CreateMode.class), anyObject(Watcher.class), anyBoolean(), anyBoolean(), anyInt()); + expectLastCall().andAnswer(() -> { + String key = (String) getCurrentArguments()[0]; + return key; + }).anyTimes(); solrZkClientMock.exists(anyObject(String.class),anyBoolean()); expectLastCall().andAnswer(() -> { @@ -518,12 +528,17 @@ public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 { replicationFactor); } - replay(workQueueMock); replay(solrZkClientMock); replay(zkStateReaderMock); + replay(workQueueMock); replay(clusterStateMock); replay(shardHandlerFactoryMock); replay(shardHandlerMock); + + + underTest = new OverseerCollectionConfigSetProcessorToBeTested(zkStateReaderMock, + 
"1234", shardHandlerFactoryMock, ADMIN_PATH, workQueueMock, runningMapMock, + completedMapMock, failureMapMock); log.info("clusterstate " + clusterStateMock.hashCode()); diff --git a/solr/core/src/test/org/apache/solr/cloud/ZkSolrClientTest.java b/solr/core/src/test/org/apache/solr/cloud/ZkSolrClientTest.java index 39ef1b8394d..faa2ba74a05 100644 --- a/solr/core/src/test/org/apache/solr/cloud/ZkSolrClientTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/ZkSolrClientTest.java @@ -26,6 +26,7 @@ import org.apache.solr.common.cloud.SolrZkClient; import org.apache.solr.common.cloud.ZkCmdExecutor; import org.apache.solr.common.cloud.ZkOperation; import org.apache.solr.util.AbstractSolrTestCase; +import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.WatchedEvent; import org.apache.zookeeper.Watcher; @@ -304,6 +305,59 @@ public class ZkSolrClientTest extends AbstractSolrTestCase { } } + + public void testSkipPathPartsOnMakePath() throws Exception { + try (ZkConnection conn = new ZkConnection()) { + final SolrZkClient zkClient = conn.getClient(); + + zkClient.makePath("/test", true); + + // should work + zkClient.makePath("/test/path/here", (byte[]) null, CreateMode.PERSISTENT, (Watcher) null, true, true, 1); + + zkClient.clean("/"); + + // should not work + try { + zkClient.makePath("/test/path/here", (byte[]) null, CreateMode.PERSISTENT, (Watcher) null, true, true, 1); + fail("We should not be able to create this path"); + } catch (Exception e) { + + } + + zkClient.clean("/"); + + ZkCmdExecutor zkCmdExecutor = new ZkCmdExecutor(30000); + try { + zkCmdExecutor.ensureExists("/collection/collection/leader", (byte[]) null, CreateMode.PERSISTENT, zkClient, 2); + fail("We should not be able to create this path"); + } catch (Exception e) { + + } + + zkClient.makePath("/collection", true); + + try { + zkCmdExecutor.ensureExists("/collections/collection/leader", (byte[]) null, CreateMode.PERSISTENT, zkClient, 2); + fail("We should not be able to create this path"); + } catch (Exception e) { + + } + zkClient.makePath("/collection/collection", true); + + byte[] bytes = new byte[10]; + zkCmdExecutor.ensureExists("/collection/collection", bytes, CreateMode.PERSISTENT, zkClient, 2); + + byte[] returnedBytes = zkClient.getData("/collection/collection", null, null, true); + + assertNull("We skipped 2 path parts, so data won't be written", returnedBytes); + + zkClient.makePath("/collection/collection/leader", true); + + zkCmdExecutor.ensureExists("/collection/collection/leader", (byte[]) null, CreateMode.PERSISTENT, zkClient, 2); + + } + } @Override public void tearDown() throws Exception { diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/SolrZkClient.java b/solr/solrj/src/java/org/apache/solr/common/cloud/SolrZkClient.java index 422d9e5d7a9..3f8deea5ace 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/SolrZkClient.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/SolrZkClient.java @@ -411,13 +411,13 @@ public class SolrZkClient implements Closeable { public void makePath(String path, boolean failOnExists, boolean retryOnConnLoss) throws KeeperException, InterruptedException { - makePath(path, null, CreateMode.PERSISTENT, null, failOnExists, retryOnConnLoss); + makePath(path, null, CreateMode.PERSISTENT, null, failOnExists, retryOnConnLoss, 0); } public void makePath(String path, File file, boolean failOnExists, boolean retryOnConnLoss) throws IOException, KeeperException, InterruptedException { makePath(path, 
FileUtils.readFileToByteArray(file), - CreateMode.PERSISTENT, null, failOnExists, retryOnConnLoss); + CreateMode.PERSISTENT, null, failOnExists, retryOnConnLoss, 0); } public void makePath(String path, File file, boolean retryOnConnLoss) throws IOException, @@ -463,21 +463,35 @@ public class SolrZkClient implements Closeable { */ public void makePath(String path, byte[] data, CreateMode createMode, Watcher watcher, boolean retryOnConnLoss) throws KeeperException, InterruptedException { - makePath(path, data, createMode, watcher, true, retryOnConnLoss); + makePath(path, data, createMode, watcher, true, retryOnConnLoss, 0); } - - + /** * Creates the path in ZooKeeper, creating each node as necessary. * * e.g. If path=/solr/group/node and none of the nodes, solr, * group, node exist, each will be created. * + * @param data to set on the last zkNode + */ + public void makePath(String path, byte[] data, CreateMode createMode, + Watcher watcher, boolean failOnExists, boolean retryOnConnLoss) throws KeeperException, InterruptedException { + makePath(path, data, createMode, watcher, failOnExists, retryOnConnLoss, 0); + } + + /** + * Creates the path in ZooKeeper, creating each node as necessary. + * + * e.g. If path=/solr/group/node and none of the nodes, solr, + * group, node exist, each will be created. + * + * skipPathParts will force the call to fail if the first skipPathParts do not exist already. + * * Note: retryOnConnLoss is only respected for the final node - nodes * before that are always retried on connection loss. */ public void makePath(String path, byte[] data, CreateMode createMode, - Watcher watcher, boolean failOnExists, boolean retryOnConnLoss) throws KeeperException, InterruptedException { + Watcher watcher, boolean failOnExists, boolean retryOnConnLoss, int skipPathParts) throws KeeperException, InterruptedException { log.debug("makePath: {}", path); boolean retry = true; @@ -487,9 +501,12 @@ public class SolrZkClient implements Closeable { String[] paths = path.split("/"); StringBuilder sbPath = new StringBuilder(); for (int i = 0; i < paths.length; i++) { - byte[] bytes = null; String pathPiece = paths[i]; sbPath.append("/" + pathPiece); + if (i < skipPathParts) { + continue; + } + byte[] bytes = null; final String currentPath = sbPath.toString(); Object exists = exists(currentPath, watcher, retryOnConnLoss); if (exists == null || ((i == paths.length -1) && failOnExists)) { diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkCmdExecutor.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkCmdExecutor.java index 0f50f0a81ce..c27f7671bc8 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkCmdExecutor.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkCmdExecutor.java @@ -75,17 +75,26 @@ public class ZkCmdExecutor { } public void ensureExists(String path, final SolrZkClient zkClient) throws KeeperException, InterruptedException { - ensureExists(path, null, CreateMode.PERSISTENT, zkClient); + ensureExists(path, null, CreateMode.PERSISTENT, zkClient, 0); + } + + + public void ensureExists(String path, final byte[] data, final SolrZkClient zkClient) throws KeeperException, InterruptedException { + ensureExists(path, data, CreateMode.PERSISTENT, zkClient, 0); + } + + public void ensureExists(String path, final byte[] data, CreateMode createMode, final SolrZkClient zkClient) throws KeeperException, InterruptedException { + ensureExists(path, data, createMode, zkClient, 0); } public void ensureExists(final String path, final byte[] data, - 
CreateMode createMode, final SolrZkClient zkClient) throws KeeperException, InterruptedException { + CreateMode createMode, final SolrZkClient zkClient, int skipPathParts) throws KeeperException, InterruptedException { if (zkClient.exists(path, true)) { return; } try { - zkClient.makePath(path, data, createMode, true); + zkClient.makePath(path, data, createMode, null, true, true, skipPathParts); } catch (NodeExistsException e) { // it's okay if another beats us creating the node } From 93c11462bbe2c442f20a6d090911c5a1a4546564 Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Thu, 8 Dec 2016 18:17:25 -0500 Subject: [PATCH 27/53] fix stale comment --- .../apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java index 9d2db890fa0..c3217f30156 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java @@ -72,7 +72,7 @@ class SimpleTextPointsWriter extends PointsWriter { PointValues values = reader.getValues(fieldInfo.name); boolean singleValuePerDoc = values.size() == values.getDocCount(); - // We use the normal BKDWriter, but subclass to customize how it writes the index and blocks to disk: + // We use our own fork of the BKDWriter to customize how it writes the index and blocks to disk: try (SimpleTextBKDWriter writer = new SimpleTextBKDWriter(writeState.segmentInfo.maxDoc(), writeState.directory, writeState.segmentInfo.name, From 1d2e440a8fe3df8d3207a7428841f79f63381e4f Mon Sep 17 00:00:00 2001 From: yonik Date: Thu, 8 Dec 2016 18:29:07 -0500 Subject: [PATCH 28/53] SOLR-9837: fix redundant calculation of docsWithField for numeric fields in field cache --- solr/CHANGES.txt | 4 ++ .../solr/uninverting/FieldCacheImpl.java | 52 ++++++++++--------- 2 files changed, 32 insertions(+), 24 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index abd99978022..78f7f5580f9 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -50,6 +50,10 @@ Bug Fixes * SOLR-9262: Connection and read timeouts are being ignored by UpdateShardHandler after SOLR-4509. (Mark Miller, shalin) +* SOLR-9837: Fix 55% performance regression of FieldCache uninvert time of + numeric fields. 
(yonik) + + Optimizations ---------------------- diff --git a/solr/core/src/java/org/apache/solr/uninverting/FieldCacheImpl.java b/solr/core/src/java/org/apache/solr/uninverting/FieldCacheImpl.java index 0ca687f3952..90be40070ba 100644 --- a/solr/core/src/java/org/apache/solr/uninverting/FieldCacheImpl.java +++ b/solr/core/src/java/org/apache/solr/uninverting/FieldCacheImpl.java @@ -365,8 +365,6 @@ public class FieldCacheImpl implements FieldCache { } } - /** @deprecated remove this when legacy numerics are removed */ - @Deprecated protected abstract TermsEnum termsEnum(Terms terms) throws IOException; protected abstract void visitTerm(BytesRef term); protected abstract void visitDoc(int docID); @@ -632,20 +630,21 @@ public class FieldCacheImpl implements FieldCache { } } - Bits docsWithField = getDocsWithField(reader, field, parser); - return ((LongsFromArray) caches.get(Long.TYPE).get(reader, new CacheKey(field, parser))).iterator(docsWithField); + return ((LongsFromArray) caches.get(Long.TYPE).get(reader, new CacheKey(field, parser))).iterator(); } } - static class LongsFromArray implements Accountable { + public static class LongsFromArray implements Accountable { private final PackedInts.Reader values; private final long minValue; + private final Bits docsWithField; private final String field; - public LongsFromArray(String field, PackedInts.Reader values, long minValue) { + public LongsFromArray(String field, PackedInts.Reader values, long minValue, Bits docsWithField) { // TODO: accept null docsWithField? this.field = field; this.values = values; this.minValue = minValue; + this.docsWithField = docsWithField; } @Override @@ -653,7 +652,7 @@ public class FieldCacheImpl implements FieldCache { return values.ramBytesUsed() + RamUsageEstimator.NUM_BYTES_OBJECT_REF + Long.BYTES; } - public NumericDocValues iterator(final Bits docsWithField) { + public NumericDocValues iterator() { return new NumericDocValues() { int docID = -1; @@ -767,10 +766,11 @@ public class FieldCacheImpl implements FieldCache { u.uninvert(reader, key.field); wrapper.setDocsWithField(reader, key.field, u.docsWithField, parser); GrowableWriterAndMinValue values = valuesRef.get(); + Bits docsWithField = u.docsWithField == null ? 
new Bits.MatchNoBits(reader.maxDoc()) : u.docsWithField; if (values == null) { - return new LongsFromArray(key.field, new PackedInts.NullReader(reader.maxDoc()), 0L); + return new LongsFromArray(key.field, new PackedInts.NullReader(reader.maxDoc()), 0L, docsWithField); } - return new LongsFromArray(key.field, values.writer.getMutable(), values.minValue); + return new LongsFromArray(key.field, values.writer.getMutable(), values.minValue, docsWithField); } } @@ -993,16 +993,18 @@ public class FieldCacheImpl implements FieldCache { } } - private static class BinaryDocValuesImpl implements Accountable { + public static class BinaryDocValuesImpl implements Accountable { private final PagedBytes.Reader bytes; private final PackedInts.Reader docToOffset; + private final Bits docsWithField; - public BinaryDocValuesImpl(PagedBytes.Reader bytes, PackedInts.Reader docToOffset) { + public BinaryDocValuesImpl(PagedBytes.Reader bytes, PackedInts.Reader docToOffset, Bits docsWithField) { this.bytes = bytes; this.docToOffset = docToOffset; + this.docsWithField = docsWithField; } - public BinaryDocValues iterator(Bits docsWithField) { + public BinaryDocValues iterator() { return new BinaryDocValues() { final BytesRef term = new BytesRef(); @@ -1109,7 +1111,7 @@ public class FieldCacheImpl implements FieldCache { } BinaryDocValuesImpl impl = (BinaryDocValuesImpl) caches.get(BinaryDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio)); - return impl.iterator(getDocsWithField(reader, field, null)); + return impl.iterator(); } static final class BinaryDocValuesCache extends Cache { @@ -1188,19 +1190,21 @@ public class FieldCacheImpl implements FieldCache { } final PackedInts.Reader offsetReader = docToOffset.getMutable(); - wrapper.setDocsWithField(reader, key.field, new Bits() { - @Override - public boolean get(int index) { - return offsetReader.get(index) != 0; - } + Bits docsWithField = new Bits() { + @Override + public boolean get(int index) { + return offsetReader.get(index) != 0; + } - @Override - public int length() { - return maxDoc; - } - }, null); + @Override + public int length() { + return maxDoc; + } + }; + + wrapper.setDocsWithField(reader, key.field, docsWithField, null); // maybe an int-only impl? - return new BinaryDocValuesImpl(bytes.freeze(true), offsetReader); + return new BinaryDocValuesImpl(bytes.freeze(true), offsetReader, docsWithField); } } From c185617582b4bf3ce2899c9ae67e9eeaf2c21741 Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Thu, 8 Dec 2016 18:34:51 -0500 Subject: [PATCH 29/53] LUCENE-7583: move this class to the right package --- .../lucene/{util => store}/GrowableByteArrayDataOutput.java | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename lucene/core/src/java/org/apache/lucene/{util => store}/GrowableByteArrayDataOutput.java (100%) diff --git a/lucene/core/src/java/org/apache/lucene/util/GrowableByteArrayDataOutput.java b/lucene/core/src/java/org/apache/lucene/store/GrowableByteArrayDataOutput.java similarity index 100% rename from lucene/core/src/java/org/apache/lucene/util/GrowableByteArrayDataOutput.java rename to lucene/core/src/java/org/apache/lucene/store/GrowableByteArrayDataOutput.java From 22d04a7c1149c1af42dc2890a416fc45e4d0aa5e Mon Sep 17 00:00:00 2001 From: Uwe Schindler Date: Fri, 9 Dec 2016 18:36:37 +0100 Subject: [PATCH 30/53] LUCENE-6989: Fix Exception handling in MMapDirectory's unmap hack support code to work with Java 9's new InaccessibleObjectException that does not extend ReflectiveAccessException in Java 9. 
--- lucene/CHANGES.txt | 5 +++++ .../core/src/java/org/apache/lucene/store/MMapDirectory.java | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 26a9dec0014..b9deb7e1768 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -85,6 +85,11 @@ Bug Fixes the incoming automaton is a special case and throw a clearer exception than NullPointerException (Tom Mortimer via Mike McCandless) +* LUCENE-6989: Fix Exception handling in MMapDirectory's unmap hack + support code to work with Java 9's new InaccessibleObjectException + that does not extend ReflectiveAccessException in Java 9. + (Uwe Schindler) + Improvements * LUCENE-6824: TermAutomatonQuery now rewrites to TermQuery, diff --git a/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java b/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java index c0e35197f0e..be08a1663a6 100644 --- a/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java +++ b/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java @@ -385,13 +385,13 @@ public class MMapDirectory extends FSDirectory { } } }; - } catch (ReflectiveOperationException e) { - return "Unmapping is not supported on this platform, because internal Java APIs are not compatible to this Lucene version: " + e; } catch (SecurityException e) { return "Unmapping is not supported, because not all required permissions are given to the Lucene JAR file: " + e + " [Please grant at least the following permissions: RuntimePermission(\"accessClassInPackage.sun.misc\"), " + "RuntimePermission(\"accessClassInPackage.jdk.internal.ref\"), and " + "ReflectPermission(\"suppressAccessChecks\")]"; + } catch (ReflectiveOperationException | RuntimeException e) { + return "Unmapping is not supported on this platform, because internal Java APIs are not compatible to this Lucene version: " + e; } } From 4efbde4e76277f364952866c071bb953ca2be070 Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Fri, 9 Dec 2016 18:05:13 -0500 Subject: [PATCH 31/53] LUCENE-7581: don't allow updating a doc values field if it's used in the index sort --- lucene/CHANGES.txt | 4 +++ .../org/apache/lucene/index/IndexWriter.java | 6 ++++ .../lucene/index/IndexWriterConfig.java | 3 ++ .../lucene/index/LiveIndexWriterConfig.java | 13 ++++++++ .../apache/lucene/index/TestIndexSorting.java | 30 +++++++++++++++++-- 5 files changed, 53 insertions(+), 3 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index b9deb7e1768..da6e3d29508 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -90,6 +90,10 @@ Bug Fixes that does not extend ReflectiveAccessException in Java 9. (Uwe Schindler) +* LUCENE-7581: Lucene now prevents updating a doc values field that is used + in the index sort, since this would lead to corruption. 
(Jim + Ferenczi via Mike McCandless) + Improvements * LUCENE-6824: TermAutomatonQuery now rewrites to TermQuery, diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 98687855231..3ee87b18304 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -1619,6 +1619,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { if (!globalFieldNumberMap.contains(field, DocValuesType.NUMERIC)) { throw new IllegalArgumentException("can only update existing numeric-docvalues fields!"); } + if (config.getIndexSortFields().contains(field)) { + throw new IllegalArgumentException("cannot update docvalues field involved in the index sort, field=" + field + ", sort=" + config.getIndexSort()); + } try { long seqNo = docWriter.updateDocValues(new NumericDocValuesUpdate(term, field, value)); if (seqNo < 0) { @@ -1713,6 +1716,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { if (!globalFieldNumberMap.contains(f.name(), dvType)) { throw new IllegalArgumentException("can only update existing docvalues fields! field=" + f.name() + ", type=" + dvType); } + if (config.getIndexSortFields().contains(f.name())) { + throw new IllegalArgumentException("cannot update docvalues field involved in the index sort, field=" + f.name() + ", sort=" + config.getIndexSort()); + } switch (dvType) { case NUMERIC: dvUpdates[i] = new NumericDocValuesUpdate(term, f.name(), (Long) f.numericValue()); diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java index 4f642eed52a..ce4f0a8e5c3 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java @@ -18,7 +18,9 @@ package org.apache.lucene.index; import java.io.PrintStream; +import java.util.Arrays; import java.util.EnumSet; +import java.util.stream.Collectors; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; @@ -474,6 +476,7 @@ public final class IndexWriterConfig extends LiveIndexWriterConfig { } } this.indexSort = sort; + this.indexSortFields = Arrays.stream(sort.getSort()).map((s) -> s.getField()).collect(Collectors.toSet()); return this; } diff --git a/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java b/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java index cec70c099aa..d9e1bc7bebb 100644 --- a/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java +++ b/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java @@ -17,6 +17,9 @@ package org.apache.lucene.index; +import java.util.Collections; +import java.util.Set; + import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.codecs.Codec; import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain; @@ -98,6 +101,9 @@ public class LiveIndexWriterConfig { /** The sort order to use to write merged segments. 
*/ protected Sort indexSort = null; + /** The field names involved in the index sort */ + protected Set indexSortFields = Collections.emptySet(); + // used by IndexWriterConfig LiveIndexWriterConfig(Analyzer analyzer) { this.analyzer = analyzer; @@ -457,6 +463,13 @@ public class LiveIndexWriterConfig { return indexSort; } + /** + * Returns the field names involved in the index sort + */ + public Set getIndexSortFields() { + return indexSortFields; + } + @Override public String toString() { StringBuilder sb = new StringBuilder(); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java index 5ebf8f481d1..08a85ef3e24 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java @@ -1700,6 +1700,29 @@ public class TestIndexSorting extends LuceneTestCase { dir.close(); } + + // docvalues fields involved in the index sort cannot be updated + public void testBadDVUpdate() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG)); + iwc.setIndexSort(indexSort); + IndexWriter w = new IndexWriter(dir, iwc); + Document doc = new Document(); + doc.add(new StringField("id", new BytesRef("0"), Store.NO)); + doc.add(new NumericDocValuesField("foo", random().nextInt())); + w.addDocument(doc); + w.commit(); + IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, + () -> w.updateDocValues(new Term("id", "0"), new NumericDocValuesField("foo", -1))); + assertEquals(exc.getMessage(), "cannot update docvalues field involved in the index sort, field=foo, sort="); + exc = expectThrows(IllegalArgumentException.class, + () -> w.updateNumericDocValue(new Term("id", "0"), "foo", -1)); + assertEquals(exc.getMessage(), "cannot update docvalues field involved in the index sort, field=foo, sort="); + w.close(); + dir.close(); + } + static class DVUpdateRunnable implements Runnable { private final int numDocs; @@ -1727,7 +1750,7 @@ public class TestIndexSorting extends LuceneTestCase { final long value = random.nextInt(20); synchronized (values) { - w.updateDocValues(new Term("id", Integer.toString(id)), new NumericDocValuesField("foo", value)); + w.updateDocValues(new Term("id", Integer.toString(id)), new NumericDocValuesField("bar", value)); values.put(id, value); } @@ -1762,7 +1785,8 @@ public class TestIndexSorting extends LuceneTestCase { for (int i = 0; i < numDocs; ++i) { Document doc = new Document(); doc.add(new StringField("id", Integer.toString(i), Store.NO)); - doc.add(new NumericDocValuesField("foo", -1)); + doc.add(new NumericDocValuesField("foo", random().nextInt())); + doc.add(new NumericDocValuesField("bar", -1)); w.addDocument(doc); values.put(i, -1L); } @@ -1786,7 +1810,7 @@ public class TestIndexSorting extends LuceneTestCase { for (int i = 0; i < numDocs; ++i) { final TopDocs topDocs = searcher.search(new TermQuery(new Term("id", Integer.toString(i))), 1); assertEquals(1, topDocs.totalHits); - NumericDocValues dvs = MultiDocValues.getNumericValues(reader, "foo"); + NumericDocValues dvs = MultiDocValues.getNumericValues(reader, "bar"); int hitDoc = topDocs.scoreDocs[0].doc; assertEquals(hitDoc, dvs.advance(hitDoc)); assertEquals(values.get(i).longValue(), dvs.longValue()); From 7cffae3a16f7d0c94a79a273a702c0013af7f5ac Mon Sep 17 00:00:00 2001 From: Mike 
McCandless Date: Fri, 9 Dec 2016 18:35:13 -0500 Subject: [PATCH 32/53] don't create unnecessary lambda --- .../src/java/org/apache/lucene/index/IndexWriterConfig.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java index ce4f0a8e5c3..1e1e795d50b 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java @@ -476,7 +476,7 @@ public final class IndexWriterConfig extends LiveIndexWriterConfig { } } this.indexSort = sort; - this.indexSortFields = Arrays.stream(sort.getSort()).map((s) -> s.getField()).collect(Collectors.toSet()); + this.indexSortFields = Arrays.stream(sort.getSort()).map(SortField::getField).collect(Collectors.toSet()); return this; } From 2b073a2f296289617bea8256d7efec06049df739 Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Fri, 9 Dec 2016 18:41:30 -0500 Subject: [PATCH 33/53] LUCENE-7570: don't run merges while holding the commitLock to prevent deadlock when merges are stalled and a tragic merge exception strikes --- lucene/CHANGES.txt | 4 ++ .../org/apache/lucene/index/IndexWriter.java | 28 +++++--- .../index/TestTragicIndexWriterDeadlock.java | 69 ++++++++++++++++++- 3 files changed, 92 insertions(+), 9 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index da6e3d29508..15b89f09f2f 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -94,6 +94,10 @@ Bug Fixes in the index sort, since this would lead to corruption. (Jim Ferenczi via Mike McCandless) +* LUCENE-7570: IndexWriter may deadlock if a commit is running while + there are too many merges running and one of the merges hits a + tragic exception (Joey Echeverria via Mike McCandless) + Improvements * LUCENE-6824: TermAutomatonQuery now rewrites to TermQuery, diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 3ee87b18304..47895050225 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -2952,11 +2952,16 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { @Override public final long prepareCommit() throws IOException { ensureOpen(); - pendingSeqNo = prepareCommitInternal(config.getMergePolicy()); + boolean[] doMaybeMerge = new boolean[1]; + pendingSeqNo = prepareCommitInternal(doMaybeMerge); + // we must do this outside of the commitLock else we can deadlock: + if (doMaybeMerge[0]) { + maybeMerge(config.getMergePolicy(), MergeTrigger.FULL_FLUSH, UNBOUNDED_MAX_MERGE_SEGMENTS); + } return pendingSeqNo; } - private long prepareCommitInternal(MergePolicy mergePolicy) throws IOException { + private long prepareCommitInternal(boolean[] doMaybeMerge) throws IOException { startCommitTime = System.nanoTime(); synchronized(commitLock) { ensureOpen(false); @@ -3063,7 +3068,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { boolean success = false; try { if (anySegmentsFlushed) { - maybeMerge(mergePolicy, MergeTrigger.FULL_FLUSH, UNBOUNDED_MAX_MERGE_SEGMENTS); + doMaybeMerge[0] = true; } startCommit(toCommit); success = true; @@ -3184,6 +3189,10 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { infoStream.message("IW", "commit: start"); } + boolean[] doMaybeMerge = new boolean[1]; + + 
long seqNo; + synchronized(commitLock) { ensureOpen(false); @@ -3191,13 +3200,11 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { infoStream.message("IW", "commit: enter lock"); } - long seqNo; - if (pendingCommit == null) { if (infoStream.isEnabled("IW")) { infoStream.message("IW", "commit: now prepare"); } - seqNo = prepareCommitInternal(mergePolicy); + seqNo = prepareCommitInternal(doMaybeMerge); } else { if (infoStream.isEnabled("IW")) { infoStream.message("IW", "commit: already prepared"); @@ -3206,9 +3213,14 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { } finishCommit(); - - return seqNo; } + + // we must do this outside of the commitLock else we can deadlock: + if (doMaybeMerge[0]) { + maybeMerge(mergePolicy, MergeTrigger.FULL_FLUSH, UNBOUNDED_MAX_MERGE_SEGMENTS); + } + + return seqNo; } private final void finishCommit() throws IOException { diff --git a/lucene/core/src/test/org/apache/lucene/index/TestTragicIndexWriterDeadlock.java b/lucene/core/src/test/org/apache/lucene/index/TestTragicIndexWriterDeadlock.java index 3cce69831b5..80f9392d0a7 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestTragicIndexWriterDeadlock.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestTragicIndexWriterDeadlock.java @@ -14,13 +14,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package org.apache.lucene.index; - +import java.io.IOException; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.lucene.document.Document; +import org.apache.lucene.store.Directory; import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.util.LuceneTestCase; @@ -92,4 +94,69 @@ public class TestTragicIndexWriterDeadlock extends LuceneTestCase { w.close(); dir.close(); } + + // LUCENE-7570 + public void testDeadlockStalledMerges() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = new IndexWriterConfig(); + + // so we merge every 2 segments: + LogMergePolicy mp = new LogDocMergePolicy(); + mp.setMergeFactor(2); + iwc.setMergePolicy(mp); + CountDownLatch done = new CountDownLatch(1); + ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler() { + @Override + protected void doMerge(IndexWriter writer, MergePolicy.OneMerge merge) throws IOException { + // let merge takes forever, until commit thread is stalled + try { + done.await(); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + throw new RuntimeException(ie); + } + super.doMerge(writer, merge); + } + + @Override + protected synchronized void doStall() { + done.countDown(); + super.doStall(); + } + + @Override + protected void handleMergeException(Directory dir, Throwable exc) { + } + }; + + // so we stall once the 2nd merge wants to run: + cms.setMaxMergesAndThreads(1, 1); + iwc.setMergeScheduler(cms); + + // so we write a segment every 2 indexed docs: + iwc.setMaxBufferedDocs(2); + + final IndexWriter w = new IndexWriter(dir, iwc) { + @Override + void mergeSuccess(MergePolicy.OneMerge merge) { + // tragedy strikes! 
+ throw new OutOfMemoryError(); + } + }; + + w.addDocument(new Document()); + w.addDocument(new Document()); + // w writes first segment + w.addDocument(new Document()); + w.addDocument(new Document()); + // w writes second segment, and kicks off merge, that takes forever (done.await) + w.addDocument(new Document()); + w.addDocument(new Document()); + // w writes third segment + w.addDocument(new Document()); + w.commit(); + // w writes fourth segment, and commit flushes and kicks off merge that stalls + w.close(); + dir.close(); + } } From d75abe1a3022b5d596b7fca4c7e8623782010a88 Mon Sep 17 00:00:00 2001 From: Erick Erickson Date: Sat, 10 Dec 2016 14:03:15 -0800 Subject: [PATCH 34/53] SOLR-9843: Fix up DocValuesNotIndexedTest failures. Debugging code --- .../solr/cloud/DocValuesNotIndexedTest.java | 64 +++++++++++-------- 1 file changed, 39 insertions(+), 25 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/DocValuesNotIndexedTest.java b/solr/core/src/test/org/apache/solr/cloud/DocValuesNotIndexedTest.java index b8507b1a575..f5257f82865 100644 --- a/solr/core/src/test/org/apache/solr/cloud/DocValuesNotIndexedTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/DocValuesNotIndexedTest.java @@ -18,15 +18,15 @@ package org.apache.solr.cloud; import java.io.IOException; -import java.text.SimpleDateFormat; +import java.lang.invoke.MethodHandles; +import java.time.Instant; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.LinkedHashMap; import java.util.List; import java.util.Locale; import java.util.Map; -import java.util.stream.Collectors; -import java.util.stream.Stream; import com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule; import org.apache.solr.client.solrj.SolrQuery; @@ -50,11 +50,16 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.RuleChain; import org.junit.rules.TestRule; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import static org.apache.lucene.util.LuceneTestCase.random; import static org.apache.solr.client.solrj.request.schema.SchemaRequest.*; public class DocValuesNotIndexedTest extends SolrCloudTestCase { + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + @Rule public TestRule solrTestRules = RuleChain.outerRule(new SystemPropertiesRestoreRule()); @@ -79,7 +84,7 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase { .process(cluster.getSolrClient()); fieldsToTestSingle = - Collections.unmodifiableList(Stream.of( + Collections.unmodifiableList(Arrays.asList( new FieldProps("intField", "int", 1), new FieldProps("longField", "long", 1), new FieldProps("doubleField", "double", 1), @@ -87,10 +92,10 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase { new FieldProps("dateField", "date", 1), new FieldProps("stringField", "string", 1), new FieldProps("boolField", "boolean", 1) - ).collect(Collectors.toList())); + )); fieldsToTestMulti = - Collections.unmodifiableList(Stream.of( + Collections.unmodifiableList(Arrays.asList( new FieldProps("intFieldMulti", "int", 5), new FieldProps("longFieldMulti", "long", 5), new FieldProps("doubleFieldMulti", "double", 5), @@ -98,11 +103,11 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase { new FieldProps("dateFieldMulti", "date", 5), new FieldProps("stringFieldMulti", "string", 5), new FieldProps("boolFieldMulti", "boolean", 2) - ).collect(Collectors.toList())); + )); // Fields to test for grouping and sorting with 
sortMinssingFirst/Last. fieldsToTestGroupSortFirst = - Collections.unmodifiableList(Stream.of( + Collections.unmodifiableList(Arrays.asList( new FieldProps("intGSF", "int"), new FieldProps("longGSF", "long"), new FieldProps("doubleGSF", "double"), @@ -110,10 +115,10 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase { new FieldProps("dateGSF", "date"), new FieldProps("stringGSF", "string"), new FieldProps("boolGSF", "boolean") - ).collect(Collectors.toList())); + )); fieldsToTestGroupSortLast = - Collections.unmodifiableList(Stream.of( + Collections.unmodifiableList(Arrays.asList( new FieldProps("intGSL", "int"), new FieldProps("longGSL", "long"), new FieldProps("doubleGSL", "double"), @@ -121,7 +126,7 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase { new FieldProps("dateGSL", "date"), new FieldProps("stringGSL", "string"), new FieldProps("boolGSL", "boolean") - ).collect(Collectors.toList())); + )); List updateList = new ArrayList<>(fieldsToTestSingle.size() + fieldsToTestMulti.size() + fieldsToTestGroupSortFirst.size() + fieldsToTestGroupSortLast.size() + @@ -235,7 +240,7 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase { new UpdateRequest() .add(docs) .commit(client, COLLECTION); - + checkSortOrder(client, fieldsToTestGroupSortFirst, "asc", new String[]{"4", "2", "1", "3"}, new String[]{"4", "1", "2", "3"}); checkSortOrder(client, fieldsToTestGroupSortFirst, "desc", new String[]{"3", "1", "2", "4"}, new String[]{"2", "3", "1", "4"}); @@ -251,6 +256,10 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase { solrQuery.addSort("id", SolrQuery.ORDER.asc); final QueryResponse rsp = client.query(COLLECTION, solrQuery); SolrDocumentList res = rsp.getResults(); + //TODO remove after SOLR-9843 + if (order.length != res.getNumFound()) { + log.error("(3) About to fail, response is: " + rsp.toString()); + } assertEquals("Should have exactly " + order.length + " documents returned", order.length, res.getNumFound()); String expected; for (int idx = 0; idx < res.size(); ++idx) { @@ -264,7 +273,7 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase { @Test public void testGroupingDocAbsent() throws IOException, SolrServerException { - List docs = new ArrayList<>(3); + List docs = new ArrayList<>(4); docs.add(makeGSDoc(2, fieldsToTestGroupSortFirst, null)); docs.add(makeGSDoc(1, fieldsToTestGroupSortFirst, null)); docs.add(makeGSDoc(3, fieldsToTestGroupSortFirst, null)); @@ -296,7 +305,11 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase { if (prop.getName().startsWith("bool")) expected = 3; //true, false and null List fieldCommandGroups = fieldCommand.getValues(); - assertEquals("Did not find the expected number of groups!", expected, fieldCommandGroups.size()); + //TODO: remove me since this is excessive in the normal case, this is in for SOLR-9843 + if (expected != fieldCommandGroups.size()) { + log.error("(1) About to fail assert, response is: " + rsp.toString()); + } + assertEquals("Did not find the expected number of groups for field " + prop.getName(), expected, fieldCommandGroups.size()); } } @@ -316,7 +329,7 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase { docs.add(doc); if ((idx % 5) == 0) { doc = new SolrInputDocument(); - doc.addField("id", idx + 100); + doc.addField("id", idx + 10_000); docs.add(doc); } } @@ -368,6 +381,8 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase { break; default: + //TODO remove me after SOLR-9843 + log.error("(2) About to fail, response is: 
" + rsp.toString()); fail("Unexpected number of elements in the group for " + prop.getName() + ": " + grp.getResult().size()); } } @@ -450,8 +465,6 @@ class FieldProps { private Object base; private int counter = 0; - static SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", Locale.ROOT); - FieldProps(String name, String type, int expectedCount) { this.name = name; this.type = type; @@ -460,22 +473,23 @@ class FieldProps { } void resetBase() { if (name.startsWith("int")) { - base = Math.abs(DocValuesNotIndexedTest.random().nextInt()); + base = Math.abs(random().nextInt()); } else if (name.startsWith("long")) { - base = Math.abs(DocValuesNotIndexedTest.random().nextLong()); + base = Math.abs(random().nextLong()); } else if (name.startsWith("float")) { - base = Math.abs(DocValuesNotIndexedTest.random().nextFloat()); + base = Math.abs(random().nextFloat()); } else if (name.startsWith("double")) { - base = Math.abs(DocValuesNotIndexedTest.random().nextDouble()); + base = Math.abs(random().nextDouble()); } else if (name.startsWith("date")) { - base = Math.abs(DocValuesNotIndexedTest.random().nextLong()); + base = Math.abs(random().nextLong()); } else if (name.startsWith("bool")) { base = true; // Must start with a known value since bools only have a two values.... } else if (name.startsWith("string")) { - base = "base_string_" + DocValuesNotIndexedTest.random().nextInt(1_000_000) + "_"; + base = "base_string_" + random().nextInt(1_000_000) + "_"; } else { throw new RuntimeException("Should have found a prefix for the field before now!"); } + counter = 0; } FieldProps(String name, String type) { @@ -496,7 +510,7 @@ class FieldProps { public String getValue(boolean incrementCounter) { if (incrementCounter) { - counter += DocValuesNotIndexedTest.random().nextInt(10) + 100; + counter += random().nextInt(10) + 10_000; } if (name.startsWith("int")) { return Integer.toString((int) base + counter); @@ -511,7 +525,7 @@ class FieldProps { return Double.toString((double) base + counter); } if (name.startsWith("date")) { - return format.format(985_847_645 + (long) base + counter); + return Instant.ofEpochMilli(985_847_645 + (long) base + counter).toString(); } if (name.startsWith("bool")) { String ret = Boolean.toString((boolean) base); From 25c7855bbae4eaa8700e72d094442811f0e8e1d9 Mon Sep 17 00:00:00 2001 From: Shai Erera Date: Sun, 11 Dec 2016 13:08:33 +0200 Subject: [PATCH 35/53] Add .pydevproject to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 8091ecdba71..625cfa97c80 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,7 @@ parent.iml **/pom.xml /nbproject /nb-build +.pydevproject /solr/package From 87d8b5450a6d75fdd4b724b24a3722054b6d00f8 Mon Sep 17 00:00:00 2001 From: Tommaso Teofili Date: Mon, 12 Dec 2016 10:00:21 +0100 Subject: [PATCH 36/53] LUCENE-7591 - approximate to no. 
of terms when DVs are not available --- .../apache/lucene/classification/utils/DatasetSplitter.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java b/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java index 8bb0b1dcdc2..7ab674eafdd 100644 --- a/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java +++ b/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java @@ -94,7 +94,8 @@ public class DatasetSplitter { } } if (classValues == null) { - throw new IllegalStateException("field \"" + classFieldName + "\" must have sorted (set) doc values"); + // approximate with no. of terms + noOfClasses += leave.reader().terms(classFieldName).size(); } noOfClasses += valueCount; } From 39ba13046bc48beaa139923d5f9fbf7d6fc192b2 Mon Sep 17 00:00:00 2001 From: Varun Thacker Date: Mon, 12 Dec 2016 12:38:14 -0800 Subject: [PATCH 37/53] SOLR-9844: Improve FieldCache usage api response formatting and show total size information --- .../apache/lucene/index/SegmentCoreReaders.java | 8 ++++++++ solr/CHANGES.txt | 3 +++ .../apache/solr/search/SolrFieldCacheMBean.java | 2 ++ .../org/apache/solr/uninverting/FieldCache.java | 14 +++++--------- .../solr/uninverting/FieldCacheSanityChecker.java | 3 ++- .../apache/solr/uninverting/UninvertingReader.java | 10 ++++++++++ .../solr/search/TestSolrFieldCacheMBean.java | 1 + 7 files changed, 31 insertions(+), 10 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java b/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java index e99c1ada266..21ac4a16e98 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java @@ -56,6 +56,7 @@ final class SegmentCoreReaders { final TermVectorsReader termVectorsReaderOrig; final PointsReader pointsReader; final Directory cfsReader; + final String segment; /** * fieldinfos for this core: means gen=-1. * this is the exact fieldinfos these codec components saw at write. @@ -98,6 +99,8 @@ final class SegmentCoreReaders { cfsDir = dir; } + segment = si.info.name; + coreFieldInfos = codec.fieldInfosFormat().read(cfsDir, si.info, "", context); final SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.info, coreFieldInfos, context); @@ -192,4 +195,9 @@ final class SegmentCoreReaders { void removeCoreClosedListener(CoreClosedListener listener) { coreClosedListeners.remove(listener); } + + @Override + public String toString() { + return "SegmentCoreReader(" + segment + ")"; + } } diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 78f7f5580f9..37cccaea5db 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -164,6 +164,9 @@ New Features * SOLR-5043: New solr.dns.prevent.reverse.lookup system property that can be used to prevent long core (re)load delays on systems with missconfigured hostname/DNS (hossman) +* SOLR-9844: FieldCache information fetched via the mbeans handler or seen via the UI now displays the total size used. + The individual cache entries in the response are now formatted better as well. 
(Varun Thacker) + Optimizations ---------------------- * SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have diff --git a/solr/core/src/java/org/apache/solr/search/SolrFieldCacheMBean.java b/solr/core/src/java/org/apache/solr/search/SolrFieldCacheMBean.java index 62bc4fa1ff2..70781e967f5 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrFieldCacheMBean.java +++ b/solr/core/src/java/org/apache/solr/search/SolrFieldCacheMBean.java @@ -64,11 +64,13 @@ public class SolrFieldCacheMBean implements JmxAugmentedSolrInfoMBean { if (listEntries) { String[] entries = UninvertingReader.getUninvertedStats(); stats.add("entries_count", entries.length); + stats.add("total_size", UninvertingReader.getTotalSize()); for (int i = 0; i < entries.length; i++) { stats.add("entry#" + i, entries[i]); } } else { stats.add("entries_count", UninvertingReader.getUninvertedStatsSize()); + stats.add("total_size", UninvertingReader.getTotalSize()); } return stats; } diff --git a/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java b/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java index 32f56152c8c..544800e3d3b 100644 --- a/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java +++ b/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java @@ -384,7 +384,7 @@ public interface FieldCache { return custom; } - public Object getValue() { + public Accountable getValue() { return value; } @@ -399,15 +399,11 @@ public interface FieldCache { @Override public String toString() { - StringBuilder b = new StringBuilder(250); - b.append("'").append(getReaderKey()).append("'=>"); - b.append("'").append(getFieldName()).append("',"); - b.append(getCacheType()).append(",").append(getCustom()); - b.append("=>").append(getValue().getClass().getName()).append("#"); - b.append(System.identityHashCode(getValue())); - + StringBuilder b = new StringBuilder(100); + b.append("segment='").append(getReaderKey().toString()).append("', "); + b.append("field='").append(getFieldName()).append("', "); String s = getEstimatedSize(); - b.append(" (size =~ ").append(s).append(')'); + b.append("size =~ ").append(s); return b.toString(); } diff --git a/solr/core/src/java/org/apache/solr/uninverting/FieldCacheSanityChecker.java b/solr/core/src/java/org/apache/solr/uninverting/FieldCacheSanityChecker.java index ec398f2174a..3d874ce4bfb 100644 --- a/solr/core/src/java/org/apache/solr/uninverting/FieldCacheSanityChecker.java +++ b/solr/core/src/java/org/apache/solr/uninverting/FieldCacheSanityChecker.java @@ -27,6 +27,7 @@ import java.util.Set; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.store.AlreadyClosedException; +import org.apache.lucene.util.Accountable; import org.apache.lucene.util.MapOfSets; import org.apache.solr.uninverting.FieldCache.CacheEntry; @@ -103,7 +104,7 @@ final class FieldCacheSanityChecker { // iterate over all the cacheEntries to get the mappings we'll need for (int i = 0; i < cacheEntries.length; i++) { final CacheEntry item = cacheEntries[i]; - final Object val = item.getValue(); + final Accountable val = item.getValue(); // It's OK to have dup entries, where one is eg // float[] and the other is the Bits (from diff --git a/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java b/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java index 78256664dee..87fb7a6bd42 100644 --- a/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java +++ 
b/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java @@ -37,6 +37,7 @@ import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.util.RamUsageEstimator; import org.apache.solr.uninverting.FieldCache.CacheEntry; /** @@ -386,4 +387,13 @@ public class UninvertingReader extends FilterLeafReader { public static int getUninvertedStatsSize() { return FieldCache.DEFAULT.getCacheEntries().length; } + + public static String getTotalSize() { + CacheEntry[] entries = FieldCache.DEFAULT.getCacheEntries(); + long totalBytesUsed = 0; + for (int i = 0; i < entries.length; i++) { + totalBytesUsed += entries[i].getValue().ramBytesUsed(); + } + return RamUsageEstimator.humanReadableUnits(totalBytesUsed); + } } diff --git a/solr/core/src/test/org/apache/solr/search/TestSolrFieldCacheMBean.java b/solr/core/src/test/org/apache/solr/search/TestSolrFieldCacheMBean.java index 5343f7345fb..a705e1ec94a 100644 --- a/solr/core/src/test/org/apache/solr/search/TestSolrFieldCacheMBean.java +++ b/solr/core/src/test/org/apache/solr/search/TestSolrFieldCacheMBean.java @@ -76,6 +76,7 @@ public class TestSolrFieldCacheMBean extends SolrTestCaseJ4 { SolrFieldCacheMBean mbean = new SolrFieldCacheMBean(); NamedList stats = checkJmx ? mbean.getStatisticsForJmx() : mbean.getStatistics(); assert(new Integer(stats.get("entries_count").toString()) > 0); + assertNotNull(stats.get("total_size")); assertNull(stats.get("entry#0")); } } From fecbbe081fd4a777f01517fdd8631e69797def38 Mon Sep 17 00:00:00 2001 From: Varun Thacker Date: Mon, 12 Dec 2016 15:28:22 -0800 Subject: [PATCH 38/53] SOLR-9707: Don't forward DeleteByQuery requests to down replicas --- solr/CHANGES.txt | 2 ++ .../solr/update/processor/DistributedUpdateProcessor.java | 8 +++----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 37cccaea5db..a8a3f971023 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -240,6 +240,8 @@ Bug Fixes * SOLR-9834: A variety of spots in the code can create a collection zk node after the collection has been removed. (Mark Miller) +* SOLR-9707: Don't forward DeleteByQuery requests to down replicas. 
(Jessica Cheng Mallet via Varun Thacker) + Other Changes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java b/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java index b8bdd16cfb4..c62a90af260 100644 --- a/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java +++ b/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java @@ -658,8 +658,7 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor { String shardId = cloudDesc.getShardId(); try { - Replica leaderReplica = zkController.getZkStateReader().getLeaderRetry( - collection, shardId); + Replica leaderReplica = zkController.getZkStateReader().getLeaderRetry(collection, shardId); isLeader = leaderReplica.getName().equals( req.getCore().getCoreDescriptor().getCloudDescriptor() .getCoreNodeName()); @@ -668,7 +667,7 @@ forwardToLeader = false; List<ZkCoreNodeProps> replicaProps = zkController.getZkStateReader() - .getReplicaProps(collection, shardId, leaderReplica.getName()); + .getReplicaProps(collection, shardId, leaderReplica.getName(), null, Replica.State.DOWN); if (replicaProps != null) { nodes = new ArrayList<>(replicaProps.size()); for (ZkCoreNodeProps props : replicaProps) { @@ -677,8 +676,7 @@ } } catch (InterruptedException e) { Thread.currentThread().interrupt(); - throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", - e); + throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e); } return nodes; From 8c79ab2649437c8c7ca275f6481c058c67626660 Mon Sep 17 00:00:00 2001 From: Erick Date: Mon, 12 Dec 2016 18:43:30 -0800 Subject: [PATCH 39/53] SOLR-9823: CoreContainer incorrectly setting MDCLoggingContext for core --- solr/CHANGES.txt | 2 ++ solr/core/src/java/org/apache/solr/core/CoreContainer.java | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index a8a3f971023..41af0ff23d5 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -242,6 +242,8 @@ Bug Fixes * SOLR-9707: Don't forward DeleteByQuery requests to down replicas.
(Jessica Cheng Mallet via Varun Thacker) +* SOLR-9823: CoreContainer incorrectly setting MDCLoggingContext for core (Jessica Cheng Mallet via Erick Erickson) + Other Changes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java b/solr/core/src/java/org/apache/solr/core/CoreContainer.java index ad4560e5bbb..7c38b81f40d 100644 --- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java +++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java @@ -857,7 +857,7 @@ public class CoreContainer { SolrCore core = null; try { - MDCLoggingContext.setCore(core); + MDCLoggingContext.setCoreDescriptor(dcore); SolrIdentifierValidator.validateCoreName(dcore.getName()); if (zkSys.getZkController() != null) { zkSys.getZkController().preRegister(dcore); From 9aa5b734c38ed0b9327577bd2b1413d448230eab Mon Sep 17 00:00:00 2001 From: Nicholas Knize Date: Tue, 13 Dec 2016 15:07:06 -0600 Subject: [PATCH 40/53] fix RangeFieldQuery.scorer to return null if no docs in a segment indexed the field --- .../src/java/org/apache/lucene/document/RangeFieldQuery.java | 1 + .../org/apache/lucene/search/BaseRangeFieldQueryTestCase.java | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/RangeFieldQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/RangeFieldQuery.java index 7ebdec491ee..52491912cea 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/RangeFieldQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/RangeFieldQuery.java @@ -165,6 +165,7 @@ abstract class RangeFieldQuery extends Query { FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field); if (fieldInfo == null) { // no docs in this segment indexed this field + return null; } checkFieldInfo(fieldInfo); boolean allDocsMatch = true; diff --git a/lucene/sandbox/src/test/org/apache/lucene/search/BaseRangeFieldQueryTestCase.java b/lucene/sandbox/src/test/org/apache/lucene/search/BaseRangeFieldQueryTestCase.java index ff61ff65809..ceafd5360da 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/search/BaseRangeFieldQueryTestCase.java +++ b/lucene/sandbox/src/test/org/apache/lucene/search/BaseRangeFieldQueryTestCase.java @@ -93,7 +93,7 @@ public abstract class BaseRangeFieldQueryTestCase extends LuceneTestCase { ranges[id] = new Range[] {nextRange(dimensions)}; } if (x == 17) { - // dome docs don't have a box: + // some docs don't have a box: ranges[id][0].isMissing = true; if (VERBOSE) { System.out.println(" id=" + id + " is missing"); From ad7152ad4739a47aa2b45405ba1682b3dda18923 Mon Sep 17 00:00:00 2001 From: Shai Erera Date: Sun, 11 Dec 2016 12:49:50 +0200 Subject: [PATCH 41/53] LUCENE-7590: add DocValuesStatsCollector --- .../apache/lucene/search/DocValuesStats.java | 165 +++++++++++++++++ .../search/DocValuesStatsCollector.java | 64 +++++++ .../search/TestDocValuesStatsCollector.java | 166 ++++++++++++++++++ 3 files changed, 395 insertions(+) create mode 100644 lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java create mode 100644 lucene/misc/src/java/org/apache/lucene/search/DocValuesStatsCollector.java create mode 100644 lucene/misc/src/test/org/apache/lucene/search/TestDocValuesStatsCollector.java diff --git a/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java b/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java new file mode 100644 index 00000000000..fad9f97f0e2 --- /dev/null +++ b/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java @@ -0,0 
+1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search; + +import java.io.IOException; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; + +/** Holds statistics for a DocValues field. */ +public abstract class DocValuesStats { + + private int missing = 0; + private int count = 0; + + protected final String field; + + protected T min; + protected T max; + + protected DocValuesStats(String field, T initialMin, T initialMax) { + this.field = field; + this.min = initialMin; + this.max = initialMax; + } + + /** + * Called after #{@link DocValuesStats#accumulate(int)} was processed and verified that the document has a value for + * the field. Implementations should update the statistics based on the value of the current document. + * + * @param count + * the updated number of documents with value for this field. + */ + protected abstract void doAccumulate(int count) throws IOException; + + /** + * Initializes this object with the given reader context. Returns whether stats can be computed for this segment (i.e. + * it does have the requested DocValues field). + */ + protected abstract boolean init(LeafReaderContext contxt) throws IOException; + + /** Returns whether the given document has a value for the requested DocValues field. */ + protected abstract boolean hasValue(int doc) throws IOException; + + final void accumulate(int doc) throws IOException { + if (hasValue(doc)) { + ++count; + doAccumulate(count); + } else { + ++missing; + } + } + + final void addMissing() { + ++missing; + } + + /** The field for which these stats were computed. */ + public final String field() { + return field; + } + + /** The number of documents which have a value of the field. */ + public final int count() { + return count; + } + + /** The number of documents which do not have a value of the field. */ + public final int missing() { + return missing; + } + + /** The minimum value of the field. Undefined when {@link #count} is zero. */ + public final T min() { + return min; + } + + /** The maximum value of the field. Undefined when {@link #count} is zero. */ + public final T max() { + return max; + } + + /** Holds statistics for a numeric DocValues field. 
*/ + public static abstract class NumericDocValuesStats extends DocValuesStats { + + protected double mean = 0.0; + + protected NumericDocValues ndv; + + protected NumericDocValuesStats(String field, T initialMin, T initialMax) { + super(field, initialMin, initialMax); + } + + @Override + protected final boolean init(LeafReaderContext contxt) throws IOException { + ndv = contxt.reader().getNumericDocValues(field); + return ndv != null; + } + + @Override + protected boolean hasValue(int doc) throws IOException { + return ndv.advanceExact(doc); + } + + /** The mean of all values of the field. Undefined when {@link #count} is zero. */ + public final double mean() { + return mean; + } + } + + /** Holds DocValues statistics for a numeric field storing {@code long} values. */ + public static final class LongDocValuesStats extends NumericDocValuesStats { + + public LongDocValuesStats(String description) { + super(description, Long.MAX_VALUE, Long.MIN_VALUE); + } + + @Override + protected void doAccumulate(int count) throws IOException { + long val = ndv.longValue(); + if (val > max) { + max = val; + } + if (val < min) { + min = val; + } + mean += (val - mean) / count; + } + } + + /** Holds DocValues statistics for a numeric field storing {@code double} values. */ + public static final class DoubleDocValuesStats extends NumericDocValuesStats { + + public DoubleDocValuesStats(String description) { + super(description, Double.MAX_VALUE, Double.MIN_VALUE); + } + + @Override + protected void doAccumulate(int count) throws IOException { + double val = Double.longBitsToDouble(ndv.longValue()); + if (Double.compare(val, max) > 0) { + max = val; + } + if (Double.compare(val, min) < 0) { + min = val; + } + mean += (val - mean) / count; + } + } + +} \ No newline at end of file diff --git a/lucene/misc/src/java/org/apache/lucene/search/DocValuesStatsCollector.java b/lucene/misc/src/java/org/apache/lucene/search/DocValuesStatsCollector.java new file mode 100644 index 00000000000..2b1fa4fb852 --- /dev/null +++ b/lucene/misc/src/java/org/apache/lucene/search/DocValuesStatsCollector.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search; + +import java.io.IOException; + +import org.apache.lucene.index.LeafReaderContext; + +/** A {@link Collector} which computes statistics for a DocValues field. */ +public class DocValuesStatsCollector implements Collector { + + private final DocValuesStats stats; + + /** Creates a collector to compute statistics for a DocValues field using the given {@code stats}. 
*/ + public DocValuesStatsCollector(DocValuesStats stats) { + this.stats = stats; + } + + @Override + public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { + boolean shouldProcess = stats.init(context); + if (!shouldProcess) { + // Stats cannot be computed for this segment, therefore consider all matching documents as a 'miss'. + return new LeafCollector() { + @Override public void setScorer(Scorer scorer) throws IOException {} + + @Override + public void collect(int doc) throws IOException { + // All matching documents in this reader are missing a value + stats.addMissing(); + } + }; + } + + return new LeafCollector() { + @Override public void setScorer(Scorer scorer) throws IOException {} + + @Override + public void collect(int doc) throws IOException { + stats.accumulate(doc); + } + }; + } + + @Override + public boolean needsScores() { + return false; + } + +} diff --git a/lucene/misc/src/test/org/apache/lucene/search/TestDocValuesStatsCollector.java b/lucene/misc/src/test/org/apache/lucene/search/TestDocValuesStatsCollector.java new file mode 100644 index 00000000000..65f82e62d42 --- /dev/null +++ b/lucene/misc/src/test/org/apache/lucene/search/TestDocValuesStatsCollector.java @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.Arrays; +import java.util.stream.DoubleStream; +import java.util.stream.LongStream; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.DoubleDocValuesField; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.DocValuesStats.DoubleDocValuesStats; +import org.apache.lucene.search.DocValuesStats.LongDocValuesStats; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.TestUtil; + +/** Unit tests for {@link DocValuesStatsCollector}. 
*/ +public class TestDocValuesStatsCollector extends LuceneTestCase { + + public void testNoDocsWithField() throws IOException { + try (Directory dir = newDirectory(); + IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig())) { + int numDocs = TestUtil.nextInt(random(), 1, 100); + for (int i = 0; i < numDocs; i++) { + indexWriter.addDocument(new Document()); + } + + try (DirectoryReader reader = DirectoryReader.open(indexWriter)) { + IndexSearcher searcher = new IndexSearcher(reader); + LongDocValuesStats stats = new LongDocValuesStats("foo"); + searcher.search(new MatchAllDocsQuery(), new DocValuesStatsCollector(stats)); + + assertEquals(0, stats.count()); + assertEquals(numDocs, stats.missing()); + } + } + } + + public void testRandomDocsWithLongValues() throws IOException { + try (Directory dir = newDirectory(); + IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig())) { + String field = "numeric"; + int numDocs = TestUtil.nextInt(random(), 1, 100); + long[] docValues = new long[numDocs]; + int nextVal = 1; + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + if (random().nextBoolean()) { // not all documents have a value + doc.add(new NumericDocValuesField(field, nextVal)); + doc.add(new StringField("id", "doc" + i, Store.NO)); + docValues[i] = nextVal; + ++nextVal; + } + indexWriter.addDocument(doc); + } + + // 20% of cases delete some docs + if (random().nextDouble() < 0.2) { + for (int i = 0; i < numDocs; i++) { + if (random().nextBoolean()) { + indexWriter.deleteDocuments(new Term("id", "doc" + i)); + docValues[i] = 0; + } + } + } + + try (DirectoryReader reader = DirectoryReader.open(indexWriter)) { + IndexSearcher searcher = new IndexSearcher(reader); + LongDocValuesStats stats = new LongDocValuesStats(field); + searcher.search(new MatchAllDocsQuery(), new DocValuesStatsCollector(stats)); + + int expCount = (int) Arrays.stream(docValues).filter(v -> v > 0).count(); + assertEquals(expCount, stats.count()); + assertEquals(getZeroValues(docValues).count() - reader.numDeletedDocs(), stats.missing()); + if (stats.count() > 0) { + assertEquals(getPositiveValues(docValues).max().getAsLong(), stats.max().longValue()); + assertEquals(getPositiveValues(docValues).min().getAsLong(), stats.min().longValue()); + assertEquals(getPositiveValues(docValues).average().getAsDouble(), stats.mean(), 0.00001); + } + } + } + } + + public void testRandomDocsWithDoubleValues() throws IOException { + try (Directory dir = newDirectory(); + IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig())) { + String field = "numeric"; + int numDocs = TestUtil.nextInt(random(), 1, 100); + double[] docValues = new double[numDocs]; + double nextVal = 1.0; + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + if (random().nextBoolean()) { // not all documents have a value + doc.add(new DoubleDocValuesField(field, nextVal)); + doc.add(new StringField("id", "doc" + i, Store.NO)); + docValues[i] = nextVal; + ++nextVal; + } + indexWriter.addDocument(doc); + } + + // 20% of cases delete some docs + if (random().nextDouble() < 0.2) { + for (int i = 0; i < numDocs; i++) { + if (random().nextBoolean()) { + indexWriter.deleteDocuments(new Term("id", "doc" + i)); + docValues[i] = 0; + } + } + } + + try (DirectoryReader reader = DirectoryReader.open(indexWriter)) { + IndexSearcher searcher = new IndexSearcher(reader); + DoubleDocValuesStats stats = new DoubleDocValuesStats(field); + searcher.search(new MatchAllDocsQuery(), new 
DocValuesStatsCollector(stats)); + + int expCount = (int) Arrays.stream(docValues).filter(v -> v > 0).count(); + assertEquals(expCount, stats.count()); + assertEquals(getZeroValues(docValues).count() - reader.numDeletedDocs(), stats.missing()); + if (stats.count() > 0) { + assertEquals(getPositiveValues(docValues).max().getAsDouble(), stats.max().doubleValue(), 0.00001); + assertEquals(getPositiveValues(docValues).min().getAsDouble(), stats.min().doubleValue(), 0.00001); + assertEquals(getPositiveValues(docValues).average().getAsDouble(), stats.mean(), 0.00001); + } + } + } + } + + private static LongStream getPositiveValues(long[] docValues) { + return Arrays.stream(docValues).filter(v -> v > 0); + } + + private static DoubleStream getPositiveValues(double[] docValues) { + return Arrays.stream(docValues).filter(v -> v > 0); + } + + private static LongStream getZeroValues(long[] docValues) { + return Arrays.stream(docValues).filter(v -> v == 0); + } + + private static DoubleStream getZeroValues(double[] docValues) { + return Arrays.stream(docValues).filter(v -> v == 0); + } + +} From 770f1eb8ad6af5cce55d1bdf52f1288216c9691f Mon Sep 17 00:00:00 2001 From: Shai Erera Date: Wed, 14 Dec 2016 13:07:19 +0200 Subject: [PATCH 42/53] Fix LeafReader.getNumericDocValues javadoc --- lucene/core/src/java/org/apache/lucene/index/LeafReader.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/LeafReader.java b/lucene/core/src/java/org/apache/lucene/index/LeafReader.java index acdd0d87bd7..73394f23670 100644 --- a/lucene/core/src/java/org/apache/lucene/index/LeafReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/LeafReader.java @@ -242,7 +242,7 @@ public abstract class LeafReader extends IndexReader { /** Returns {@link NumericDocValues} for this field, or * null if no numeric doc values were indexed for * this field. The returned instance should only be - * used by a single thread. This will never return null. */ + * used by a single thread. */ public abstract NumericDocValues getNumericDocValues(String field) throws IOException; /** Returns {@link BinaryDocValues} for this field, or From 85582dabe4372085e1af5d01ebbfcfd0303b9f12 Mon Sep 17 00:00:00 2001 From: Shai Erera Date: Wed, 14 Dec 2016 13:28:02 +0200 Subject: [PATCH 43/53] LUCENE-7590: fix typo in method parameter --- .../src/java/org/apache/lucene/search/DocValuesStats.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java b/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java index fad9f97f0e2..38158cf47d9 100644 --- a/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java +++ b/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java @@ -51,7 +51,7 @@ public abstract class DocValuesStats { * Initializes this object with the given reader context. Returns whether stats can be computed for this segment (i.e. * it does have the requested DocValues field). */ - protected abstract boolean init(LeafReaderContext contxt) throws IOException; + protected abstract boolean init(LeafReaderContext context) throws IOException; /** Returns whether the given document has a value for the requested DocValues field. 
*/ protected abstract boolean hasValue(int doc) throws IOException; @@ -106,8 +106,8 @@ public abstract class DocValuesStats { } @Override - protected final boolean init(LeafReaderContext contxt) throws IOException { - ndv = contxt.reader().getNumericDocValues(field); + protected final boolean init(LeafReaderContext context) throws IOException { + ndv = context.reader().getNumericDocValues(field); return ndv != null; } From 22d9af41a435feaa3307880b7c7ed4f5860faa21 Mon Sep 17 00:00:00 2001 From: Shai Erera Date: Wed, 14 Dec 2016 13:49:42 +0200 Subject: [PATCH 44/53] Rename constructor parameter name --- .../src/java/org/apache/lucene/search/DocValuesStats.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java b/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java index 38158cf47d9..998bef4fe21 100644 --- a/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java +++ b/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java @@ -125,8 +125,8 @@ public abstract class DocValuesStats { /** Holds DocValues statistics for a numeric field storing {@code long} values. */ public static final class LongDocValuesStats extends NumericDocValuesStats { - public LongDocValuesStats(String description) { - super(description, Long.MAX_VALUE, Long.MIN_VALUE); + public LongDocValuesStats(String field) { + super(field, Long.MAX_VALUE, Long.MIN_VALUE); } @Override @@ -145,8 +145,8 @@ public abstract class DocValuesStats { /** Holds DocValues statistics for a numeric field storing {@code double} values. */ public static final class DoubleDocValuesStats extends NumericDocValuesStats { - public DoubleDocValuesStats(String description) { - super(description, Double.MAX_VALUE, Double.MIN_VALUE); + public DoubleDocValuesStats(String field) { + super(field, Double.MAX_VALUE, Double.MIN_VALUE); } @Override From e82399d0677651ad4be1d8d2bdc4777b5d90b0fa Mon Sep 17 00:00:00 2001 From: markrmiller Date: Mon, 12 Dec 2016 11:10:58 -0500 Subject: [PATCH 45/53] SOLR-1953: It may be possible for temporary files to accumulate until the Solr process is shut down. --- solr/CHANGES.txt | 3 + .../solr/servlet/SolrDispatchFilter.java | 15 ++ .../solr/servlet/SolrRequestParsers.java | 31 ++-- .../solr/util/SolrFileCleaningTracker.java | 147 ++++++++++++++++++ 4 files changed, 182 insertions(+), 14 deletions(-) create mode 100644 solr/core/src/java/org/apache/solr/util/SolrFileCleaningTracker.java diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 41af0ff23d5..946a04e6936 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -244,6 +244,9 @@ Bug Fixes * SOLR-9823: CoreContainer incorrectly setting MDCLoggingContext for core (Jessica Cheng Mallet via Erick Erickson) +* SOLR-1953: It may be possible for temporary files to accumulate until the Solr process is shut down. 
+ (Karl Wright, Mark Miller) + Other Changes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java b/solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java index 5a4cfb627fe..e8c4657f378 100644 --- a/solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java +++ b/solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java @@ -45,6 +45,7 @@ import java.util.concurrent.atomic.AtomicReference; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.commons.io.FileCleaningTracker; import org.apache.commons.io.input.CloseShieldInputStream; import org.apache.commons.io.output.CloseShieldOutputStream; import org.apache.commons.lang.StringUtils; @@ -62,6 +63,7 @@ import org.apache.solr.core.SolrXmlConfig; import org.apache.solr.request.SolrRequestInfo; import org.apache.solr.security.AuthenticationPlugin; import org.apache.solr.security.PKIAuthenticationPlugin; +import org.apache.solr.util.SolrFileCleaningTracker; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -123,6 +125,8 @@ public class SolrDispatchFilter extends BaseSolrFilter { { log.trace("SolrDispatchFilter.init(): {}", this.getClass().getClassLoader()); + SolrRequestParsers.fileCleaningTracker = new SolrFileCleaningTracker(); + StartupLoggingUtils.checkLogDir(); logWelcomeBanner(); String muteConsole = System.getProperty(SOLR_LOG_MUTECONSOLE); @@ -240,6 +244,17 @@ public class SolrDispatchFilter extends BaseSolrFilter { @Override public void destroy() { + try { + FileCleaningTracker fileCleaningTracker = SolrRequestParsers.fileCleaningTracker; + if (fileCleaningTracker != null) { + fileCleaningTracker.exitWhenFinished(); + } + } catch (Exception e) { + log.warn("Exception closing FileCleaningTracker", e); + } finally { + SolrRequestParsers.fileCleaningTracker = null; + } + if (cores != null) { try { cores.shutdown(); diff --git a/solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java b/solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java index 9d7e7d9aced..968320e28ee 100644 --- a/solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java +++ b/solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java @@ -42,6 +42,7 @@ import java.util.Map; import org.apache.commons.fileupload.FileItem; import org.apache.commons.fileupload.disk.DiskFileItemFactory; import org.apache.commons.fileupload.servlet.ServletFileUpload; +import org.apache.commons.io.FileCleaningTracker; import org.apache.commons.io.input.CloseShieldInputStream; import org.apache.lucene.util.IOUtils; import org.apache.solr.common.SolrException; @@ -58,6 +59,7 @@ import org.apache.solr.core.SolrCore; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequestBase; import org.apache.solr.util.RTimerTree; +import org.apache.solr.util.SolrFileCleaningTracker; import static org.apache.solr.common.params.CommonParams.PATH; @@ -88,6 +90,8 @@ public class SolrRequestParsers /** Default instance for e.g. admin requests. Limits to 2 MB uploads and does not allow remote streams. */ public static final SolrRequestParsers DEFAULT = new SolrRequestParsers(); + public static volatile SolrFileCleaningTracker fileCleaningTracker; + /** * Pass in an xml configuration. A null configuration will enable * everything with maximum values. 
@@ -532,31 +536,30 @@ public class SolrRequestParsers /** * Extract Multipart streams */ - static class MultipartRequestParser implements SolrRequestParser - { + static class MultipartRequestParser implements SolrRequestParser { private final int uploadLimitKB; + private DiskFileItemFactory factory = new DiskFileItemFactory(); - public MultipartRequestParser( int limit ) - { + public MultipartRequestParser(int limit) { uploadLimitKB = limit; + + // Set factory constraints + FileCleaningTracker fct = fileCleaningTracker; + if (fct != null) { + factory.setFileCleaningTracker(fileCleaningTracker); + } + // TODO - configure factory.setSizeThreshold(yourMaxMemorySize); + // TODO - configure factory.setRepository(yourTempDirectory); } @Override - public SolrParams parseParamsAndFillStreams( - final HttpServletRequest req, ArrayList streams ) throws Exception - { + public SolrParams parseParamsAndFillStreams( + final HttpServletRequest req, ArrayList streams) throws Exception { if( !ServletFileUpload.isMultipartContent(req) ) { throw new SolrException( ErrorCode.BAD_REQUEST, "Not multipart content! "+req.getContentType() ); } MultiMapSolrParams params = parseQueryString( req.getQueryString() ); - - // Create a factory for disk-based file items - DiskFileItemFactory factory = new DiskFileItemFactory(); - - // Set factory constraints - // TODO - configure factory.setSizeThreshold(yourMaxMemorySize); - // TODO - configure factory.setRepository(yourTempDirectory); // Create a new file upload handler ServletFileUpload upload = new ServletFileUpload(factory); diff --git a/solr/core/src/java/org/apache/solr/util/SolrFileCleaningTracker.java b/solr/core/src/java/org/apache/solr/util/SolrFileCleaningTracker.java new file mode 100644 index 00000000000..9c66f0feadb --- /dev/null +++ b/solr/core/src/java/org/apache/solr/util/SolrFileCleaningTracker.java @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.util; + +import java.io.File; +import java.lang.ref.PhantomReference; +import java.lang.ref.ReferenceQueue; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; + +import org.apache.commons.io.FileCleaningTracker; +import org.apache.commons.io.FileDeleteStrategy; + +public class SolrFileCleaningTracker extends FileCleaningTracker { + + ReferenceQueue q = new ReferenceQueue<>(); + + final Collection trackers = Collections.synchronizedSet(new HashSet()); + + final List deleteFailures = Collections.synchronizedList(new ArrayList()); + + volatile boolean exitWhenFinished = false; + + Thread reaper; + + public void track(final File file, final Object marker) { + track(file, marker, null); + } + + public void track(final File file, final Object marker, final FileDeleteStrategy deleteStrategy) { + if (file == null) { + throw new NullPointerException("The file must not be null"); + } + addTracker(file.getPath(), marker, deleteStrategy); + } + + public void track(final String path, final Object marker) { + track(path, marker, null); + } + + public void track(final String path, final Object marker, final FileDeleteStrategy deleteStrategy) { + if (path == null) { + throw new NullPointerException("The path must not be null"); + } + addTracker(path, marker, deleteStrategy); + } + + private synchronized void addTracker(final String path, final Object marker, + final FileDeleteStrategy deleteStrategy) { + if (exitWhenFinished) { + throw new IllegalStateException("No new trackers can be added once exitWhenFinished() is called"); + } + if (reaper == null) { + reaper = new Reaper(); + reaper.start(); + } + trackers.add(new Tracker(path, deleteStrategy, marker, q)); + } + + public int getTrackCount() { + return trackers.size(); + } + + public List getDeleteFailures() { + return deleteFailures; + } + + public synchronized void exitWhenFinished() { + // synchronized block protects reaper + exitWhenFinished = true; + if (reaper != null) { + synchronized (reaper) { + reaper.interrupt(); + try { + reaper.join(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + } + } + + private final class Reaper extends Thread { + Reaper() { + super("MultiPart Upload Tmp File Reaper"); + setDaemon(true); + } + + @Override + public void run() { + while (exitWhenFinished == false || trackers.size() > 0) { + try { + // Wait for a tracker to remove. + final Tracker tracker = (Tracker) q.remove(); // cannot return null + trackers.remove(tracker); + if (!tracker.delete()) { + deleteFailures.add(tracker.getPath()); + } + tracker.clear(); + } catch (final InterruptedException e) { + Thread.currentThread().interrupt(); + break; + } + } + } + } + + private static final class Tracker extends PhantomReference { + + private final String path; + + private final FileDeleteStrategy deleteStrategy; + + Tracker(final String path, final FileDeleteStrategy deleteStrategy, final Object marker, + final ReferenceQueue queue) { + super(marker, queue); + this.path = path; + this.deleteStrategy = deleteStrategy == null ? 
FileDeleteStrategy.NORMAL : deleteStrategy; + } + + public String getPath() { + return path; + } + + public boolean delete() { + return deleteStrategy.deleteQuietly(new File(path)); + } + } + +} \ No newline at end of file From 7dec783b287ab554cc781622b4d6127e553fd2ae Mon Sep 17 00:00:00 2001 From: markrmiller Date: Sun, 11 Dec 2016 22:02:48 -0500 Subject: [PATCH 46/53] SOLR-9846: OverseerAutoReplicaFailoverThread can take too long to stop and leak out of unit tests. --- solr/CHANGES.txt | 2 ++ .../solr/cloud/OverseerAutoReplicaFailoverThread.java | 9 +++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 946a04e6936..5f0357b3608 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -288,6 +288,8 @@ Other Changes response (instead of a SolrException) and includes the remote error message as part of the exception message (Tomás Fernández Löbbe) +* SOLR-9846: OverseerAutoReplicaFailoverThread can take too long to stop and leak out of unit tests. (Mark Miller) + ================== 6.3.0 ================== Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release. diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerAutoReplicaFailoverThread.java b/solr/core/src/java/org/apache/solr/cloud/OverseerAutoReplicaFailoverThread.java index 83679a549c7..10b4bf3fb0e 100644 --- a/solr/core/src/java/org/apache/solr/cloud/OverseerAutoReplicaFailoverThread.java +++ b/solr/core/src/java/org/apache/solr/cloud/OverseerAutoReplicaFailoverThread.java @@ -89,6 +89,8 @@ public class OverseerAutoReplicaFailoverThread implements Runnable, Closeable { private final int workLoopDelay; private final int waitAfterExpiration; + + private volatile Thread thread; public OverseerAutoReplicaFailoverThread(CloudConfig config, ZkStateReader zkStateReader, UpdateShardHandler updateShardHandler) { @@ -118,7 +120,7 @@ public class OverseerAutoReplicaFailoverThread implements Runnable, Closeable { @Override public void run() { - + this.thread = Thread.currentThread(); while (!this.isClosed) { // work loop log.debug("do " + this.getClass().getSimpleName() + " work loop"); @@ -136,7 +138,6 @@ public class OverseerAutoReplicaFailoverThread implements Runnable, Closeable { try { Thread.sleep(workLoopDelay); } catch (InterruptedException e) { - Thread.currentThread().interrupt(); return; } } @@ -480,6 +481,10 @@ public class OverseerAutoReplicaFailoverThread implements Runnable, Closeable { @Override public void close() { isClosed = true; + Thread lThread = thread; + if (lThread != null) { + lThread.interrupt(); + } } public boolean isClosed() { From 512374384a8984c56c91f47dcac4aaf0490eda54 Mon Sep 17 00:00:00 2001 From: Varun Thacker Date: Tue, 13 Dec 2016 15:52:17 -0800 Subject: [PATCH 47/53] SOLR-9844: Display fc total size only when field entries asked for --- .../solr/search/SolrFieldCacheMBean.java | 6 ++--- .../solr/uninverting/UninvertingReader.java | 25 +++++++++++++------ .../solr/search/TestSolrFieldCacheMBean.java | 3 ++- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/search/SolrFieldCacheMBean.java b/solr/core/src/java/org/apache/solr/search/SolrFieldCacheMBean.java index 70781e967f5..642b7087846 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrFieldCacheMBean.java +++ b/solr/core/src/java/org/apache/solr/search/SolrFieldCacheMBean.java @@ -62,15 +62,15 @@ public class SolrFieldCacheMBean implements JmxAugmentedSolrInfoMBean { private NamedList getStats(boolean 
listEntries) { NamedList stats = new SimpleOrderedMap(); if (listEntries) { - String[] entries = UninvertingReader.getUninvertedStats(); + UninvertingReader.FieldCacheStats fieldCacheStats = UninvertingReader.getUninvertedStats(); + String[] entries = fieldCacheStats.info; stats.add("entries_count", entries.length); - stats.add("total_size", UninvertingReader.getTotalSize()); + stats.add("total_size", fieldCacheStats.totalSize); for (int i = 0; i < entries.length; i++) { stats.add("entry#" + i, entries[i]); } } else { stats.add("entries_count", UninvertingReader.getUninvertedStatsSize()); - stats.add("total_size", UninvertingReader.getTotalSize()); } return stats; } diff --git a/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java b/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java index 87fb7a6bd42..5276ca9da30 100644 --- a/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java +++ b/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java @@ -375,25 +375,34 @@ public class UninvertingReader extends FilterLeafReader { * Return information about the backing cache * @lucene.internal */ - public static String[] getUninvertedStats() { + public static FieldCacheStats getUninvertedStats() { CacheEntry[] entries = FieldCache.DEFAULT.getCacheEntries(); + long totalBytesUsed = 0; String[] info = new String[entries.length]; for (int i = 0; i < entries.length; i++) { info[i] = entries[i].toString(); + totalBytesUsed += entries[i].getValue().ramBytesUsed(); } - return info; + String totalSize = RamUsageEstimator.humanReadableUnits(totalBytesUsed); + return new FieldCacheStats(totalSize, info); } public static int getUninvertedStatsSize() { return FieldCache.DEFAULT.getCacheEntries().length; } - public static String getTotalSize() { - CacheEntry[] entries = FieldCache.DEFAULT.getCacheEntries(); - long totalBytesUsed = 0; - for (int i = 0; i < entries.length; i++) { - totalBytesUsed += entries[i].getValue().ramBytesUsed(); + /** + * Return information about the backing cache + * @lucene.internal + */ + public static class FieldCacheStats { + public String totalSize; + public String[] info; + + public FieldCacheStats(String totalSize, String[] info) { + this.totalSize = totalSize; + this.info = info; } - return RamUsageEstimator.humanReadableUnits(totalBytesUsed); + } } diff --git a/solr/core/src/test/org/apache/solr/search/TestSolrFieldCacheMBean.java b/solr/core/src/test/org/apache/solr/search/TestSolrFieldCacheMBean.java index a705e1ec94a..35bdec643bc 100644 --- a/solr/core/src/test/org/apache/solr/search/TestSolrFieldCacheMBean.java +++ b/solr/core/src/test/org/apache/solr/search/TestSolrFieldCacheMBean.java @@ -69,6 +69,7 @@ public class TestSolrFieldCacheMBean extends SolrTestCaseJ4 { SolrFieldCacheMBean mbean = new SolrFieldCacheMBean(); NamedList stats = checkJmx ? mbean.getStatisticsForJmx() : mbean.getStatistics(); assert(new Integer(stats.get("entries_count").toString()) > 0); + assertNotNull(stats.get("total_size")); assertNotNull(stats.get("entry#0")); } @@ -76,7 +77,7 @@ public class TestSolrFieldCacheMBean extends SolrTestCaseJ4 { SolrFieldCacheMBean mbean = new SolrFieldCacheMBean(); NamedList stats = checkJmx ? 
mbean.getStatisticsForJmx() : mbean.getStatistics(); assert(new Integer(stats.get("entries_count").toString()) > 0); - assertNotNull(stats.get("total_size")); + assertNull(stats.get("total_size")); assertNull(stats.get("entry#0")); } } From 6525bb56f027655e5a01f028fa373305c0d01caa Mon Sep 17 00:00:00 2001 From: Chris Hostetter Date: Wed, 14 Dec 2016 13:18:56 -0700 Subject: [PATCH 48/53] SOLR-8959: Refactored TestSegmentSorting out of TestMiniSolrCloudCluster --- solr/CHANGES.txt | 3 + .../cloud/SegmentTerminateEarlyTestState.java | 12 +- .../solr/cloud/TestMiniSolrCloudCluster.java | 50 ------- .../apache/solr/cloud/TestSegmentSorting.java | 133 ++++++++++++++++++ 4 files changed, 145 insertions(+), 53 deletions(-) create mode 100644 solr/core/src/test/org/apache/solr/cloud/TestSegmentSorting.java diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 5f0357b3608..73b0e9b8539 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -290,6 +290,9 @@ Other Changes * SOLR-9846: OverseerAutoReplicaFailoverThread can take too long to stop and leak out of unit tests. (Mark Miller) +* SOLR-8959: Refactored TestSegmentSorting out of TestMiniSolrCloudCluster (hossman) + + ================== 6.3.0 ================== Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release. diff --git a/solr/core/src/test/org/apache/solr/cloud/SegmentTerminateEarlyTestState.java b/solr/core/src/test/org/apache/solr/cloud/SegmentTerminateEarlyTestState.java index 199423b5b53..b3df9e78c69 100644 --- a/solr/core/src/test/org/apache/solr/cloud/SegmentTerminateEarlyTestState.java +++ b/solr/core/src/test/org/apache/solr/cloud/SegmentTerminateEarlyTestState.java @@ -22,6 +22,7 @@ import java.time.ZonedDateTime; import java.util.HashSet; import java.util.Map; import java.util.Set; +import java.util.Random; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.impl.CloudSolrClient; @@ -47,7 +48,12 @@ class SegmentTerminateEarlyTestState { Integer maxTimestampMM = null; int numDocs = 0; + final Random rand; + public SegmentTerminateEarlyTestState(Random rand) { + this.rand = rand; + } + void addDocuments(CloudSolrClient cloudSolrClient, int numCommits, int numDocsPerCommit, boolean optimize) throws Exception { for (int cc = 1; cc <= numCommits; ++cc) { @@ -56,7 +62,7 @@ class SegmentTerminateEarlyTestState { final Integer docKey = new Integer(numDocs); SolrInputDocument doc = new SolrInputDocument(); doc.setField(keyField, ""+docKey); - final int MM = TestMiniSolrCloudCluster.random().nextInt(60); // minutes + final int MM = rand.nextInt(60); // minutes if (minTimestampMM == null || MM <= minTimestampMM.intValue()) { if (minTimestampMM != null && MM < minTimestampMM.intValue()) { minTimestampDocKeys.clear(); @@ -116,7 +122,7 @@ class SegmentTerminateEarlyTestState { query.setFields(keyField, oddField, timestampField); final int rowsWanted = 1; query.setRows(rowsWanted); - final Boolean shardsInfoWanted = (TestMiniSolrCloudCluster.random().nextBoolean() ? null : new Boolean(TestMiniSolrCloudCluster.random().nextBoolean())); + final Boolean shardsInfoWanted = (rand.nextBoolean() ? 
null : new Boolean(rand.nextBoolean())); if (shardsInfoWanted != null) { query.set(ShardParams.SHARDS_INFO, shardsInfoWanted.booleanValue()); } @@ -163,7 +169,7 @@ class SegmentTerminateEarlyTestState { query.setSort(timestampField, SolrQuery.ORDER.desc); query.setFields(keyField, oddField, timestampField); query.setRows(1); - final Boolean shardsInfoWanted = (TestMiniSolrCloudCluster.random().nextBoolean() ? null : new Boolean(TestMiniSolrCloudCluster.random().nextBoolean())); + final Boolean shardsInfoWanted = (rand.nextBoolean() ? null : new Boolean(rand.nextBoolean())); if (shardsInfoWanted != null) { query.set(ShardParams.SHARDS_INFO, shardsInfoWanted.booleanValue()); } diff --git a/solr/core/src/test/org/apache/solr/cloud/TestMiniSolrCloudCluster.java b/solr/core/src/test/org/apache/solr/cloud/TestMiniSolrCloudCluster.java index 97ecb67b0ab..de18875d69a 100644 --- a/solr/core/src/test/org/apache/solr/cloud/TestMiniSolrCloudCluster.java +++ b/solr/core/src/test/org/apache/solr/cloud/TestMiniSolrCloudCluster.java @@ -16,7 +16,6 @@ */ package org.apache.solr.cloud; -import java.io.File; import java.lang.invoke.MethodHandles; import java.net.URL; import java.util.ArrayList; @@ -384,53 +383,4 @@ public class TestMiniSolrCloudCluster extends LuceneTestCase { } } - @Test - public void testSegmentTerminateEarly() throws Exception { - - final String collectionName = "testSegmentTerminateEarlyCollection"; - - final SegmentTerminateEarlyTestState tstes = new SegmentTerminateEarlyTestState(); - - File solrXml = new File(SolrTestCaseJ4.TEST_HOME(), "solr.xml"); - Builder jettyConfig = JettyConfig.builder(); - jettyConfig.waitForLoadingCoresToFinish(null); - final MiniSolrCloudCluster miniCluster = createMiniSolrCloudCluster(); - final CloudSolrClient cloudSolrClient = miniCluster.getSolrClient(); - cloudSolrClient.setDefaultCollection(collectionName); - - try { - // create collection - { - final String asyncId = (random().nextBoolean() ? 
null : "asyncId("+collectionName+".create)="+random().nextInt()); - final Map collectionProperties = new HashMap<>(); - collectionProperties.put(CoreDescriptor.CORE_CONFIG, "solrconfig-sortingmergepolicyfactory.xml"); - createCollection(miniCluster, collectionName, null, asyncId, Boolean.TRUE, collectionProperties); - } - - ZkStateReader zkStateReader = cloudSolrClient.getZkStateReader(); - AbstractDistribZkTestBase.waitForRecoveriesToFinish(collectionName, zkStateReader, true, true, 330); - - // add some documents, then optimize to get merged-sorted segments - tstes.addDocuments(cloudSolrClient, 10, 10, true); - - // CommonParams.SEGMENT_TERMINATE_EARLY parameter intentionally absent - tstes.queryTimestampDescending(cloudSolrClient); - - // add a few more documents, but don't optimize to have some not-merge-sorted segments - tstes.addDocuments(cloudSolrClient, 2, 10, false); - - // CommonParams.SEGMENT_TERMINATE_EARLY parameter now present - tstes.queryTimestampDescendingSegmentTerminateEarlyYes(cloudSolrClient); - tstes.queryTimestampDescendingSegmentTerminateEarlyNo(cloudSolrClient); - - // CommonParams.SEGMENT_TERMINATE_EARLY parameter present but it won't be used - tstes.queryTimestampDescendingSegmentTerminateEarlyYesGrouped(cloudSolrClient); - tstes.queryTimestampAscendingSegmentTerminateEarlyYes(cloudSolrClient); // uses a sort order that is _not_ compatible with the merge sort order - - } - finally { - miniCluster.shutdown(); - } - } - } diff --git a/solr/core/src/test/org/apache/solr/cloud/TestSegmentSorting.java b/solr/core/src/test/org/apache/solr/cloud/TestSegmentSorting.java new file mode 100644 index 00000000000..016b63e7aa3 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/cloud/TestSegmentSorting.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.cloud; + +import java.lang.invoke.MethodHandles; +import java.util.HashMap; +import java.util.Map; + +import org.apache.lucene.index.TieredMergePolicy; +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.client.solrj.impl.CloudSolrClient; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.common.cloud.ZkStateReader; +import org.apache.solr.core.CoreDescriptor; +import org.apache.solr.index.TieredMergePolicyFactory; + +import org.junit.After; +import org.junit.BeforeClass; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class TestSegmentSorting extends SolrCloudTestCase { + + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + private static final int NUM_SERVERS = 5; + private static final int NUM_SHARDS = 2; + private static final int REPLICATION_FACTOR = 2; + + @BeforeClass + public static void setupCluster() throws Exception { + configureCluster(NUM_SERVERS).configure(); + } + + @After + public void ensureClusterEmpty() throws Exception { + cluster.deleteAllCollections(); + cluster.getSolrClient().setDefaultCollection(null); + } + + private void createCollection(MiniSolrCloudCluster miniCluster, String collectionName, String createNodeSet, String asyncId, + Boolean indexToPersist, Map collectionProperties) throws Exception { + String configName = "solrCloudCollectionConfig"; + miniCluster.uploadConfigSet(SolrTestCaseJ4.TEST_PATH().resolve("collection1").resolve("conf"), configName); + + final boolean persistIndex = (indexToPersist != null ? indexToPersist.booleanValue() : random().nextBoolean()); + if (collectionProperties == null) { + collectionProperties = new HashMap<>(); + } + collectionProperties.putIfAbsent(CoreDescriptor.CORE_CONFIG, "solrconfig-tlog.xml"); + collectionProperties.putIfAbsent("solr.tests.maxBufferedDocs", "100000"); + collectionProperties.putIfAbsent("solr.tests.ramBufferSizeMB", "100"); + // use non-test classes so RandomizedRunner isn't necessary + if (random().nextBoolean()) { + collectionProperties.putIfAbsent(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_MERGEPOLICY, TieredMergePolicy.class.getName()); + collectionProperties.putIfAbsent(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_USEMERGEPOLICY, "true"); + collectionProperties.putIfAbsent(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_USEMERGEPOLICYFACTORY, "false"); + } else { + collectionProperties.putIfAbsent(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_MERGEPOLICYFACTORY, TieredMergePolicyFactory.class.getName()); + collectionProperties.putIfAbsent(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_USEMERGEPOLICYFACTORY, "true"); + collectionProperties.putIfAbsent(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_USEMERGEPOLICY, "false"); + } + collectionProperties.putIfAbsent("solr.tests.mergeScheduler", "org.apache.lucene.index.ConcurrentMergeScheduler"); + collectionProperties.putIfAbsent("solr.directoryFactory", (persistIndex ? 
"solr.StandardDirectoryFactory" : "solr.RAMDirectoryFactory")); + + if (asyncId == null) { + CollectionAdminRequest.createCollection(collectionName, configName, NUM_SHARDS, REPLICATION_FACTOR) + .setCreateNodeSet(createNodeSet) + .setProperties(collectionProperties) + .process(miniCluster.getSolrClient()); + } + else { + CollectionAdminRequest.createCollection(collectionName, configName, NUM_SHARDS, REPLICATION_FACTOR) + .setCreateNodeSet(createNodeSet) + .setProperties(collectionProperties) + .processAndWait(miniCluster.getSolrClient(), 30); + } + } + + + public void testSegmentTerminateEarly() throws Exception { + + final String collectionName = "testSegmentTerminateEarlyCollection"; + + final SegmentTerminateEarlyTestState tstes = new SegmentTerminateEarlyTestState(random()); + + final CloudSolrClient cloudSolrClient = cluster.getSolrClient(); + cloudSolrClient.setDefaultCollection(collectionName); + + // create collection + { + final String asyncId = (random().nextBoolean() ? null : "asyncId("+collectionName+".create)="+random().nextInt()); + final Map collectionProperties = new HashMap<>(); + collectionProperties.put(CoreDescriptor.CORE_CONFIG, "solrconfig-sortingmergepolicyfactory.xml"); + createCollection(cluster, collectionName, null, asyncId, Boolean.TRUE, collectionProperties); + } + + ZkStateReader zkStateReader = cloudSolrClient.getZkStateReader(); + AbstractDistribZkTestBase.waitForRecoveriesToFinish(collectionName, zkStateReader, true, true, 330); + + // add some documents, then optimize to get merged-sorted segments + tstes.addDocuments(cloudSolrClient, 10, 10, true); + + // CommonParams.SEGMENT_TERMINATE_EARLY parameter intentionally absent + tstes.queryTimestampDescending(cloudSolrClient); + + // add a few more documents, but don't optimize to have some not-merge-sorted segments + tstes.addDocuments(cloudSolrClient, 2, 10, false); + + // CommonParams.SEGMENT_TERMINATE_EARLY parameter now present + tstes.queryTimestampDescendingSegmentTerminateEarlyYes(cloudSolrClient); + tstes.queryTimestampDescendingSegmentTerminateEarlyNo(cloudSolrClient); + + // CommonParams.SEGMENT_TERMINATE_EARLY parameter present but it won't be used + tstes.queryTimestampDescendingSegmentTerminateEarlyYesGrouped(cloudSolrClient); + tstes.queryTimestampAscendingSegmentTerminateEarlyYes(cloudSolrClient); // uses a sort order that is _not_ compatible with the merge sort order + + } +} From e4f31fab2f98b7af6d2ec12a2eb3456521b446df Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Wed, 14 Dec 2016 18:00:51 -0500 Subject: [PATCH 49/53] LUCENE-7592: if segments file is truncated, throw CorruptIndexException --- lucene/CHANGES.txt | 4 ++++ .../src/java/org/apache/lucene/index/SegmentInfos.java | 7 ++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 15b89f09f2f..f38c0d5afda 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -138,6 +138,10 @@ Improvements necessarily refer to that field (AKA requireFieldMatch==false). Disabled by default. See UH get/setFieldMatcher. 
(Jim Ferenczi via David Smiley) +* LUCENE-7592: If the segments file is truncated, we now throw + CorruptIndexException instead of the more confusing EOFException + (Mike Drob via Mike McCandless) + Optimizations * LUCENE-7568: Optimize merging when index sorting is used but the diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java index 8f627cd743d..3e8b1f871bd 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java @@ -17,6 +17,7 @@ package org.apache.lucene.index; +import java.io.EOFException; import java.io.IOException; import java.io.PrintStream; import java.util.ArrayList; @@ -277,7 +278,11 @@ public final class SegmentInfos implements Cloneable, Iterable Date: Thu, 15 Dec 2016 12:52:37 +0200 Subject: [PATCH 50/53] LUCENE-7590: add sum, variance and stdev stats to NumericDVStats --- lucene/CHANGES.txt | 3 + .../apache/lucene/search/DocValuesStats.java | 39 +++++++++++- .../search/TestDocValuesStatsCollector.java | 62 ++++++++++++++++--- 3 files changed, 95 insertions(+), 9 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index f38c0d5afda..0e327d28124 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -67,6 +67,9 @@ New features * LUCENE-7466: Added AxiomaticSimilarity. (Peilin Yang via Tommaso Teofili) +* LUCENE-7590: Added DocValuesStatsCollector to compute statistics on DocValues + fields. (Shai Erera) + Bug Fixes * LUCENE-7547: JapaneseTokenizerFactory was failing to close the diff --git a/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java b/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java index 998bef4fe21..c8b775200d2 100644 --- a/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java +++ b/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java @@ -98,6 +98,7 @@ public abstract class DocValuesStats { public static abstract class NumericDocValuesStats extends DocValuesStats { protected double mean = 0.0; + protected double variance = 0.0; protected NumericDocValues ndv; @@ -116,15 +117,32 @@ public abstract class DocValuesStats { return ndv.advanceExact(doc); } - /** The mean of all values of the field. Undefined when {@link #count} is zero. */ + /** The mean of all values of the field. */ public final double mean() { return mean; } + + /** Returns the variance of all values of the field. */ + public final double variance() { + int count = count(); + return count > 0 ? variance / count : 0; + } + + /** Returns the stdev of all values of the field. */ + public final double stdev() { + return Math.sqrt(variance()); + } + + /** Returns the sum of values of the field. Note that if the values are large, the {@code sum} might overflow. */ + public abstract T sum(); } /** Holds DocValues statistics for a numeric field storing {@code long} values. */ public static final class LongDocValuesStats extends NumericDocValuesStats { + // To avoid boxing 'long' to 'Long' while the sum is computed, declare it as private variable. 
+ private long sum = 0; + public LongDocValuesStats(String field) { super(field, Long.MAX_VALUE, Long.MIN_VALUE); } @@ -138,13 +156,24 @@ public abstract class DocValuesStats { if (val < min) { min = val; } + sum += val; + double oldMean = mean; mean += (val - mean) / count; + variance += (val - mean) * (val - oldMean); + } + + @Override + public Long sum() { + return sum; } } /** Holds DocValues statistics for a numeric field storing {@code double} values. */ public static final class DoubleDocValuesStats extends NumericDocValuesStats { + // To avoid boxing 'double' to 'Double' while the sum is computed, declare it as private variable. + private double sum = 0; + public DoubleDocValuesStats(String field) { super(field, Double.MAX_VALUE, Double.MIN_VALUE); } @@ -158,7 +187,15 @@ public abstract class DocValuesStats { if (Double.compare(val, min) < 0) { min = val; } + sum += val; + double oldMean = mean; mean += (val - mean) / count; + variance += (val - mean) * (val - oldMean); + } + + @Override + public Double sum() { + return sum; } } diff --git a/lucene/misc/src/test/org/apache/lucene/search/TestDocValuesStatsCollector.java b/lucene/misc/src/test/org/apache/lucene/search/TestDocValuesStatsCollector.java index 65f82e62d42..8f8b09e6bac 100644 --- a/lucene/misc/src/test/org/apache/lucene/search/TestDocValuesStatsCollector.java +++ b/lucene/misc/src/test/org/apache/lucene/search/TestDocValuesStatsCollector.java @@ -18,6 +18,8 @@ package org.apache.lucene.search; import java.io.IOException; import java.util.Arrays; +import java.util.DoubleSummaryStatistics; +import java.util.LongSummaryStatistics; import java.util.stream.DoubleStream; import java.util.stream.LongStream; @@ -57,7 +59,33 @@ public class TestDocValuesStatsCollector extends LuceneTestCase { } } - public void testRandomDocsWithLongValues() throws IOException { + public void testOneDoc() throws IOException { + try (Directory dir = newDirectory(); + IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig())) { + String field = "numeric"; + Document doc = new Document(); + doc.add(new NumericDocValuesField(field, 1)); + doc.add(new StringField("id", "doc1", Store.NO)); + indexWriter.addDocument(doc); + + try (DirectoryReader reader = DirectoryReader.open(indexWriter)) { + IndexSearcher searcher = new IndexSearcher(reader); + LongDocValuesStats stats = new LongDocValuesStats(field); + searcher.search(new MatchAllDocsQuery(), new DocValuesStatsCollector(stats)); + + assertEquals(1, stats.count()); + assertEquals(0, stats.missing()); + assertEquals(1, stats.max().longValue()); + assertEquals(1, stats.min().longValue()); + assertEquals(1, stats.sum().longValue()); + assertEquals(1, stats.mean(), 0.0001); + assertEquals(0, stats.variance(), 0.0001); + assertEquals(0, stats.stdev(), 0.0001); + } + } + } + + public void testDocsWithLongValues() throws IOException { try (Directory dir = newDirectory(); IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig())) { String field = "numeric"; @@ -94,15 +122,20 @@ public class TestDocValuesStatsCollector extends LuceneTestCase { assertEquals(expCount, stats.count()); assertEquals(getZeroValues(docValues).count() - reader.numDeletedDocs(), stats.missing()); if (stats.count() > 0) { - assertEquals(getPositiveValues(docValues).max().getAsLong(), stats.max().longValue()); - assertEquals(getPositiveValues(docValues).min().getAsLong(), stats.min().longValue()); - assertEquals(getPositiveValues(docValues).average().getAsDouble(), stats.mean(), 0.00001); + LongSummaryStatistics 
sumStats = getPositiveValues(docValues).summaryStatistics(); + assertEquals(sumStats.getMax(), stats.max().longValue()); + assertEquals(sumStats.getMin(), stats.min().longValue()); + assertEquals(sumStats.getAverage(), stats.mean(), 0.00001); + assertEquals(sumStats.getSum(), stats.sum().longValue()); + double variance = computeVariance(docValues, stats.mean, stats.count()); + assertEquals(variance, stats.variance(), 0.00001); + assertEquals(Math.sqrt(variance), stats.stdev(), 0.00001); } } } } - public void testRandomDocsWithDoubleValues() throws IOException { + public void testDocsWithDoubleValues() throws IOException { try (Directory dir = newDirectory(); IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig())) { String field = "numeric"; @@ -139,9 +172,14 @@ public class TestDocValuesStatsCollector extends LuceneTestCase { assertEquals(expCount, stats.count()); assertEquals(getZeroValues(docValues).count() - reader.numDeletedDocs(), stats.missing()); if (stats.count() > 0) { - assertEquals(getPositiveValues(docValues).max().getAsDouble(), stats.max().doubleValue(), 0.00001); - assertEquals(getPositiveValues(docValues).min().getAsDouble(), stats.min().doubleValue(), 0.00001); - assertEquals(getPositiveValues(docValues).average().getAsDouble(), stats.mean(), 0.00001); + DoubleSummaryStatistics sumStats = getPositiveValues(docValues).summaryStatistics(); + assertEquals(sumStats.getMax(), stats.max().doubleValue(), 0.00001); + assertEquals(sumStats.getMin(), stats.min().doubleValue(), 0.00001); + assertEquals(sumStats.getAverage(), stats.mean(), 0.00001); + assertEquals(sumStats.getSum(), stats.sum(), 0.00001); + double variance = computeVariance(docValues, stats.mean, stats.count()); + assertEquals(variance, stats.variance(), 0.00001); + assertEquals(Math.sqrt(variance), stats.stdev(), 0.00001); } } } @@ -163,4 +201,12 @@ public class TestDocValuesStatsCollector extends LuceneTestCase { return Arrays.stream(docValues).filter(v -> v == 0); } + private static double computeVariance(long[] values, double mean, int count) { + return getPositiveValues(values).mapToDouble(v -> (v - mean) * (v-mean)).sum() / count; + } + + private static double computeVariance(double[] values, double mean, int count) { + return getPositiveValues(values).map(v -> (v - mean) * (v-mean)).sum() / count; + } + } From 268d4ace3695ad3738402d623400fa4775b113ef Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Thu, 15 Dec 2016 09:23:48 -0500 Subject: [PATCH 51/53] remove bad assertion --- .../src/java/org/apache/lucene/search/QueryUtils.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java index a3eaa80ed07..ae4c89023d4 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java @@ -93,10 +93,6 @@ public class QueryUtils { public static void checkUnequal(Query q1, Query q2) { assertFalse(q1 + " equal to " + q2, q1.equals(q2)); assertFalse(q2 + " equal to " + q1, q2.equals(q1)); - - // possible this test can fail on a hash collision... if that - // happens, please change test to use a different example. 
- assertTrue(q1.hashCode() != q2.hashCode()); } /** deep check that explanations of a query 'score' correctly */ From ea1569e2914f9ba914b582a0801d6cb83a29529b Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Thu, 15 Dec 2016 16:30:15 +0100 Subject: [PATCH 52/53] LUCENE-7572: Cache the hash code of doc values queries. --- lucene/CHANGES.txt | 2 + .../apache/lucene/index/PrefixCodedTerms.java | 4 +- .../lucene/search/DocValuesNumbersQuery.java | 26 +-- .../lucene/search/DocValuesTermsQuery.java | 49 ++++-- .../org/apache/lucene/search/LongHashSet.java | 156 ++++++++++++++++++ .../lucene/search/LongHashSetTests.java | 100 +++++++++++ .../search/TestDocValuesTermsQuery.java | 1 + 7 files changed, 310 insertions(+), 28 deletions(-) create mode 100644 lucene/sandbox/src/java/org/apache/lucene/search/LongHashSet.java create mode 100644 lucene/sandbox/src/test/org/apache/lucene/search/LongHashSetTests.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 0e327d28124..bacc2703ae3 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -158,6 +158,8 @@ Optimizations writing to disk, giving a small speedup in points-heavy use cases. (Mike McCandless) +* LUCENE-7572: Doc values queries now cache their hash code. (Adrien Grand) + Other * LUCENE-7546: Fixed references to benchmark wikipedia data and the Jenkins line-docs file diff --git a/lucene/core/src/java/org/apache/lucene/index/PrefixCodedTerms.java b/lucene/core/src/java/org/apache/lucene/index/PrefixCodedTerms.java index 3dca3dba927..df1653bcd4d 100644 --- a/lucene/core/src/java/org/apache/lucene/index/PrefixCodedTerms.java +++ b/lucene/core/src/java/org/apache/lucene/index/PrefixCodedTerms.java @@ -28,7 +28,9 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; /** - * Prefix codes term instances (prefixes are shared) + * Prefix codes term instances (prefixes are shared). This is expected to be + * faster to build than a FST and might also be more compact if there are no + * common suffixes. * @lucene.internal */ public class PrefixCodedTerms implements Accountable { diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/DocValuesNumbersQuery.java b/lucene/sandbox/src/java/org/apache/lucene/search/DocValuesNumbersQuery.java index 0fd22449ee4..772570372f4 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/DocValuesNumbersQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/DocValuesNumbersQuery.java @@ -18,6 +18,7 @@ package org.apache.lucene.search; import java.io.IOException; import java.util.Arrays; +import java.util.Collection; import java.util.HashSet; import java.util.Objects; import java.util.Set; @@ -45,11 +46,16 @@ import org.apache.lucene.index.SortedNumericDocValues; public class DocValuesNumbersQuery extends Query { private final String field; - private final Set numbers; + private final LongHashSet numbers; - public DocValuesNumbersQuery(String field, Set numbers) { + public DocValuesNumbersQuery(String field, long[] numbers) { this.field = Objects.requireNonNull(field); - this.numbers = Objects.requireNonNull(numbers, "Set of numbers must not be null"); + this.numbers = new LongHashSet(numbers); + } + + public DocValuesNumbersQuery(String field, Collection numbers) { + this.field = Objects.requireNonNull(field); + this.numbers = new LongHashSet(numbers.stream().mapToLong(Long::longValue).toArray()); } public DocValuesNumbersQuery(String field, Long... 
numbers) { @@ -82,15 +88,11 @@ public class DocValuesNumbersQuery extends Query { @Override public String toString(String defaultField) { - StringBuilder sb = new StringBuilder(); - sb.append(field).append(": ["); - for (Long number : numbers) { - sb.append(number).append(", "); - } - if (numbers.size() > 0) { - sb.setLength(sb.length() - 2); - } - return sb.append(']').toString(); + return new StringBuilder() + .append(field) + .append(": ") + .append(numbers.toString()) + .toString(); } @Override diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/DocValuesTermsQuery.java b/lucene/sandbox/src/java/org/apache/lucene/search/DocValuesTermsQuery.java index 6d852a872ae..6e30baed9cd 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/DocValuesTermsQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/DocValuesTermsQuery.java @@ -25,7 +25,10 @@ import java.util.Objects; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PrefixCodedTerms; import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.PrefixCodedTerms.TermIterator; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FixedBitSet; @@ -91,13 +94,24 @@ import org.apache.lucene.util.LongBitSet; public class DocValuesTermsQuery extends Query { private final String field; - private final BytesRef[] terms; + private final PrefixCodedTerms termData; + private final int termDataHashCode; // cached hashcode of termData public DocValuesTermsQuery(String field, Collection terms) { this.field = Objects.requireNonNull(field); Objects.requireNonNull(terms, "Collection of terms must not be null"); - this.terms = terms.toArray(new BytesRef[terms.size()]); - ArrayUtil.timSort(this.terms); + BytesRef[] sortedTerms = terms.toArray(new BytesRef[terms.size()]); + ArrayUtil.timSort(sortedTerms); + PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder(); + BytesRef previous = null; + for (BytesRef term : sortedTerms) { + if (term.equals(previous) == false) { + builder.add(field, term); + } + previous = term; + } + termData = builder.finish(); + termDataHashCode = termData.hashCode(); } public DocValuesTermsQuery(String field, BytesRef... 
terms) { @@ -124,26 +138,30 @@ public class DocValuesTermsQuery extends Query { } private boolean equalsTo(DocValuesTermsQuery other) { - return field.equals(other.field) && - Arrays.equals(terms, other.terms); + // termData might be heavy to compare so check the hash code first + return termDataHashCode == other.termDataHashCode && + termData.equals(other.termData); } @Override public int hashCode() { - return 31 * classHash() + Objects.hash(field, Arrays.asList(terms)); + return 31 * classHash() + termDataHashCode; } @Override public String toString(String defaultField) { - StringBuilder sb = new StringBuilder(); - sb.append(field).append(": ["); - for (BytesRef term : terms) { - sb.append(term).append(", "); + StringBuilder builder = new StringBuilder(); + boolean first = true; + TermIterator iterator = termData.iterator(); + for (BytesRef term = iterator.next(); term != null; term = iterator.next()) { + if (!first) { + builder.append(' '); + } + first = false; + builder.append(new Term(iterator.field(), term).toString()); } - if (terms.length > 0) { - sb.setLength(sb.length() - 2); - } - return sb.append(']').toString(); + + return builder.toString(); } @Override @@ -155,7 +173,8 @@ public class DocValuesTermsQuery extends Query { final SortedSetDocValues values = DocValues.getSortedSet(context.reader(), field); final LongBitSet bits = new LongBitSet(values.getValueCount()); boolean matchesAtLeastOneTerm = false; - for (BytesRef term : terms) { + TermIterator iterator = termData.iterator(); + for (BytesRef term = iterator.next(); term != null; term = iterator.next()) { final long ord = values.lookupTerm(term); if (ord >= 0) { matchesAtLeastOneTerm = true; diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/LongHashSet.java b/lucene/sandbox/src/java/org/apache/lucene/search/LongHashSet.java new file mode 100644 index 00000000000..3a6af5fbe70 --- /dev/null +++ b/lucene/sandbox/src/java/org/apache/lucene/search/LongHashSet.java @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search; + +import java.util.AbstractSet; +import java.util.Arrays; +import java.util.Iterator; +import java.util.NoSuchElementException; + +import org.apache.lucene.util.packed.PackedInts; + +final class LongHashSet extends AbstractSet { + + private static final long MISSING = Long.MIN_VALUE; + + final long[] table; + final int mask; + final boolean hasMissingValue; + final int size; + final int hashCode; + + LongHashSet(long... 
values) { + int tableSize = Math.toIntExact(values.length * 3L / 2); + tableSize = 1 << PackedInts.bitsRequired(tableSize); // make it a power of 2 + assert tableSize >= values.length * 3L / 2; + table = new long[tableSize]; + Arrays.fill(table, MISSING); + mask = tableSize - 1; + boolean hasMissingValue = false; + int size = 0; + int hashCode = 0; + for (long value : values) { + if (value == MISSING || add(value)) { + if (value == MISSING) { + hasMissingValue = true; + } + ++size; + hashCode += Long.hashCode(value); + } + } + this.hasMissingValue = hasMissingValue; + this.size = size; + this.hashCode = hashCode; + } + + private boolean add(long l) { + assert l != MISSING; + final int slot = Long.hashCode(l) & mask; + for (int i = slot; ; i = (i + 1) & mask) { + if (table[i] == MISSING) { + table[i] = l; + return true; + } else if (table[i] == l) { + // already added + return false; + } + } + } + + boolean contains(long l) { + if (l == MISSING) { + return hasMissingValue; + } + final int slot = Long.hashCode(l) & mask; + for (int i = slot; ; i = (i + 1) & mask) { + if (table[i] == MISSING) { + return false; + } else if (table[i] == l) { + return true; + } + } + } + + @Override + public int size() { + return size; + } + + @Override + public int hashCode() { + return hashCode; + } + + @Override + public boolean equals(Object obj) { + if (obj != null && obj.getClass() == LongHashSet.class) { + LongHashSet that = (LongHashSet) obj; + if (hashCode != that.hashCode + || size != that.size + || hasMissingValue != that.hasMissingValue) { + return false; + } + for (long v : table) { + if (v != MISSING && that.contains(v) == false) { + return false; + } + } + return true; + } + return super.equals(obj); + } + + @Override + public boolean contains(Object o) { + return o instanceof Long && contains(((Long) o).longValue()); + } + + @Override + public Iterator iterator() { + return new Iterator() { + + private boolean hasNext = hasMissingValue; + private int i = -1; + private long value = MISSING; + + @Override + public boolean hasNext() { + if (hasNext) { + return true; + } + while (++i < table.length) { + value = table[i]; + if (value != MISSING) { + return hasNext = true; + } + } + return false; + } + + @Override + public Long next() { + if (hasNext() == false) { + throw new NoSuchElementException(); + } + hasNext = false; + return value; + } + + }; + } + +} diff --git a/lucene/sandbox/src/test/org/apache/lucene/search/LongHashSetTests.java b/lucene/sandbox/src/test/org/apache/lucene/search/LongHashSetTests.java new file mode 100644 index 00000000000..25d94a6dbbc --- /dev/null +++ b/lucene/sandbox/src/test/org/apache/lucene/search/LongHashSetTests.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.search; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.LongStream; + +import org.apache.lucene.util.LuceneTestCase; + +public class LongHashSetTests extends LuceneTestCase { + + private void assertEquals(Set set1, LongHashSet set2) { + LuceneTestCase.assertEquals(set1, set2); + LuceneTestCase.assertEquals(set2, set1); + LuceneTestCase.assertEquals(set2, set2); + assertEquals(set1.hashCode(), set2.hashCode()); + + if (set1.isEmpty() == false) { + Set set3 = new HashSet<>(set1); + long removed = set3.iterator().next(); + while (true) { + long next = random().nextLong(); + if (next != removed && set3.add(next)) { + break; + } + } + assertNotEquals(set3, set2); + } + } + + private void assertNotEquals(Set set1, LongHashSet set2) { + assertFalse(set1.equals(set2)); + assertFalse(set2.equals(set1)); + LongHashSet set3 = new LongHashSet(set1.stream().mapToLong(Long::longValue).toArray()); + assertFalse(set2.equals(set3)); + } + + public void testEmpty() { + Set set1 = new HashSet<>(); + LongHashSet set2 = new LongHashSet(); + assertEquals(set1, set2); + } + + public void testOneValue() { + Set set1 = new HashSet<>(Arrays.asList(42L)); + LongHashSet set2 = new LongHashSet(42); + assertEquals(set1, set2); + + set1 = new HashSet<>(Arrays.asList(Long.MIN_VALUE)); + set2 = new LongHashSet(Long.MIN_VALUE); + assertEquals(set1, set2); + } + + public void testTwoValues() { + Set set1 = new HashSet<>(Arrays.asList(42L, Long.MAX_VALUE)); + LongHashSet set2 = new LongHashSet(42, Long.MAX_VALUE); + assertEquals(set1, set2); + + set1 = new HashSet<>(Arrays.asList(Long.MIN_VALUE, 42L)); + set2 = new LongHashSet(Long.MIN_VALUE, 42L); + assertEquals(set1, set2); + } + + public void testRandom() { + final int iters = atLeast(10); + for (int iter = 0; iter < iters; ++iter) { + long[] values = new long[random().nextInt(1 << random().nextInt(16))]; + for (int i = 0; i < values.length; ++i) { + if (i == 0 || random().nextInt(10) < 9) { + values[i] = random().nextLong(); + } else { + values[i] = values[random().nextInt(i)]; + } + } + if (values.length > 0 && random().nextBoolean()) { + values[values.length/2] = Long.MIN_VALUE; + } + Set set1 = LongStream.of(values).mapToObj(Long::valueOf).collect(Collectors.toCollection(HashSet::new)); + LongHashSet set2 = new LongHashSet(values); + assertEquals(set1, set2); + } + } +} \ No newline at end of file diff --git a/lucene/sandbox/src/test/org/apache/lucene/search/TestDocValuesTermsQuery.java b/lucene/sandbox/src/test/org/apache/lucene/search/TestDocValuesTermsQuery.java index 6e994927947..187f172b9f9 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/search/TestDocValuesTermsQuery.java +++ b/lucene/sandbox/src/test/org/apache/lucene/search/TestDocValuesTermsQuery.java @@ -38,6 +38,7 @@ public class TestDocValuesTermsQuery extends LuceneTestCase { public void testEquals() { assertEquals(new DocValuesTermsQuery("foo", "bar"), new DocValuesTermsQuery("foo", "bar")); + assertEquals(new DocValuesTermsQuery("foo", "bar"), new DocValuesTermsQuery("foo", "bar", "bar")); assertEquals(new DocValuesTermsQuery("foo", "bar", "baz"), new DocValuesTermsQuery("foo", "baz", "bar")); assertFalse(new DocValuesTermsQuery("foo", "bar").equals(new DocValuesTermsQuery("foo2", "bar"))); assertFalse(new DocValuesTermsQuery("foo", "bar").equals(new DocValuesTermsQuery("foo", "baz"))); From 3b182aa2fb3e4062f6ec5be819f3aa70aa2e523d Mon Sep 17 00:00:00 2001 From: Adrien Grand 
Date: Thu, 15 Dec 2016 16:33:36 +0100 Subject: [PATCH 53/53] LUCENE-7589: Prevent outliers from raising the bpv for everyone. --- lucene/CHANGES.txt | 4 + .../lucene70/Lucene70DocValuesConsumer.java | 163 ++++++++++--- .../lucene70/Lucene70DocValuesFormat.java | 5 +- .../lucene70/Lucene70DocValuesProducer.java | 220 ++++++++++++++---- .../org/apache/lucene/util/LongValues.java | 9 + .../lucene/util/packed/DirectWriter.java | 8 +- .../lucene70/TestLucene70DocValuesFormat.java | 152 ++++++++++++ 7 files changed, 479 insertions(+), 82 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index bacc2703ae3..7e614693fd5 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -48,6 +48,10 @@ Optimizations * LUCENE-7519: Add optimized APIs to compute browse-only top level facets (Mike McCandless) +* LUCENE-7589: Numeric doc values now have the ability to encode blocks of + values using different numbers of bits per value if this proves to save + storage. (Adrien Grand) + Other * LUCENE-7328: Remove LegacyNumericEncoding from GeoPointField. (Nick Knize) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesConsumer.java index e1b66e13eb0..2dd68e9e82b 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesConsumer.java @@ -18,6 +18,8 @@ package org.apache.lucene.codecs.lucene70; import static org.apache.lucene.codecs.lucene70.Lucene70DocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT; +import static org.apache.lucene.codecs.lucene70.Lucene70DocValuesFormat.NUMERIC_BLOCK_SHIFT; +import static org.apache.lucene.codecs.lucene70.Lucene70DocValuesFormat.NUMERIC_BLOCK_SIZE; import java.io.Closeable; // javadocs import java.io.IOException; @@ -42,6 +44,7 @@ import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.SortedSetSelector; +import org.apache.lucene.store.GrowableByteArrayDataOutput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.RAMOutputStream; import org.apache.lucene.util.BytesRef; @@ -112,12 +115,46 @@ final class Lucene70DocValuesConsumer extends DocValuesConsumer implements Close }); } + private static class MinMaxTracker { + long min, max, numValues, spaceInBits; + + MinMaxTracker() { + reset(); + spaceInBits = 0; + } + + private void reset() { + min = Long.MAX_VALUE; + max = Long.MIN_VALUE; + numValues = 0; + } + + /** Accumulate a new value. */ + void update(long v) { + min = Math.min(min, v); + max = Math.max(max, v); + ++numValues; + } + + /** Update the required space. */ + void finish() { + if (max > min) { + spaceInBits += DirectWriter.unsignedBitsRequired(max - min) * numValues; + } + } + + /** Update space usage and get ready for accumulating values for the next block. 
*/ + void nextBlock() { + finish(); + reset(); + } + } + private long[] writeValues(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { SortedNumericDocValues values = valuesProducer.getSortedNumeric(field); int numDocsWithValue = 0; - long numValues = 0; - long min = Long.MAX_VALUE; - long max = Long.MIN_VALUE; + MinMaxTracker minMax = new MinMaxTracker(); + MinMaxTracker blockMinMax = new MinMaxTracker(); long gcd = 0; Set uniqueValues = new HashSet<>(); for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { @@ -130,26 +167,35 @@ final class Lucene70DocValuesConsumer extends DocValuesConsumer implements Close // wrong results. Since these extreme values are unlikely, we just discard // GCD computation for them gcd = 1; - } else if (numValues != 0) { // minValue needs to be set first - gcd = MathUtil.gcd(gcd, v - min); + } else if (minMax.numValues != 0) { // minValue needs to be set first + gcd = MathUtil.gcd(gcd, v - minMax.min); } } - min = Math.min(min, v); - max = Math.max(max, v); + minMax.update(v); + blockMinMax.update(v); + if (blockMinMax.numValues == NUMERIC_BLOCK_SIZE) { + blockMinMax.nextBlock(); + } if (uniqueValues != null && uniqueValues.add(v) && uniqueValues.size() > 256) { uniqueValues = null; } - - numValues++; } numDocsWithValue++; } + minMax.finish(); + blockMinMax.finish(); + + final long numValues = minMax.numValues; + long min = minMax.min; + final long max = minMax.max; + assert blockMinMax.spaceInBits <= minMax.spaceInBits; + if (numDocsWithValue == 0) { meta.writeLong(-2); meta.writeLong(0L); @@ -166,6 +212,7 @@ final class Lucene70DocValuesConsumer extends DocValuesConsumer implements Close meta.writeLong(numValues); final int numBitsPerValue; + boolean doBlocks = false; Map encode = null; if (min >= max) { numBitsPerValue = 0; @@ -189,12 +236,19 @@ final class Lucene70DocValuesConsumer extends DocValuesConsumer implements Close gcd = 1; } else { uniqueValues = null; - numBitsPerValue = DirectWriter.unsignedBitsRequired((max - min) / gcd); - if (gcd == 1 && min > 0 - && DirectWriter.unsignedBitsRequired(max) == DirectWriter.unsignedBitsRequired(max - min)) { - min = 0; + // we do blocks if that appears to save 10+% storage + doBlocks = minMax.spaceInBits > 0 && (double) blockMinMax.spaceInBits / minMax.spaceInBits <= 0.9; + if (doBlocks) { + numBitsPerValue = 0xFF; + meta.writeInt(-2 - NUMERIC_BLOCK_SHIFT); + } else { + numBitsPerValue = DirectWriter.unsignedBitsRequired((max - min) / gcd); + if (gcd == 1 && min > 0 + && DirectWriter.unsignedBitsRequired(max) == DirectWriter.unsignedBitsRequired(max - min)) { + min = 0; + } + meta.writeInt(-1); } - meta.writeInt(-1); } } @@ -203,26 +257,79 @@ final class Lucene70DocValuesConsumer extends DocValuesConsumer implements Close meta.writeLong(gcd); long startOffset = data.getFilePointer(); meta.writeLong(startOffset); - if (numBitsPerValue != 0) { - values = valuesProducer.getSortedNumeric(field); - DirectWriter writer = DirectWriter.getInstance(data, numValues, numBitsPerValue); - for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { - for (int i = 0, count = values.docValueCount(); i < count; ++i) { - long v = values.nextValue(); - if (encode == null) { - writer.add((v - min) / gcd); - } else { - writer.add(encode.get(v)); - } - } - } - writer.finish(); + if (doBlocks) { + writeValuesMultipleBlocks(valuesProducer.getSortedNumeric(field), gcd); + } else if (numBitsPerValue != 0) { + 
writeValuesSingleBlock(valuesProducer.getSortedNumeric(field), numValues, numBitsPerValue, min, gcd, encode); } meta.writeLong(data.getFilePointer() - startOffset); return new long[] {numDocsWithValue, numValues}; } + private void writeValuesSingleBlock(SortedNumericDocValues values, long numValues, int numBitsPerValue, + long min, long gcd, Map encode) throws IOException { + DirectWriter writer = DirectWriter.getInstance(data, numValues, numBitsPerValue); + for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { + for (int i = 0, count = values.docValueCount(); i < count; ++i) { + long v = values.nextValue(); + if (encode == null) { + writer.add((v - min) / gcd); + } else { + writer.add(encode.get(v)); + } + } + } + writer.finish(); + } + + private void writeValuesMultipleBlocks(SortedNumericDocValues values, long gcd) throws IOException { + final long[] buffer = new long[NUMERIC_BLOCK_SIZE]; + final GrowableByteArrayDataOutput encodeBuffer = new GrowableByteArrayDataOutput(NUMERIC_BLOCK_SIZE); + int upTo = 0; + for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { + for (int i = 0, count = values.docValueCount(); i < count; ++i) { + buffer[upTo++] = values.nextValue(); + if (upTo == NUMERIC_BLOCK_SIZE) { + writeBlock(buffer, NUMERIC_BLOCK_SIZE, gcd, encodeBuffer); + upTo = 0; + } + } + } + if (upTo > 0) { + writeBlock(buffer, upTo, gcd, encodeBuffer); + } + } + + private void writeBlock(long[] values, int length, long gcd, GrowableByteArrayDataOutput buffer) throws IOException { + assert length > 0; + long min = values[0]; + long max = values[0]; + for (int i = 1; i < length; ++i) { + final long v = values[i]; + assert Math.floorMod(values[i] - min, gcd) == 0; + min = Math.min(min, v); + max = Math.max(max, v); + } + if (min == max) { + data.writeByte((byte) 0); + data.writeLong(min); + } else { + final int bitsPerValue = DirectWriter.unsignedBitsRequired(max - min); + buffer.reset(); + assert buffer.getPosition() == 0; + final DirectWriter w = DirectWriter.getInstance(buffer, length, bitsPerValue); + for (int i = 0; i < length; ++i) { + w.add((values[i] - min) / gcd); + } + w.finish(); + data.writeByte((byte) bitsPerValue); + data.writeLong(min); + data.writeInt(buffer.getPosition()); + data.writeBytes(buffer.getBytes(), buffer.getPosition()); + } + } + @Override public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { meta.writeInt(field.number); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesFormat.java index ee477d666ee..2ce2124ff34 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesFormat.java @@ -146,10 +146,11 @@ public final class Lucene70DocValuesFormat extends DocValuesFormat { static final byte SORTED_SET = 3; static final byte SORTED_NUMERIC = 4; - // addressing uses 16k blocks - static final int MONOTONIC_BLOCK_SIZE = 16384; static final int DIRECT_MONOTONIC_BLOCK_SHIFT = 16; + static final int NUMERIC_BLOCK_SHIFT = 14; + static final int NUMERIC_BLOCK_SIZE = 1 << NUMERIC_BLOCK_SHIFT; + static final int TERMS_DICT_BLOCK_SHIFT = 4; static final int TERMS_DICT_BLOCK_SIZE = 1 << TERMS_DICT_BLOCK_SHIFT; static final int TERMS_DICT_BLOCK_MASK = TERMS_DICT_BLOCK_SIZE - 1; diff --git 
a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java index 3f3e73f2e38..386655e8c77 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java @@ -144,7 +144,7 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close entry.docsWithFieldLength = meta.readLong(); entry.numValues = meta.readLong(); int tableSize = meta.readInt(); - if (tableSize < -1 || tableSize > 256) { + if (tableSize > 256) { throw new CorruptIndexException("invalid table size: " + tableSize, meta); } if (tableSize >= 0) { @@ -154,6 +154,11 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close entry.table[i] = meta.readLong(); } } + if (tableSize < -1) { + entry.blockShift = -2 - tableSize; + } else { + entry.blockShift = -1; + } entry.bitsPerValue = meta.readByte(); entry.minValue = meta.readLong(); entry.gcd = meta.readLong(); @@ -260,6 +265,7 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close private static class NumericEntry { long[] table; + int blockShift; byte bitsPerValue; long docsWithFieldOffset; long docsWithFieldLength; @@ -429,24 +435,62 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close }; } else { final RandomAccessInput slice = data.randomAccessSlice(entry.valuesOffset, entry.valuesLength); - final LongValues values = DirectReader.getInstance(slice, entry.bitsPerValue); - if (entry.table != null) { - final long[] table = entry.table; + if (entry.blockShift >= 0) { + // dense but split into blocks of different bits per value + final int shift = entry.blockShift; + final long mul = entry.gcd; + final int mask = (1 << shift) - 1; return new DenseNumericDocValues(maxDoc) { + int block = -1; + long delta; + long offset; + long blockEndOffset; + LongValues values; + @Override public long longValue() throws IOException { - return table[(int) values.get(doc)]; + final int block = doc >>> shift; + if (this.block != block) { + int bitsPerValue; + do { + offset = blockEndOffset; + bitsPerValue = slice.readByte(offset++); + delta = slice.readLong(offset); + offset += Long.BYTES; + if (bitsPerValue == 0) { + blockEndOffset = offset; + } else { + final int length = slice.readInt(offset); + offset += Integer.BYTES; + blockEndOffset = offset + length; + } + this.block ++; + } while (this.block != block); + values = bitsPerValue == 0 ? 
LongValues.ZEROES : DirectReader.getInstance(slice, bitsPerValue, offset); + } + return mul * values.get(doc & mask) + delta; } }; } else { - final long mul = entry.gcd; - final long delta = entry.minValue; - return new DenseNumericDocValues(maxDoc) { - @Override - public long longValue() throws IOException { - return mul * values.get(doc) + delta; - } - }; + final LongValues values = DirectReader.getInstance(slice, entry.bitsPerValue); + if (entry.table != null) { + final long[] table = entry.table; + return new DenseNumericDocValues(maxDoc) { + @Override + public long longValue() throws IOException { + return table[(int) values.get(doc)]; + } + }; + } else { + final long mul = entry.gcd; + final long delta = entry.minValue; + return new DenseNumericDocValues(maxDoc) { + @Override + public long longValue() throws IOException { + return mul * values.get(doc) + delta; + } + }; + } } } } else { @@ -461,24 +505,63 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close }; } else { final RandomAccessInput slice = data.randomAccessSlice(entry.valuesOffset, entry.valuesLength); - final LongValues values = DirectReader.getInstance(slice, entry.bitsPerValue); - if (entry.table != null) { - final long[] table = entry.table; + if (entry.blockShift >= 0) { + // sparse and split into blocks of different bits per value + final int shift = entry.blockShift; + final long mul = entry.gcd; + final int mask = (1 << shift) - 1; return new SparseNumericDocValues(disi) { + int block = -1; + long delta; + long offset; + long blockEndOffset; + LongValues values; + @Override public long longValue() throws IOException { - return table[(int) values.get(disi.index())]; + final int index = disi.index(); + final int block = index >>> shift; + if (this.block != block) { + int bitsPerValue; + do { + offset = blockEndOffset; + bitsPerValue = slice.readByte(offset++); + delta = slice.readLong(offset); + offset += Long.BYTES; + if (bitsPerValue == 0) { + blockEndOffset = offset; + } else { + final int length = slice.readInt(offset); + offset += Integer.BYTES; + blockEndOffset = offset + length; + } + this.block ++; + } while (this.block != block); + values = bitsPerValue == 0 ? 
LongValues.ZEROES : DirectReader.getInstance(slice, bitsPerValue, offset); + } + return mul * values.get(index & mask) + delta; } }; } else { - final long mul = entry.gcd; - final long delta = entry.minValue; - return new SparseNumericDocValues(disi) { - @Override - public long longValue() throws IOException { - return mul * values.get(disi.index()) + delta; - } - }; + final LongValues values = DirectReader.getInstance(slice, entry.bitsPerValue); + if (entry.table != null) { + final long[] table = entry.table; + return new SparseNumericDocValues(disi) { + @Override + public long longValue() throws IOException { + return table[(int) values.get(disi.index())]; + } + }; + } else { + final long mul = entry.gcd; + final long delta = entry.minValue; + return new SparseNumericDocValues(disi) { + @Override + public long longValue() throws IOException { + return mul * values.get(disi.index()) + delta; + } + }; + } } } } @@ -494,34 +577,75 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close }; } else { final RandomAccessInput slice = data.randomAccessSlice(entry.valuesOffset, entry.valuesLength); - final LongValues values = DirectReader.getInstance(slice, entry.bitsPerValue); - if (entry.table != null) { - final long[] table = entry.table; + if (entry.blockShift >= 0) { + final int shift = entry.blockShift; + final long mul = entry.gcd; + final long mask = (1L << shift) - 1; return new LongValues() { - @Override + long block = -1; + long delta; + long offset; + long blockEndOffset; + LongValues values; + public long get(long index) { - return table[(int) values.get(index)]; - } - }; - } else if (entry.gcd != 1) { - final long gcd = entry.gcd; - final long minValue = entry.minValue; - return new LongValues() { - @Override - public long get(long index) { - return values.get(index) * gcd + minValue; - } - }; - } else if (entry.minValue != 0) { - final long minValue = entry.minValue; - return new LongValues() { - @Override - public long get(long index) { - return values.get(index) + minValue; + final long block = index >>> shift; + if (this.block != block) { + assert block > this.block : "Reading backwards is illegal: " + this.block + " < " + block; + int bitsPerValue; + do { + offset = blockEndOffset; + try { + bitsPerValue = slice.readByte(offset++); + delta = slice.readLong(offset); + offset += Long.BYTES; + if (bitsPerValue == 0) { + blockEndOffset = offset; + } else { + final int length = slice.readInt(offset); + offset += Integer.BYTES; + blockEndOffset = offset + length; + } + } catch (IOException e) { + throw new RuntimeException(e); + } + this.block ++; + } while (this.block != block); + values = bitsPerValue == 0 ? 
LongValues.ZEROES : DirectReader.getInstance(slice, bitsPerValue, offset); + } + return mul * values.get(index & mask) + delta; } }; } else { - return values; + final LongValues values = DirectReader.getInstance(slice, entry.bitsPerValue); + if (entry.table != null) { + final long[] table = entry.table; + return new LongValues() { + @Override + public long get(long index) { + return table[(int) values.get(index)]; + } + }; + } else if (entry.gcd != 1) { + final long gcd = entry.gcd; + final long minValue = entry.minValue; + return new LongValues() { + @Override + public long get(long index) { + return values.get(index) * gcd + minValue; + } + }; + } else if (entry.minValue != 0) { + final long minValue = entry.minValue; + return new LongValues() { + @Override + public long get(long index) { + return values.get(index) + minValue; + } + }; + } else { + return values; + } } } } diff --git a/lucene/core/src/java/org/apache/lucene/util/LongValues.java b/lucene/core/src/java/org/apache/lucene/util/LongValues.java index 23f4d32fc9a..04fbf81dc1c 100644 --- a/lucene/core/src/java/org/apache/lucene/util/LongValues.java +++ b/lucene/core/src/java/org/apache/lucene/util/LongValues.java @@ -30,6 +30,15 @@ public abstract class LongValues { }; + public static final LongValues ZEROES = new LongValues() { + + @Override + public long get(long index) { + return 0; + } + + }; + /** Get value at index. */ public abstract long get(long index); diff --git a/lucene/core/src/java/org/apache/lucene/util/packed/DirectWriter.java b/lucene/core/src/java/org/apache/lucene/util/packed/DirectWriter.java index 9a7f18eb26f..5a38445d20d 100644 --- a/lucene/core/src/java/org/apache/lucene/util/packed/DirectWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/packed/DirectWriter.java @@ -21,7 +21,7 @@ import java.io.EOFException; import java.io.IOException; import java.util.Arrays; -import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.DataOutput; /** * Class for writing packed integers to be directly read from Directory. @@ -44,7 +44,7 @@ import org.apache.lucene.store.IndexOutput; public final class DirectWriter { final int bitsPerValue; final long numValues; - final IndexOutput output; + final DataOutput output; long count; boolean finished; @@ -56,7 +56,7 @@ public final class DirectWriter { final BulkOperation encoder; final int iterations; - DirectWriter(IndexOutput output, long numValues, int bitsPerValue) { + DirectWriter(DataOutput output, long numValues, int bitsPerValue) { this.output = output; this.numValues = numValues; this.bitsPerValue = bitsPerValue; @@ -103,7 +103,7 @@ public final class DirectWriter { } /** Returns an instance suitable for encoding {@code numValues} using {@code bitsPerValue} */ - public static DirectWriter getInstance(IndexOutput output, long numValues, int bitsPerValue) { + public static DirectWriter getInstance(DataOutput output, long numValues, int bitsPerValue) { if (Arrays.binarySearch(SUPPORTED_BITS_PER_VALUE, bitsPerValue) < 0) { throw new IllegalArgumentException("Unsupported bitsPerValue " + bitsPerValue + ". 
Did you use bitsRequired?"); } diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestLucene70DocValuesFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestLucene70DocValuesFormat.java index 8661298b51d..6cca55e3a48 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestLucene70DocValuesFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestLucene70DocValuesFormat.java @@ -25,6 +25,7 @@ import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.TreeSet; +import java.util.function.LongSupplier; import java.util.function.Supplier; import org.apache.lucene.analysis.MockAnalyzer; @@ -61,6 +62,7 @@ import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum.SeekStatus; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMFile; @@ -534,4 +536,154 @@ public class TestLucene70DocValuesFormat extends BaseCompressingDocValuesFormatT dir.close(); } } + + @Slow + public void testSortedNumericBlocksOfVariousBitsPerValue() throws Exception { + doTestSortedNumericBlocksOfVariousBitsPerValue(() -> TestUtil.nextInt(random(), 1, 3)); + } + + @Slow + public void testSparseSortedNumericBlocksOfVariousBitsPerValue() throws Exception { + doTestSortedNumericBlocksOfVariousBitsPerValue(() -> TestUtil.nextInt(random(), 0, 2)); + } + + @Slow + public void testNumericBlocksOfVariousBitsPerValue() throws Exception { + doTestSparseNumericBlocksOfVariousBitsPerValue(1); + } + + @Slow + public void testSparseNumericBlocksOfVariousBitsPerValue() throws Exception { + doTestSparseNumericBlocksOfVariousBitsPerValue(random().nextDouble()); + } + + private static LongSupplier blocksOfVariousBPV() { + final long mul = TestUtil.nextInt(random(), 1, 100); + final long min = random().nextInt(); + return new LongSupplier() { + int i = Lucene70DocValuesFormat.NUMERIC_BLOCK_SIZE; + int maxDelta; + @Override + public long getAsLong() { + if (i == Lucene70DocValuesFormat.NUMERIC_BLOCK_SIZE) { + maxDelta = 1 << random().nextInt(5); + i = 0; + } + i++; + return min + mul * random().nextInt(maxDelta); + } + }; + } + + private void doTestSortedNumericBlocksOfVariousBitsPerValue(LongSupplier counts) throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random())); + conf.setMaxBufferedDocs(atLeast(Lucene70DocValuesFormat.NUMERIC_BLOCK_SIZE)); + conf.setRAMBufferSizeMB(-1); + conf.setMergePolicy(newLogMergePolicy(random().nextBoolean())); + IndexWriter writer = new IndexWriter(dir, conf); + + final int numDocs = atLeast(Lucene70DocValuesFormat.NUMERIC_BLOCK_SIZE*3); + final LongSupplier values = blocksOfVariousBPV(); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + + int valueCount = (int) counts.getAsLong(); + long valueArray[] = new long[valueCount]; + for (int j = 0; j < valueCount; j++) { + long value = values.getAsLong(); + valueArray[j] = value; + doc.add(new SortedNumericDocValuesField("dv", value)); + } + Arrays.sort(valueArray); + for (int j = 0; j < valueCount; j++) { + doc.add(new StoredField("stored", Long.toString(valueArray[j]))); + } + writer.addDocument(doc); + if (random().nextInt(31) == 0) { + writer.commit(); + } + } + writer.forceMerge(1); + + writer.close(); + + // compare + DirectoryReader ir = 
DirectoryReader.open(dir); + TestUtil.checkReader(ir); + for (LeafReaderContext context : ir.leaves()) { + LeafReader r = context.reader(); + SortedNumericDocValues docValues = DocValues.getSortedNumeric(r, "dv"); + for (int i = 0; i < r.maxDoc(); i++) { + if (i > docValues.docID()) { + docValues.nextDoc(); + } + String expected[] = r.document(i).getValues("stored"); + if (i < docValues.docID()) { + assertEquals(0, expected.length); + } else { + String actual[] = new String[docValues.docValueCount()]; + for (int j = 0; j < actual.length; j++) { + actual[j] = Long.toString(docValues.nextValue()); + } + assertArrayEquals(expected, actual); + } + } + } + ir.close(); + dir.close(); + } + + private void doTestSparseNumericBlocksOfVariousBitsPerValue(double density) throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random())); + conf.setMaxBufferedDocs(atLeast(Lucene70DocValuesFormat.NUMERIC_BLOCK_SIZE)); + conf.setRAMBufferSizeMB(-1); + conf.setMergePolicy(newLogMergePolicy(random().nextBoolean())); + IndexWriter writer = new IndexWriter(dir, conf); + Document doc = new Document(); + Field storedField = newStringField("stored", "", Field.Store.YES); + Field dvField = new NumericDocValuesField("dv", 0); + doc.add(storedField); + doc.add(dvField); + + final int numDocs = atLeast(Lucene70DocValuesFormat.NUMERIC_BLOCK_SIZE*3); + final LongSupplier longs = blocksOfVariousBPV(); + for (int i = 0; i < numDocs; i++) { + if (random().nextDouble() > density) { + writer.addDocument(new Document()); + continue; + } + long value = longs.getAsLong(); + storedField.setStringValue(Long.toString(value)); + dvField.setLongValue(value); + writer.addDocument(doc); + } + + writer.forceMerge(1); + + writer.close(); + + // compare + DirectoryReader ir = DirectoryReader.open(dir); + TestUtil.checkReader(ir); + for (LeafReaderContext context : ir.leaves()) { + LeafReader r = context.reader(); + NumericDocValues docValues = DocValues.getNumeric(r, "dv"); + docValues.nextDoc(); + for (int i = 0; i < r.maxDoc(); i++) { + String storedValue = r.document(i).get("stored"); + if (storedValue == null) { + assertTrue(docValues.docID() > i); + } else { + assertEquals(i, docValues.docID()); + assertEquals(Long.parseLong(storedValue), docValues.longValue()); + docValues.nextDoc(); + } + } + assertEquals(DocIdSetIterator.NO_MORE_DOCS, docValues.docID()); + } + ir.close(); + dir.close(); + } }
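The LUCENE-7589 change above decides between one global bits-per-value and per-block bits-per-value by comparing the space each layout would need, and it switches to 16K-value blocks only when that saves at least 10%. The standalone sketch below (not part of the patch) illustrates that trade-off in plain Java: BlockBpvSketch and its bitsRequired helper are illustrative stand-ins for DirectWriter.unsignedBitsRequired and the MinMaxTracker bookkeeping, and the sketch only compares sizes rather than writing packed data.

import java.util.Random;

public class BlockBpvSketch {

  static final int BLOCK_SIZE = 1 << 14; // NUMERIC_BLOCK_SIZE in the patch

  // bits needed to store an unsigned delta; stand-in for DirectWriter.unsignedBitsRequired
  static int bitsRequired(long maxDelta) {
    return maxDelta == 0 ? 0 : 64 - Long.numberOfLeadingZeros(maxDelta);
  }

  public static void main(String[] args) {
    Random r = new Random(42);
    long[] values = new long[10 * BLOCK_SIZE];
    for (int i = 0; i < values.length; i++) {
      values[i] = r.nextInt(16);          // small values: 4 bits per value would do
    }
    values[values.length / 2] = 1L << 40; // one outlier

    // one block over the whole segment: the outlier raises the bpv for every value
    long min = Long.MAX_VALUE, max = Long.MIN_VALUE;
    for (long v : values) { min = Math.min(min, v); max = Math.max(max, v); }
    long singleBlockBits = (long) bitsRequired(max - min) * values.length;

    // per-block: only the block containing the outlier pays for the wide encoding
    long perBlockBits = 0;
    for (int start = 0; start < values.length; start += BLOCK_SIZE) {
      long bMin = Long.MAX_VALUE, bMax = Long.MIN_VALUE;
      for (int i = start; i < start + BLOCK_SIZE; i++) {
        bMin = Math.min(bMin, values[i]);
        bMax = Math.max(bMax, values[i]);
      }
      perBlockBits += (long) bitsRequired(bMax - bMin) * BLOCK_SIZE;
    }

    boolean doBlocks = perBlockBits <= 0.9 * singleBlockBits; // the patch's 10% rule
    System.out.println("single bpv bits = " + singleBlockBits
        + ", per-block bits = " + perBlockBits + ", doBlocks = " + doBlocks);
  }
}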
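The LUCENE-7590 patch earlier in this series accumulates variance in a single pass with the recurrence mean += (val - mean) / count; variance += (val - mean) * (val - oldMean), i.e. Welford's online algorithm. The sketch below is illustrative only (OnlineVarianceSketch is not a Lucene class); it runs the same update next to a plain two-pass computation so the equivalence is easy to check.

public class OnlineVarianceSketch {
  public static void main(String[] args) {
    long[] values = {3, 7, 7, 19, 24, 24, 42};

    // one pass, mirroring the update added to NumericDocValuesStats
    long count = 0;
    double mean = 0.0, variance = 0.0;
    for (long v : values) {
      count++;
      double oldMean = mean;
      mean += (v - mean) / count;
      variance += (v - mean) * (v - oldMean);
    }
    double onePassVariance = variance / count; // population variance, as in variance()

    // two passes, for comparison
    double sum = 0;
    for (long v : values) sum += v;
    double twoPassMean = sum / values.length;
    double sq = 0;
    for (long v : values) sq += (v - twoPassMean) * (v - twoPassMean);
    double twoPassVariance = sq / values.length;

    System.out.println("one-pass variance = " + onePassVariance
        + ", two-pass variance = " + twoPassVariance);
  }
}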
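LUCENE-7572 above caches the hash code of the PrefixCodedTerms held by DocValuesTermsQuery and compares the cached hash codes before the (potentially large) term data in equals. The minimal sketch below shows the same pattern on a made-up TermsKey class; it assumes only that such keys are hashed and compared frequently, as queries are when they act as cache keys.

import java.util.Arrays;

final class TermsKey {
  private final String field;
  private final String[] sortedTerms;
  private final int cachedHashCode; // computed once, reused by every hashCode() call

  TermsKey(String field, String... terms) {
    this.field = field;
    this.sortedTerms = terms.clone();
    Arrays.sort(this.sortedTerms);
    this.cachedHashCode = 31 * field.hashCode() + Arrays.hashCode(this.sortedTerms);
  }

  @Override
  public int hashCode() {
    return cachedHashCode;
  }

  @Override
  public boolean equals(Object obj) {
    if (!(obj instanceof TermsKey)) {
      return false;
    }
    TermsKey other = (TermsKey) obj;
    // cheap hash comparison first, full comparison only when the hashes match
    return cachedHashCode == other.cachedHashCode
        && field.equals(other.field)
        && Arrays.equals(sortedTerms, other.sortedTerms);
  }
}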