From b5ed6350a0ea444553242ef2b141161c0fc3830b Mon Sep 17 00:00:00 2001
From: Andrzej Bialecki <ab@apache.org>
Date: Thu, 2 Aug 2018 21:10:01 +0200
Subject: [PATCH] SOLR-12509: Fix a bug when using round-robin doc assignment.

---
 .../apache/solr/update/SolrIndexSplitter.java      | 27 +++++++++++--------
 .../solr/update/SolrIndexSplitterTest.java         | 10 ++++---
 2 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java b/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java
index 75234fa77d9..334a29df3ec 100644
--- a/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java
+++ b/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java
@@ -26,6 +26,7 @@ import java.util.Locale;
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.Future;
+import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.lucene.index.CodecReader;
 import org.apache.lucene.index.FilterCodecReader;
@@ -212,11 +213,14 @@ public class SolrIndexSplitter {
     log.info("SolrIndexSplitter: partitions=" + numPieces + " segments=" + leaves.size());
     RTimerTree t;
 
+    // this tracks round-robin assignment of docs to partitions
+    AtomicInteger currentPartition = new AtomicInteger();
+
     if (splitMethod != SplitMethod.LINK) {
       t = timings.sub("findDocSetsPerLeaf");
       for (LeafReaderContext readerContext : leaves) {
         assert readerContext.ordInParent == segmentDocSets.size();  // make sure we're going in order
-        FixedBitSet[] docSets = split(readerContext, numPieces, field, rangesArr, splitKey, hashRouter, false);
+        FixedBitSet[] docSets = split(readerContext, numPieces, field, rangesArr, splitKey, hashRouter, currentPartition, false);
         segmentDocSets.add(docSets);
       }
       t.stop();
@@ -295,7 +299,7 @@ public class SolrIndexSplitter {
           t.resume();
           // apply deletions specific to this partition. As a side-effect on the first call this also populates
           // a cache of docsets to delete per leaf reader per partition, which is reused for subsequent partitions.
-          iw.deleteDocuments(new SplittingQuery(partitionNumber, field, rangesArr, hashRouter, splitKey, docsToDeleteCache));
+          iw.deleteDocuments(new SplittingQuery(partitionNumber, field, rangesArr, hashRouter, splitKey, docsToDeleteCache, currentPartition));
           t.pause();
         } else {
           // This removes deletions but optimize might still be needed because sub-shards will have the same number of segments as the parent shard.
@@ -433,15 +437,17 @@ public class SolrIndexSplitter {
     private final HashBasedRouter hashRouter;
     private final String splitKey;
     private final Map<Object,FixedBitSet[]> docsToDelete;
+    private final AtomicInteger currentPartition;
 
     SplittingQuery(int partition, SchemaField field, DocRouter.Range[] rangesArr, HashBasedRouter hashRouter, String splitKey,
-                   Map<Object,FixedBitSet[]> docsToDelete) {
+                   Map<Object,FixedBitSet[]> docsToDelete, AtomicInteger currentPartition) {
       this.partition = partition;
       this.field = field;
       this.rangesArr = rangesArr;
       this.hashRouter = hashRouter;
       this.splitKey = splitKey;
       this.docsToDelete = docsToDelete;
+      this.currentPartition = currentPartition;
     }
 
     @Override
@@ -493,7 +499,7 @@ public class SolrIndexSplitter {
         return perPartition[partition];
       }
 
-      perPartition = split(readerContext, numPieces, field, rangesArr, splitKey, hashRouter, true);
+      perPartition = split(readerContext, numPieces, field, rangesArr, splitKey, hashRouter, currentPartition, true);
       docsToDelete.put(readerContext.reader().getCoreCacheHelper().getKey(), perPartition);
       return perPartition[partition];
     }
@@ -526,7 +532,7 @@ public class SolrIndexSplitter {
   }
 
   static FixedBitSet[] split(LeafReaderContext readerContext, int numPieces, SchemaField field, DocRouter.Range[] rangesArr,
-                             String splitKey, HashBasedRouter hashRouter, boolean delete) throws IOException {
+                             String splitKey, HashBasedRouter hashRouter, AtomicInteger currentPartition, boolean delete) throws IOException {
     LeafReader reader = readerContext.reader();
     FixedBitSet[] docSets = new FixedBitSet[numPieces];
     for (int i=0; i<numPieces; i++) {
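
Note (illustration, not part of the patch): the fix threads one shared AtomicInteger through every split() call, so round-robin assignment continues where the previous segment left off instead of restarting at partition 0 for each segment or each SplittingQuery pass. Below is a minimal standalone sketch of that idea; the names RoundRobinSketch and assignRoundRobin are hypothetical and do not appear in Solr.

    import java.util.concurrent.atomic.AtomicInteger;

    public class RoundRobinSketch {
      // Assign each doc of one segment to the next partition in line,
      // consuming the shared counter so the sequence continues across segments.
      static int[] assignRoundRobin(int docCount, int numPieces, AtomicInteger currentPartition) {
        int[] assignment = new int[docCount];
        for (int doc = 0; doc < docCount; doc++) {
          assignment[doc] = currentPartition.getAndUpdate(p -> (p + 1) % numPieces);
        }
        return assignment;
      }

      public static void main(String[] args) {
        // Shared counter (the fix): two 3-doc segments, 2 partitions.
        // Prints [0, 1, 0] then [1, 0, 1]: each partition gets 3 docs in total.
        AtomicInteger shared = new AtomicInteger();
        System.out.println(java.util.Arrays.toString(assignRoundRobin(3, 2, shared)));
        System.out.println(java.util.Arrays.toString(assignRoundRobin(3, 2, shared)));

        // Fresh counter per segment (the bug): prints [0, 1, 0] twice, so
        // partition 0 receives 4 docs and partition 1 only 2.
        System.out.println(java.util.Arrays.toString(assignRoundRobin(3, 2, new AtomicInteger())));
        System.out.println(java.util.Arrays.toString(assignRoundRobin(3, 2, new AtomicInteger())));
      }
    }

In this sketch the AtomicInteger serves as a mutable int box passed between calls, mirroring how the patch shares currentPartition between the per-leaf loop and the SplittingQuery delete pass; it is not about thread contention.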