SOLR-4685: Fix core admin SPLIT action to be useful with non-cloud setups

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1466855 13f79535-47bb-0310-9956-ffa450edef68
Shalin Shekhar Mangar 2013-04-11 12:49:55 +00:00
parent 3829e75dcc
commit a043c9a4d5
4 changed files with 94 additions and 29 deletions
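To see concretely what the fix enables: a standalone (non-ZooKeeper) core can now be split into several local indexes or target cores through the core admin API. The sketch below exercises the action over HTTP, assuming the default /solr/admin/cores endpoint; the host, port, core name, and output paths are hypothetical placeholders, while the parameter names (action, core, path, targetCore) are the ones read by handleSplitAction in the diff that follows.

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;

// Minimal sketch (not part of this commit): split a standalone core into
// two local index directories via the core admin SPLIT action.
public class SplitCoreSketch {
  public static void main(String[] args) throws Exception {
    String url = "http://localhost:8983/solr/admin/cores?action=SPLIT"
        + "&core=core1"
        + "&path=" + URLEncoder.encode("/tmp/split_part_1", "UTF-8")
        + "&path=" + URLEncoder.encode("/tmp/split_part_2", "UTF-8");
    HttpURLConnection conn = (HttpURLConnection) new URL(url).openConnection();
    // With this fix, omitting both path and targetCore yields a clean
    // 400 ("Either path or targetCore param must be specified") instead
    // of a failure from the cloud-only code path.
    System.out.println("HTTP " + conn.getResponseCode());
    BufferedReader in = new BufferedReader(
        new InputStreamReader(conn.getInputStream(), "UTF-8"));
    for (String line; (line = in.readLine()) != null; ) {
      System.out.println(line);
    }
    in.close();
  }
}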

solr/CHANGES.txt

@@ -178,6 +178,8 @@ Bug Fixes
* SOLR-4699: The System admin handler should not assume a file system based data directory
location. (Mark Miller)
+ * SOLR-4685: Fix core admin SPLIT action to be useful with non-cloud setups (shalin)
Optimizations
----------------------

solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java

@@ -231,18 +231,17 @@ public class CoreAdminHandler extends RequestHandlerBase {
*/
protected boolean handleSplitAction(SolrQueryRequest adminReq, SolrQueryResponse rsp) throws IOException {
SolrParams params = adminReq.getParams();
- // partitions=N (split into N partitions, leaving it up to solr what the ranges are and where to put them)
// path - multiValued param, or comma separated param? Only creates indexes, not cores
List<DocRouter.Range> ranges = null;
- // boolean closeDirectories = true;
- // DirectoryFactory dirFactory = null;
String[] pathsArr = params.getParams("path");
String rangesStr = params.get("ranges"); // ranges=a-b,c-d,e-f
String[] newCoreNames = params.getParams("targetCore");
String cname = params.get(CoreAdminParams.CORE, "");
+ if ((pathsArr == null || pathsArr.length == 0) && (newCoreNames == null || newCoreNames.length == 0)) {
+ throw new SolrException(ErrorCode.BAD_REQUEST, "Either path or targetCore param must be specified");
+ }
log.info("Invoked split action for core: " + cname);
SolrCore core = coreContainer.getCore(cname);
SolrQueryRequest req = new LocalSolrQueryRequest(core, params);
@@ -251,20 +250,18 @@ public class CoreAdminHandler extends RequestHandlerBase {
try {
// TODO: allow use of rangesStr in the future
List<String> paths = null;
- int partitions = pathsArr != null ? pathsArr.length : params.getInt("partitions", 2);
+ int partitions = pathsArr != null ? pathsArr.length : newCoreNames.length;
// TODO: if we don't know the real range of the current core, we should just
// split on every other doc rather than hash.
- ClusterState clusterState = coreContainer.getZkController().getClusterState();
- String collectionName = req.getCore().getCoreDescriptor().getCloudDescriptor().getCollectionName();
- DocCollection collection = clusterState.getCollection(collectionName);
- String sliceName = req.getCore().getCoreDescriptor().getCloudDescriptor().getShardId();
- Slice slice = clusterState.getSlice(collectionName, sliceName);
- DocRouter.Range currentRange = slice.getRange() == null ?
- new DocRouter.Range(Integer.MIN_VALUE, Integer.MAX_VALUE) : slice.getRange();
- DocRouter hp = collection.getRouter() != null ? collection.getRouter() : DocRouter.DEFAULT;
- ranges = hp.partitionRange(partitions, currentRange);
+ if (coreContainer.isZooKeeperAware()) {
+ ClusterState clusterState = coreContainer.getZkController().getClusterState();
+ String collectionName = req.getCore().getCoreDescriptor().getCloudDescriptor().getCollectionName();
+ DocCollection collection = clusterState.getCollection(collectionName);
+ String sliceName = req.getCore().getCoreDescriptor().getCloudDescriptor().getShardId();
+ Slice slice = clusterState.getSlice(collectionName, sliceName);
+ DocRouter.Range currentRange = slice.getRange();
+ DocRouter hp = collection.getRouter() != null ? collection.getRouter() : DocRouter.DEFAULT;
+ ranges = currentRange != null ? hp.partitionRange(partitions, currentRange) : null;
+ }
if (pathsArr == null) {
newCores = new ArrayList<SolrCore>(partitions);

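The block above is the crux of the handler change: hash ranges are computed only when the container is ZooKeeper-aware, by carving the slice's range into N contiguous sub-ranges; in a non-cloud setup (or when the slice has no range) ranges stays null, and SolrIndexSplitter, in the next file, falls back to round-robin assignment. The following is a rough, self-contained sketch of the even-partitioning idea only, not the actual DocRouter.partitionRange implementation, which may distribute remainders differently; the class name is illustrative.

// Evenly partition the full 32-bit hash space into N contiguous sub-ranges,
// the shape of result the cloud branch gets back for a slice whose range
// covers the whole hash space.
public class RangePartitionSketch {
  public static void main(String[] args) {
    int partitions = 2;
    long min = Integer.MIN_VALUE, max = Integer.MAX_VALUE;
    long span = max - min + 1; // 2^32, the size of the full hash space
    for (int i = 0; i < partitions; i++) {
      long start = min + (span * i) / partitions;         // inclusive start
      long end = min + (span * (i + 1)) / partitions - 1; // inclusive end
      System.out.printf("range %d: [%d, %d]%n", i, start, end);
    }
    // Prints: range 0: [-2147483648, -1], range 1: [0, 2147483647]
  }
}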
solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java

@@ -53,14 +53,21 @@ public class SolrIndexSplitter {
DocRouter.Range[] rangesArr; // same as ranges list, but an array for extra speed in inner loops
List<String> paths;
List<SolrCore> cores;
+ int numPieces;
+ int currPartition = 0;
public SolrIndexSplitter(SplitIndexCommand cmd) {
field = cmd.getReq().getSchema().getUniqueKeyField();
searcher = cmd.getReq().getSearcher();
ranges = cmd.ranges;
- rangesArr = ranges.toArray(new DocRouter.Range[ranges.size()]);
paths = cmd.paths;
cores = cmd.cores;
+ if (ranges == null) {
+ numPieces = paths != null ? paths.size() : cores.size();
+ } else {
+ numPieces = ranges.size();
+ rangesArr = ranges.toArray(new DocRouter.Range[ranges.size()]);
+ }
}
public void split() throws IOException {
@@ -68,7 +75,7 @@ public class SolrIndexSplitter {
List<AtomicReaderContext> leaves = searcher.getTopReaderContext().leaves();
List<OpenBitSet[]> segmentDocSets = new ArrayList<OpenBitSet[]>(leaves.size());
log.info("SolrIndexSplitter: partitions=" + ranges.size() + " segments="+leaves.size());
log.info("SolrIndexSplitter: partitions=" + numPieces + " segments="+leaves.size());
for (AtomicReaderContext readerContext : leaves) {
assert readerContext.ordInParent == segmentDocSets.size(); // make sure we're going in order
@@ -83,8 +90,8 @@
// - would be more efficient on the read side, but prob less efficient merging
IndexReader[] subReaders = new IndexReader[leaves.size()];
- for (int partitionNumber=0; partitionNumber&lt;ranges.size(); partitionNumber++) {
- log.info("SolrIndexSplitter: partition #" + partitionNumber + " range=" + ranges.get(partitionNumber));
+ for (int partitionNumber=0; partitionNumber&lt;numPieces; partitionNumber++) {
+ log.info("SolrIndexSplitter: partition #" + partitionNumber + (ranges != null ? " range=" + ranges.get(partitionNumber) : ""));
for (int segmentNumber = 0; segmentNumber<subReaders.length; segmentNumber++) {
subReaders[segmentNumber] = new LiveDocsReader( leaves.get(segmentNumber), segmentDocSets.get(segmentNumber)[partitionNumber] );
@@ -101,7 +108,7 @@
} else {
SolrCore core = searcher.getCore();
String path = paths.get(partitionNumber);
- iw = SolrIndexWriter.create("SplittingIndexWriter"+partitionNumber + " " + ranges.get(partitionNumber), path,
+ iw = SolrIndexWriter.create("SplittingIndexWriter"+partitionNumber + (ranges != null ? " " + ranges.get(partitionNumber) : ""), path,
core.getDirectoryFactory(), true, core.getSchema(),
core.getSolrConfig().indexConfig, core.getDeletionPolicy(), core.getCodec());
}
@@ -130,7 +137,7 @@
OpenBitSet[] split(AtomicReaderContext readerContext) throws IOException {
AtomicReader reader = readerContext.reader();
- OpenBitSet[] docSets = new OpenBitSet[ranges.size()];
+ OpenBitSet[] docSets = new OpenBitSet[numPieces];
for (int i=0; i<docSets.length; i++) {
docSets[i] = new OpenBitSet(reader.maxDoc());
}
@@ -158,9 +165,14 @@
for (;;) {
int doc = docsEnum.nextDoc();
if (doc == DocsEnum.NO_MORE_DOCS) break;
- for (int i=0; i&lt;rangesArr.length; i++) { // inner-loop: use array here for extra speed.
- if (rangesArr[i].includes(hash)) {
- docSets[i].fastSet(doc);
+ if (ranges == null) {
+ docSets[currPartition].fastSet(doc);
+ currPartition = (currPartition + 1) % numPieces;
+ } else {
+ for (int i=0; i&lt;rangesArr.length; i++) { // inner-loop: use array here for extra speed.
+ if (rangesArr[i].includes(hash)) {
+ docSets[i].fastSet(doc);
+ }
}
}
}

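The ranges == null branch above is the heart of the non-cloud fallback: with no hash ranges to test against, documents are simply dealt out to the pieces in round-robin order through currPartition. Below is a standalone sketch of that assignment policy, with stand-in doc ids and an illustrative class name:

import java.util.ArrayList;
import java.util.List;

// Round-robin fallback used when ranges == null: each successive document
// goes to the next piece, wrapping around, mirroring
// docSets[currPartition].fastSet(doc) in the splitter above.
public class RoundRobinSplitSketch {
  public static void main(String[] args) {
    int numPieces = 3;
    List<List<Integer>> pieces = new ArrayList<List<Integer>>();
    for (int i = 0; i < numPieces; i++) {
      pieces.add(new ArrayList<Integer>());
    }
    int currPartition = 0;
    for (int doc = 0; doc < 9; doc++) { // nine stand-in document ids
      pieces.get(currPartition).add(doc);
      currPartition = (currPartition + 1) % numPieces;
    }
    System.out.println(pieces); // [[0, 3, 6], [1, 4, 7], [2, 5, 8]]
  }
}

Because the deal is strictly cyclic, splitting a document count that is a multiple of 3 across three indexes leaves exactly a third in each, which is what testSplitAlternately in the next file asserts.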
solr/core/src/test/org/apache/solr/update/SolrIndexSplitterTest.java

@@ -41,7 +41,7 @@ import java.io.UnsupportedEncodingException;
import java.util.List;
public class SolrIndexSplitterTest extends SolrTestCaseJ4 {
- File indexDir1 = null, indexDir2 = null;
+ File indexDir1 = null, indexDir2 = null, indexDir3 = null;
@BeforeClass
public static void beforeClass() throws Exception {
@@ -58,6 +58,8 @@ public class SolrIndexSplitterTest extends SolrTestCaseJ4 {
+ "_testSplit1");
indexDir2 = new File(TEMP_DIR, this.getClass().getName()
+ "_testSplit2");
+ indexDir3 = new File(TEMP_DIR, this.getClass().getName()
+ + "_testSplit3");
if (indexDir1.exists()) {
FileUtils.deleteDirectory(indexDir1);
@@ -68,6 +70,11 @@ public class SolrIndexSplitterTest extends SolrTestCaseJ4 {
FileUtils.deleteDirectory(indexDir2);
}
assertTrue("Failed to mkdirs indexDir2 for split index", indexDir2.mkdirs());
+ if (indexDir3.exists()) {
+ FileUtils.deleteDirectory(indexDir3);
+ }
+ assertTrue("Failed to mkdirs indexDir3 for split index", indexDir3.mkdirs());
}
@Test
@@ -162,6 +169,53 @@
}
}
+ @Test
+ public void testSplitAlternately() throws Exception {
+ LocalSolrQueryRequest request = null;
+ Directory directory = null;
+ try {
+ // add a number of docs divisible by 3
+ int max = (1 + random().nextInt(10)) * 3;
+ log.info("Adding {} number of documents", max);
+ for (int i = 0; i &lt; max; i++) {
+ assertU(adoc("id", String.valueOf(i)));
+ }
+ assertU(commit());
+ request = lrf.makeRequest("q", "dummy");
+ SplitIndexCommand command = new SplitIndexCommand(request,
+ Lists.newArrayList(indexDir1.getAbsolutePath(), indexDir2.getAbsolutePath(), indexDir3.getAbsolutePath()), null, null);
+ new SolrIndexSplitter(command).split();
+ directory = h.getCore().getDirectoryFactory().get(indexDir1.getAbsolutePath(),
+ DirectoryFactory.DirContext.DEFAULT, h.getCore().getSolrConfig().indexConfig.lockType);
+ DirectoryReader reader = DirectoryReader.open(directory);
+ assertEquals("split index1 has wrong number of documents", max / 3, reader.numDocs());
+ reader.close();
+ h.getCore().getDirectoryFactory().release(directory);
+ directory = h.getCore().getDirectoryFactory().get(indexDir2.getAbsolutePath(),
+ DirectoryFactory.DirContext.DEFAULT, h.getCore().getSolrConfig().indexConfig.lockType);
+ reader = DirectoryReader.open(directory);
+ assertEquals("split index2 has wrong number of documents", max / 3, reader.numDocs());
+ reader.close();
+ h.getCore().getDirectoryFactory().release(directory);
+ directory = h.getCore().getDirectoryFactory().get(indexDir3.getAbsolutePath(),
+ DirectoryFactory.DirContext.DEFAULT, h.getCore().getSolrConfig().indexConfig.lockType);
+ reader = DirectoryReader.open(directory);
+ assertEquals("split index3 has wrong number of documents", max / 3, reader.numDocs());
+ reader.close();
+ h.getCore().getDirectoryFactory().release(directory);
+ directory = null;
+ } finally {
+ if (request != null) request.close(); // decrefs the searcher
+ if (directory != null) {
+ // perhaps an assert failed, release the directory
+ h.getCore().getDirectoryFactory().release(directory);
+ }
+ }
+ }
private List<DocRouter.Range> getRanges(String id1, String id2) throws UnsupportedEncodingException {
// find minHash/maxHash hash ranges
byte[] bytes = id1.getBytes("UTF-8");