This commit is contained in:
Karl Wright 2018-04-11 02:54:03 -04:00
commit 5b250b4a40
9 changed files with 331 additions and 27 deletions

View File

@ -86,6 +86,8 @@ New Features
* SOLR-12036: Factor out DefaultStreamFactory solrj class. (Christine Poerschke)
* SOLR-12151: Add abstract MultiSolrCloudTestCase class. (Christine Poerschke)
Bug Fixes
----------------------
@ -129,6 +131,9 @@ Bug Fixes
(ab, Dawid Weiss, Mikhail Khludnev)
* SOLR-12155: Exception from UnInvertedField constructor puts threads to infinite wait. (Mikhail Khludnev)
* SOLR-12201: TestReplicationHandler.doTestIndexFetchOnMasterRestart(): handle unexpected replication failures
(Steve Rowe)
Optimizations
----------------------

View File

@ -0,0 +1,80 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.cloud;
import org.junit.BeforeClass;
import org.junit.Test;
public class MultiSolrCloudTestCaseTest extends MultiSolrCloudTestCase {

  private static int numClouds;
  private static int numCollectionsPerCloud;

  private static int numShards;
  private static int numReplicas;
  private static int maxShardsPerNode;
  private static int nodesPerCluster;

  @BeforeClass
  public static void setupClusters() throws Exception {
    // Randomized topology: 0..3 clouds named "cloud1".."cloudN".
    numClouds = random().nextInt(4); // 0..3
    final String[] clusterIds = new String[numClouds];
    for (int i = 0; i < clusterIds.length; ++i) {
      clusterIds[i] = "cloud" + (i + 1);
    }

    // 0..2 collections per cloud, named "collection1".."collectionM".
    numCollectionsPerCloud = random().nextInt(3); // 0..2
    final String[] collections = new String[numCollectionsPerCloud];
    for (int i = 0; i < collections.length; ++i) {
      collections[i] = "collection" + (i + 1);
    }

    numShards = 1 + random().nextInt(2);
    numReplicas = 1 + random().nextInt(2);
    maxShardsPerNode = 1 + random().nextInt(2);
    // Smallest node count that can host numShards*numReplicas cores (ceiling division).
    nodesPerCluster = (numShards * numReplicas + (maxShardsPerNode - 1)) / maxShardsPerNode;

    doSetupClusters(
        clusterIds,
        new DefaultClusterCreateFunction() {
          @Override
          protected int nodesPerCluster(String clusterId) {
            return nodesPerCluster;
          }
        },
        new DefaultClusterInitFunction(numShards, numReplicas, maxShardsPerNode) {
          @Override
          public void accept(String clusterId, MiniSolrCloudCluster cluster) {
            for (final String collection : collections) {
              if (random().nextBoolean()) {
                doAccept(collection, cluster); // same collection name in different clouds
              } else {
                doAccept(collection + "_in_" + clusterId, cluster); // globally unique collection name
              }
            }
          }
        });
  }

  @Test
  public void test() throws Exception {
    assertEquals("numClouds", numClouds, clusterId2cluster.size());
  }
}

View File

@ -57,6 +57,7 @@ public class HdfsDirectoryFactoryTest extends SolrTestCaseJ4 {
@BeforeClass
public static void setupClass() throws Exception {
dfsCluster = HdfsTestUtil.setupClass(createTempDir().toFile().getAbsolutePath(), false);
System.setProperty("solr.hdfs.blockcache.blocksperbank", "1024");
}
@AfterClass
@ -64,6 +65,7 @@ public class HdfsDirectoryFactoryTest extends SolrTestCaseJ4 {
HdfsTestUtil.teardownClass(dfsCluster);
System.clearProperty("solr.hdfs.home");
System.clearProperty(HdfsDirectoryFactory.NRTCACHINGDIRECTORY_MAXMERGESIZEMB);
System.clearProperty("solr.hdfs.blockcache.blocksperbank");
dfsCluster = null;
}

View File

@ -626,9 +626,6 @@ public class TestReplicationHandler extends SolrTestCaseJ4 {
* the index hasn't changed. See SOLR-9036
*/
@Test
//Commented out 24-Feb 2018. JIRA marked as fixed.
// Still fails 26-Feb on master.
@BadApple(bugUrl = "https://issues.apache.org/jira/browse/SOLR-9036")
public void doTestIndexFetchOnMasterRestart() throws Exception {
useFactory(null);
try {
@ -663,9 +660,14 @@ public class TestReplicationHandler extends SolrTestCaseJ4 {
String cmp = BaseDistributedSearchTestCase.compare(masterQueryResult, slaveQueryResult, 0, null);
assertEquals(null, cmp);
assertEquals(1, Integer.parseInt(getSlaveDetails("timesIndexReplicated")));
int timesReplicated = Integer.parseInt(getSlaveDetails("timesIndexReplicated"));
String timesFailed = getSlaveDetails("timesFailed");
assertEquals(0, Integer.parseInt(timesFailed != null ? timesFailed : "0"));
if (null == timesFailed) {
timesFailed = "0";
}
int previousTimesFailed = Integer.parseInt(timesFailed);
// Sometimes replication will fail because master's core is still loading; make sure there was one success
assertEquals(1, timesReplicated - previousTimesFailed);
masterJetty.stop();
@ -683,7 +685,7 @@ public class TestReplicationHandler extends SolrTestCaseJ4 {
assertEquals(nDocs, numFound(slaveQueryRsp));
int failed = Integer.parseInt(getSlaveDetails("timesFailed"));
assertTrue(failed > 0);
assertTrue(failed > previousTimesFailed);
assertEquals(1, Integer.parseInt(getSlaveDetails("timesIndexReplicated")) - failed);
} finally {
resetFactory();

View File

@ -480,26 +480,27 @@ http://localhost:8983/solr/admin/collections?action=DELETESHARD&collection=anoth
[[createalias]]
== CREATEALIAS: Create or Modify an Alias for a Collection
The `CREATEALIAS` action will create a new alias pointing to one or more collections. If an alias by the same name
already exists, this action will replace the existing alias, effectively acting like an atomic "MOVE" command. Aliases
come in 2 flavors: routed and non-routed.
The `CREATEALIAS` action will create a new alias pointing to one or more collections.
Aliases come in 2 flavors: standard and routed.
Non-routed aliases are simpler and can serve to rename a collection or to distribute queries across several collections.
While it is possible to send updates to an alias spanning multiple collections, non-routed aliases have no logic for
distributing documents among the referenced collections so all updates will go to the first collection in the list.
*Standard aliases* are simple: CREATEALIAS registers the alias name with the names of one or more collections provided
by the command.
If an existing alias exists, it is replaced/updated.
A standard alias can serve to have the appearance of renaming a collection, and can be used to atomically swap
which backing/underlying collection is "live" for various purposes.
When Solr searches an alias pointing to multiple collections, Solr will search all shards of all the collections as an
aggregated whole.
While it is possible to send updates to an alias spanning multiple collections, standard aliases have no logic for
distributing documents among the referenced collections so all updates will go to the first collection in the list.
`/admin/collections?action=CREATEALIAS&name=_name_&collections=_collectionlist_`
Routed aliases are more complex to configure, but include logic for automatically inspecting a field on the document
and using the value in that field to select a destination collection. Additionally, the routed alias automates the
partitioning of data across a series of collections by creating new collections periodically. This feature allows for
indefinite indexing of data without degradation of performance otherwise experienced due to the continuous growth of
an index. As new data arrives, a field on the document is inspected and the document is then potentially re-routed to
another collection. The underlying collections can be queried independently but usually the alias will be used.
These collections are created automatically on the fly as new data arrives based on the parameters supplied in this
command. For very high volume use cases or for use cases requiring only a limited span of data to be retained,
collections older than a given age can be deleted. This delete of old collections only occurs if new documents are
sent to the alias.
*Routed aliases* are aliases with additional capabilities to act as a kind of super-collection -- routing
updates to the correct collection.
Since the only routing strategy at present is time oriented, these are also called *Time Routed Aliases* (TRAs).
A TRA manages an alias and a time sequential series of collections that it will both create and optionally delete on-demand.
See <<time-routed-aliases.adoc#time-routed-aliases,Time Routed Aliases>> for some important high-level information
before getting started.
NOTE: Presently this is only supported for temporal fields stored as a
<<field-types-included-with-solr.adoc#field-types-included-with-solr,DatePointField or TrieDateField>> type. Other
@ -541,15 +542,17 @@ requirements for collection naming.
`async`::
Request ID to track this action which will be <<Asynchronous Calls,processed asynchronously>>.
==== Non-Routed Alias Parameters
==== Standard Alias Parameters
`collections`::
A comma-separated list of collections to be aliased. The collections must already exist in the cluster.
This parameter signals the creation of a simple (non-routed) alias. If it is present all routing parameters are
This parameter signals the creation of a standard alias. If it is present all routing parameters are
prohibited. If routing parameters are present this parameter is prohibited.
==== Routed Alias Parameters
Most routed alias parameters become _alias properties_ that can subsequently be inspected and <<aliasprop,modified>>.
`router.start`::
The start date/time of data for this time routed alias in Solr's standard date/time format (i.e., ISO-8601 or "NOW"
optionally with <<working-with-dates.adoc#date-math,date math>>).

View File

@ -1,5 +1,5 @@
= How SolrCloud Works
:page-children: shards-and-indexing-data-in-solrcloud, distributed-requests
:page-children: shards-and-indexing-data-in-solrcloud, distributed-requests, time-routed-aliases
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
@ -21,6 +21,7 @@ The following sections cover provide general information about how various SolrC
* <<shards-and-indexing-data-in-solrcloud.adoc#shards-and-indexing-data-in-solrcloud,Shards and Indexing Data in SolrCloud>>
* <<distributed-requests.adoc#distributed-requests,Distributed Requests>>
* <<time-routed-aliases.adoc#time-routed-aliases,Time Routed Aliases>>
If you are already familiar with SolrCloud concepts and basic functionality, you can skip to the section covering <<solrcloud-configuration-and-parameters.adoc#solrcloud-configuration-and-parameters,SolrCloud Configuration and Parameters>>.

View File

@ -0,0 +1,95 @@
= Time Routed Aliases
Time Routed Aliases (TRAs) is a SolrCloud feature that manages an alias and a time sequential series of collections.
It automatically creates new collections and (optionally) deletes old ones as it routes documents to the correct
collection based on its timestamp.
This approach allows for indefinite indexing of data without degradation of performance otherwise experienced due to the
continuous growth of a single index.
If you need to store a lot of timestamped data in Solr, such as logs or IoT sensor data, then this feature probably
makes more sense than creating one sharded hash-routed collection.
== How it works
First you create a time routed alias using the <<collections-api.adoc#createalias,CREATEALIAS>> command with some
router settings.
Most of the settings are editable at a later time using the <<collections-api.adoc#aliasprop,ALIASPROP>> command.
The first collection will be created automatically, along with an alias pointing to it.
Each underlying Solr "core" in a collection that is a member of a TRA has a special core property referencing the alias.
The name of each collection is comprised of the TRA name and the start timestamp (UTC), with trailing zeros and symbols
truncated.
Ideally, as a user of this feature, you needn't concern yourself with the particulars of the collection naming pattern
since both queries and updates may be done via the alias.
When adding data, you should usually direct documents to the alias (e.g. reference the alias name instead of any collection).
The Solr server and CloudSolrClient will direct an update request to the first collection that an alias points to.
The collections list for a TRA is always reverse sorted, and thus the connection path of the request will route to the
lead collection. Using CloudSolrClient is preferable as it can reduce the number of underlying physical HTTP requests by one.
If you know that a particular set of documents to be delivered is going to a particular older collection then you could
direct it there at the client side as an optimization but it's not necessary. CloudSolrClient does not (yet) do this.
When processing an update for a TRA, Solr initializes its
<<update-request-processors.adoc#update-request-processors,UpdateRequestProcessor>> chain as usual, but
when DistributedUpdateProcessor (DUP) initializes, it detects that the update targets a TRA and injects
TimeRoutedUpdateProcessor (TRUP) in front of itself.
TRUP, in coordination with the Overseer, is the main part of a TRA, and must immediately precede DUP. It is not
possible to configure custom chains with other types of UpdateRequestProcessors between TRUP and DUP.
TRUP first reads TRA configuration from the alias properties when it is initialized. As it sees each document, it checks for
changes to TRA properties, updates its cached configuration if needed and then determines which collection the
document belongs to:
* If TRUP needs to send it to a time segment represented by a collection other than the one that
the client chose to communicate with, then it will do so using mechanisms shared with DUP.
Once the document is forwarded to the correct collection (i.e. the correct TRA time segment), it skips directly to
DUP on the target collection and continues normally, potentially being routed again to the correct shard & replica
within the target collection.
* If it belongs in the current collection (which is usually the case if processing events as they occur), the document
passes through to DUP. DUP does its normal collection-level processing that may involve routing the document
to another shard & replica.
* If the time stamp on the document is more recent than the most recent TRA segment, then a new collection needs to be
added at the front of the TRA.
TRUP will create this collection, add it to the alias and then forward the document to the collection it just created.
This can happen recursively if more than one collection needs to be created.
Each time a new collection is added, the oldest collections in the TRA are examined for possible deletion, if that has
been configured.
All this happens synchronously, potentially adding seconds to the update request and indexing latency.
Any other type of update like a commit or delete is routed by TRUP to all collections.
Generally speaking, this is not a performance concern. When Solr receives a delete or commit wherein nothing is deleted
or nothing needs to be committed, then it's pretty cheap.
== Improvement Possibilities
This is a new feature of SolrCloud that can be expected to be improved.
Some _potential_ areas for improvement that _are not implemented yet_ are:
* Searches with time filters should only go to applicable collections.
* Collections ought to be constrained by their size instead of or in addition to time.
Based on the underlying design, this would only apply to the lead collection.
* Ways to automatically optimize (or reduce the resources of) older collections that aren't expected to receive more
updates, and might have less search demand.
* New collections ought to be created preemptively, as an option, to avoid delaying a document that does not yet have
a collection to go to.
* CloudSolrClient could route documents to the correct collection based on a timestamp instead of always picking the
latest.
== Limitations & Assumptions
* Only *time* routed aliases are supported. If you instead have some other sequential number, you could fake it
as a time (e.g. convert to a timestamp assuming some epoch and increment).
The smallest possible interval is one second.
No other routing scheme is supported, although this feature was developed with considerations that it could be
extended/improved to other schemes.
* The underlying collections form a contiguous sequence without gaps. This will not be suitable when there are
large gaps in the underlying data, as Solr will insist that there be a collection for each increment. This
is due in part to Solr calculating the end time of each interval collection based on the timestamp of
the next collection, since it is otherwise not stored in any way.
* Avoid sending updates to the oldest collection if you have also configured that old collections should be
automatically deleted. It could lead to exceptions bubbling back to the indexing client.

View File

@ -0,0 +1,107 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.cloud;
import java.util.HashMap;
import java.util.Map;
import java.util.function.BiConsumer;
import java.util.function.Function;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.junit.AfterClass;
/**
 * Base class for tests that require more than one SolrCloud.
 *
 * Derived tests should call {@link #doSetupClusters(String[], Function, BiConsumer)} in a {@code BeforeClass}
 * static method. This configures and starts the {@link MiniSolrCloudCluster} instances, available
 * via the {@code clusterId2cluster} variable. The clusters' shutdown is handled automatically.
 */
public abstract class MultiSolrCloudTestCase extends SolrTestCaseJ4 {

  /** Maps each cluster id passed to {@link #doSetupClusters} to its running cluster. */
  protected static Map<String,MiniSolrCloudCluster> clusterId2cluster = new HashMap<>();

  /** Creates a {@link MiniSolrCloudCluster} per cluster id; subclasses choose the node count. */
  protected abstract static class DefaultClusterCreateFunction implements Function<String,MiniSolrCloudCluster> {

    public DefaultClusterCreateFunction() {
    }

    /** @return the number of Solr nodes to start for the given cluster id */
    protected abstract int nodesPerCluster(String clusterId);

    @Override
    public MiniSolrCloudCluster apply(String clusterId) {
      try {
        final MiniSolrCloudCluster cluster = new SolrCloudTestCase
            .Builder(nodesPerCluster(clusterId), createTempDir())
            .addConfig("conf", configset("cloud-dynamic"))
            .build();
        return cluster;
      } catch (Exception e) {
        // Function.apply cannot throw checked exceptions; surface startup failures unchecked.
        throw new RuntimeException(e);
      }
    }
  }

  /** Initializes a started cluster; {@link #doAccept} creates one collection and waits for recovery. */
  protected abstract static class DefaultClusterInitFunction implements BiConsumer<String,MiniSolrCloudCluster> {

    private final int numShards;
    private final int numReplicas;
    private final int maxShardsPerNode;

    public DefaultClusterInitFunction(int numShards, int numReplicas, int maxShardsPerNode) {
      this.numShards = numShards;
      this.numReplicas = numReplicas;
      this.maxShardsPerNode = maxShardsPerNode;
    }

    /** Creates {@code collection} on {@code cluster} and blocks until all replicas have recovered. */
    protected void doAccept(String collection, MiniSolrCloudCluster cluster) {
      try {
        CollectionAdminRequest
            .createCollection(collection, "conf", numShards, numReplicas)
            .setMaxShardsPerNode(maxShardsPerNode)
            .processAndWait(cluster.getSolrClient(), SolrCloudTestCase.DEFAULT_TIMEOUT);
        AbstractDistribZkTestBase.waitForRecoveriesToFinish(collection, cluster.getSolrClient().getZkStateReader(), false, true, SolrCloudTestCase.DEFAULT_TIMEOUT);
      } catch (Exception e) {
        // BiConsumer.accept cannot throw checked exceptions; surface failures unchecked.
        throw new RuntimeException(e);
      }
    }
  }

  /**
   * Creates and initializes one cluster per element of {@code clusterIds}, registering each in
   * {@code clusterId2cluster}. Cluster ids must be unique.
   *
   * @param clusterIds ids of the clusters to start; one cluster is started per id
   * @param createFunc creates the (started) cluster for an id
   * @param initFunc   initializes the started cluster (e.g. creates collections)
   */
  protected static void doSetupClusters(final String[] clusterIds,
      final Function<String,MiniSolrCloudCluster> createFunc,
      final BiConsumer<String,MiniSolrCloudCluster> initFunc) throws Exception {

    for (final String clusterId : clusterIds) {
      assertFalse("duplicate clusterId "+clusterId, clusterId2cluster.containsKey(clusterId));
      MiniSolrCloudCluster cluster = createFunc.apply(clusterId);
      initFunc.accept(clusterId, cluster);
      clusterId2cluster.put(clusterId, cluster);
    }
  }

  /** Shuts down every registered cluster and clears the registry. */
  @AfterClass
  public static void shutdownCluster() throws Exception {
    Exception firstException = null;
    for (MiniSolrCloudCluster cluster : clusterId2cluster.values()) {
      try {
        cluster.shutdown();
      } catch (Exception e) {
        // Keep shutting down the remaining clusters; rethrow the first failure afterwards.
        if (firstException == null) {
          firstException = e;
        }
      }
    }
    // Clear the static map so a subsequent test class in the same JVM neither leaks the
    // stopped clusters nor trips doSetupClusters' duplicate-clusterId assertion.
    clusterId2cluster.clear();
    if (firstException != null) {
      throw firstException;
    }
  }
}

View File

@ -87,7 +87,7 @@ public class SolrCloudTestCase extends SolrTestCaseJ4 {
/**
* Builder class for a MiniSolrCloudCluster
*/
protected static class Builder {
public static class Builder {
private final int nodeCount;
private final Path baseDir;
@ -187,7 +187,15 @@ public class SolrCloudTestCase extends SolrTestCaseJ4 {
* @throws Exception if an error occurs on startup
*/
public void configure() throws Exception {
cluster = new MiniSolrCloudCluster(nodeCount, baseDir, solrxml, jettyConfig, null, securityJson);
cluster = build();
}
/**
* Configure, run and return the {@link MiniSolrCloudCluster}
* @throws Exception if an error occurs on startup
*/
public MiniSolrCloudCluster build() throws Exception {
MiniSolrCloudCluster cluster = new MiniSolrCloudCluster(nodeCount, baseDir, solrxml, jettyConfig, null, securityJson);
CloudSolrClient client = cluster.getSolrClient();
for (Config config : configs) {
((ZkClientClusterStateProvider)client.getClusterStateProvider()).uploadConfig(config.path, config.name);
@ -199,6 +207,7 @@ public class SolrCloudTestCase extends SolrTestCaseJ4 {
props.setClusterProperty(entry.getKey(), entry.getValue());
}
}
return cluster;
}
}