This commit is contained in:
Karl Wright 2018-04-11 02:54:03 -04:00
commit 5b250b4a40
9 changed files with 331 additions and 27 deletions

View File

@ -86,6 +86,8 @@ New Features
* SOLR-12036: Factor out DefaultStreamFactory solrj class. (Christine Poerschke) * SOLR-12036: Factor out DefaultStreamFactory solrj class. (Christine Poerschke)
* SOLR-12151: Add abstract MultiSolrCloudTestCase class. (Christine Poerschke)
Bug Fixes Bug Fixes
---------------------- ----------------------
@ -129,6 +131,9 @@ Bug Fixes
(ab, Dawid Weiss, Mikhail Khludnev) (ab, Dawid Weiss, Mikhail Khludnev)
* SOLR-12155: Exception from UnInvertedField constructor puts threads to infinite wait. (Mikhail Khludnev) * SOLR-12155: Exception from UnInvertedField constructor puts threads to infinite wait. (Mikhail Khludnev)
* SOLR-12201: TestReplicationHandler.doTestIndexFetchOnMasterRestart(): handle unexpected replication failures
(Steve Rowe)
Optimizations Optimizations
---------------------- ----------------------

View File

@ -0,0 +1,80 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.cloud;
import org.junit.BeforeClass;
import org.junit.Test;
/**
 * Smoke test for {@link MultiSolrCloudTestCase}: starts a randomly sized set of
 * clouds, creates a random number of collections in each of them and then checks
 * that the number of registered clusters matches the number requested.
 */
public class MultiSolrCloudTestCaseTest extends MultiSolrCloudTestCase {

  private static int numClouds;
  private static int numCollectionsPerCloud;

  private static int numShards;
  private static int numReplicas;
  private static int maxShardsPerNode;
  private static int nodesPerCluster;

  /** Returns {@code prefix1 .. prefixN} for {@code N == count}; empty when {@code count} is zero. */
  private static String[] numberedNames(String prefix, int count) {
    final String[] names = new String[count];
    int slot = 0;
    while (slot < count) {
      names[slot] = prefix + (slot + 1);
      slot++;
    }
    return names;
  }

  /**
   * Picks the random cluster/collection layout and brings the clusters up.
   * The random draws happen in a fixed order so that a given seed always yields the same layout.
   */
  @BeforeClass
  public static void setupClusters() throws Exception {

    numClouds = random().nextInt(4); // 0..3
    final String[] clusterIds = numberedNames("cloud", numClouds);

    numCollectionsPerCloud = random().nextInt(3); // 0..2
    final String[] collections = numberedNames("collection", numCollectionsPerCloud);

    numShards = 1 + random().nextInt(2);
    numReplicas = 1 + random().nextInt(2);
    maxShardsPerNode = 1 + random().nextInt(2);

    // round up so that every replica of every shard has a node to live on
    final int coresPerCollection = numShards * numReplicas;
    nodesPerCluster = (coresPerCollection + (maxShardsPerNode - 1)) / maxShardsPerNode;

    final DefaultClusterCreateFunction createFunc = new DefaultClusterCreateFunction() {
      @Override
      protected int nodesPerCluster(String clusterId) {
        return nodesPerCluster;
      }
    };

    final DefaultClusterInitFunction initFunc =
        new DefaultClusterInitFunction(numShards, numReplicas, maxShardsPerNode) {
          @Override
          public void accept(String clusterId, MiniSolrCloudCluster cluster) {
            for (int idx = 0; idx < collections.length; idx++) {
              final String baseName = collections[idx];
              // either the same collection name in different clouds, or a globally unique one
              final String name = random().nextBoolean()
                  ? baseName
                  : baseName + "_in_" + clusterId;
              doAccept(name, cluster);
            }
          }
        };

    doSetupClusters(clusterIds, createFunc, initFunc);
  }

  /** Verifies that exactly the requested number of clusters was registered. */
  @Test
  public void test() throws Exception {
    assertEquals("numClouds", numClouds, clusterId2cluster.size());
  }

}

View File

@ -57,6 +57,7 @@ public class HdfsDirectoryFactoryTest extends SolrTestCaseJ4 {
@BeforeClass @BeforeClass
public static void setupClass() throws Exception { public static void setupClass() throws Exception {
dfsCluster = HdfsTestUtil.setupClass(createTempDir().toFile().getAbsolutePath(), false); dfsCluster = HdfsTestUtil.setupClass(createTempDir().toFile().getAbsolutePath(), false);
System.setProperty("solr.hdfs.blockcache.blocksperbank", "1024");
} }
@AfterClass @AfterClass
@ -64,6 +65,7 @@ public class HdfsDirectoryFactoryTest extends SolrTestCaseJ4 {
HdfsTestUtil.teardownClass(dfsCluster); HdfsTestUtil.teardownClass(dfsCluster);
System.clearProperty("solr.hdfs.home"); System.clearProperty("solr.hdfs.home");
System.clearProperty(HdfsDirectoryFactory.NRTCACHINGDIRECTORY_MAXMERGESIZEMB); System.clearProperty(HdfsDirectoryFactory.NRTCACHINGDIRECTORY_MAXMERGESIZEMB);
System.clearProperty("solr.hdfs.blockcache.blocksperbank");
dfsCluster = null; dfsCluster = null;
} }

View File

@ -626,9 +626,6 @@ public class TestReplicationHandler extends SolrTestCaseJ4 {
* the index hasn't changed. See SOLR-9036 * the index hasn't changed. See SOLR-9036
*/ */
@Test @Test
//Commented out 24-Feb 2018. JIRA marked as fixed.
// Still fails 26-Feb on master.
@BadApple(bugUrl = "https://issues.apache.org/jira/browse/SOLR-9036")
public void doTestIndexFetchOnMasterRestart() throws Exception { public void doTestIndexFetchOnMasterRestart() throws Exception {
useFactory(null); useFactory(null);
try { try {
@ -663,9 +660,14 @@ public class TestReplicationHandler extends SolrTestCaseJ4 {
String cmp = BaseDistributedSearchTestCase.compare(masterQueryResult, slaveQueryResult, 0, null); String cmp = BaseDistributedSearchTestCase.compare(masterQueryResult, slaveQueryResult, 0, null);
assertEquals(null, cmp); assertEquals(null, cmp);
assertEquals(1, Integer.parseInt(getSlaveDetails("timesIndexReplicated"))); int timesReplicated = Integer.parseInt(getSlaveDetails("timesIndexReplicated"));
String timesFailed = getSlaveDetails("timesFailed"); String timesFailed = getSlaveDetails("timesFailed");
assertEquals(0, Integer.parseInt(timesFailed != null ? timesFailed : "0")); if (null == timesFailed) {
timesFailed = "0";
}
int previousTimesFailed = Integer.parseInt(timesFailed);
// Sometimes replication will fail because master's core is still loading; make sure there was one success
assertEquals(1, timesReplicated - previousTimesFailed);
masterJetty.stop(); masterJetty.stop();
@ -683,7 +685,7 @@ public class TestReplicationHandler extends SolrTestCaseJ4 {
assertEquals(nDocs, numFound(slaveQueryRsp)); assertEquals(nDocs, numFound(slaveQueryRsp));
int failed = Integer.parseInt(getSlaveDetails("timesFailed")); int failed = Integer.parseInt(getSlaveDetails("timesFailed"));
assertTrue(failed > 0); assertTrue(failed > previousTimesFailed);
assertEquals(1, Integer.parseInt(getSlaveDetails("timesIndexReplicated")) - failed); assertEquals(1, Integer.parseInt(getSlaveDetails("timesIndexReplicated")) - failed);
} finally { } finally {
resetFactory(); resetFactory();

View File

@ -480,26 +480,27 @@ http://localhost:8983/solr/admin/collections?action=DELETESHARD&collection=anoth
[[createalias]] [[createalias]]
== CREATEALIAS: Create or Modify an Alias for a Collection == CREATEALIAS: Create or Modify an Alias for a Collection
The `CREATEALIAS` action will create a new alias pointing to one or more collections. If an alias by the same name The `CREATEALIAS` action will create a new alias pointing to one or more collections.
already exists, this action will replace the existing alias, effectively acting like an atomic "MOVE" command. Aliases Aliases come in 2 flavors: standard and routed.
come in 2 flavors: routed and non-routed.
Non-routed aliases are simpler and can serve to rename a collection or to distribute queries across several collections. *Standard aliases* are simple: CREATEALIAS registers the alias name with the names of one or more collections provided
While it is possible to send updates to an alias spanning multiple collections, non-routed alias have no logic for by the command.
distributing documents among the referenced collections so all updates will go to the first collection in the list. If an existing alias exists, it is replaced/updated.
A standard alias can serve to have the appearance of renaming a collection, and can be used to atomically swap
which backing/underlying collection is "live" for various purposes.
When Solr searches an alias pointing to multiple collections, Solr will search all shards of all the collections as an
aggregated whole.
While it is possible to send updates to an alias spanning multiple collections, standard aliases have no logic for
distributing documents among the referenced collections so all updates will go to the first collection in the list.
`/admin/collections?action=CREATEALIAS&name=_name_&collections=_collectionlist_` `/admin/collections?action=CREATEALIAS&name=_name_&collections=_collectionlist_`
Routed aliases are more complex to configure, but include logic for automatically inspecting a field on the document *Routed aliases* are aliases with additional capabilities to act as a kind of super-collection -- routing
and using the value in that field to select a destination collection. Additionally, the routed alias automates the updates to the correct collection.
partitioning of data across a series of collections by creating new collections periodically. This feature allows for Since the only routing strategy at present is time oriented, these are also called *Time Routed Aliases* (TRAs).
indefinite indexing of data without degradation of performance otherwise experienced due to the continuous growth of A TRA manages an alias and a time sequential series of collections that it will both create and optionally delete on-demand.
an index. As new data arrives, a field on the document is inspected and the document is then potentially re-routed to See <<time-routed-aliases.adoc#time-routed-aliases,Time Routed Aliases>> for some important high-level information
another collection. The underlying collections can can be queried independently but usually the alias will be used. before getting started.
These collections are created automatically on the fly as new data arrives based on the parameters supplied in this
command. For very high volume use cases or for use cases requiring only a limited span of data to be retained,
collections older than a given age can be deleted. This delete of old collections only occurs if new documents are
sent to the alias.
NOTE: Presently this is only supported for temporal fields stored as a NOTE: Presently this is only supported for temporal fields stored as a
<<field-types-included-with-solr.adoc#field-types-included-with-solr,DatePointField or TrieDateField>> type. Other <<field-types-included-with-solr.adoc#field-types-included-with-solr,DatePointField or TrieDateField>> type. Other
@ -541,15 +542,17 @@ requirements for collection naming.
`async`:: `async`::
Request ID to track this action which will be <<Asynchronous Calls,processed asynchronously>>. Request ID to track this action which will be <<Asynchronous Calls,processed asynchronously>>.
==== Non-Routed Alias Parameters ==== Standard Alias Parameters
`collections`:: `collections`::
A comma-separated list of collections to be aliased. The collections must already exist in the cluster. A comma-separated list of collections to be aliased. The collections must already exist in the cluster.
This parameter signals the creation of a simple (non-routed) alias. If it is present all routing parameters are This parameter signals the creation of a standard alias. If it is present all routing parameters are
prohibited. If routing parameters are present this parameter is prohibited. prohibited. If routing parameters are present this parameter is prohibited.
==== Routed Alias Parameters ==== Routed Alias Parameters
Most routed alias parameters become _alias properties_ that can subsequently be inspected and <<aliasprop,modified>>.
`router.start`:: `router.start`::
The start date/time of data for this time routed alias in Solr's standard date/time format (i.e., ISO-8601 or "NOW" The start date/time of data for this time routed alias in Solr's standard date/time format (i.e., ISO-8601 or "NOW"
optionally with <<working-with-dates.adoc#date-math,date math>>). optionally with <<working-with-dates.adoc#date-math,date math>>).

View File

@ -1,5 +1,5 @@
= How SolrCloud Works = How SolrCloud Works
:page-children: shards-and-indexing-data-in-solrcloud, distributed-requests :page-children: shards-and-indexing-data-in-solrcloud, distributed-requests, time-routed-aliases
// Licensed to the Apache Software Foundation (ASF) under one // Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file // or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information // distributed with this work for additional information
@ -21,6 +21,7 @@ The following sections cover provide general information about how various SolrC
* <<shards-and-indexing-data-in-solrcloud.adoc#shards-and-indexing-data-in-solrcloud,Shards and Indexing Data in SolrCloud>> * <<shards-and-indexing-data-in-solrcloud.adoc#shards-and-indexing-data-in-solrcloud,Shards and Indexing Data in SolrCloud>>
* <<distributed-requests.adoc#distributed-requests,Distributed Requests>> * <<distributed-requests.adoc#distributed-requests,Distributed Requests>>
* <<time-routed-aliases.adoc#time-routed-aliases,Time Routed Aliases>>
If you are already familiar with SolrCloud concepts and basic functionality, you can skip to the section covering <<solrcloud-configuration-and-parameters.adoc#solrcloud-configuration-and-parameters,SolrCloud Configuration and Parameters>>. If you are already familiar with SolrCloud concepts and basic functionality, you can skip to the section covering <<solrcloud-configuration-and-parameters.adoc#solrcloud-configuration-and-parameters,SolrCloud Configuration and Parameters>>.

View File

@ -0,0 +1,95 @@
= Time Routed Aliases
Time Routed Aliases (TRAs) is a SolrCloud feature that manages an alias and a time sequential series of collections.
It automatically creates new collections and (optionally) deletes old ones as it routes documents to the correct
collection based on its timestamp.
This approach allows for indefinite indexing of data without degradation of performance otherwise experienced due to the
continuous growth of a single index.
If you need to store a lot of timestamped data in Solr, such as logs or IoT sensor data, then this feature probably
makes more sense than creating one sharded hash-routed collection.
== How it works
First you create a time routed alias using the <<collections-api.adoc#createalias,CREATEALIAS>> command with some
router settings.
Most of the settings are editable at a later time using the <<collections-api.adoc#aliasprop,ALIASPROP>> command.
The first collection will be created automatically, along with an alias pointing to it.
Each underlying Solr "core" in a collection that is a member of a TRA has a special core property referencing the alias.
The name of each collection is comprised of the TRA name and the start timestamp (UTC), with trailing zeros and symbols
truncated.
Ideally, as a user of this feature, you needn't concern yourself with the particulars of the collection naming pattern
since both queries and updates may be done via the alias.
When adding data, you should usually direct documents to the alias (e.g. reference the alias name instead of any collection).
The Solr server and CloudSolrClient will direct an update request to the first collection that an alias points to.
The collections list for a TRA is always reverse sorted, and thus the connection path of the request will route to the
lead collection. Using CloudSolrClient is preferable as it can reduce the number of underlying physical HTTP requests by one.
If you know that a particular set of documents to be delivered is going to a particular older collection then you could
direct it there at the client side as an optimization but it's not necessary. CloudSolrClient does not (yet) do this.
When processing an update for a TRA, Solr initializes its
<<update-request-processors.adoc#update-request-processors,UpdateRequestProcessor>> chain as usual, but
when DistributedUpdateProcessor (DUP) initializes, it detects that the update targets a TRA and injects
TimeRoutedUpdateProcessor (TRUP) in front of itself.
TRUP, in coordination with the Overseer, is the main part of a TRA, and must immediately precede DUP. It is not
possible to configure custom chains with other types of UpdateRequestProcessors between TRUP and DUP.
TRUP first reads TRA configuration from the alias properties when it is initialized. As it sees each document, it checks for
changes to TRA properties, updates its cached configuration if needed and then determines which collection the
document belongs to:
* If TRUP needs to send it to a time segment represented by a collection other than the one that
the client chose to communicate with, then it will do so using mechanisms shared with DUP.
Once the document is forwarded to the correct collection (i.e. the correct TRA time segment), it skips directly to
DUP on the target collection and continues normally, potentially being routed again to the correct shard & replica
within the target collection.
* If it belongs in the current collection (which is usually the case if processing events as they occur), the document
passes through to DUP. DUP does its normal collection-level processing that may involve routing the document
to another shard & replica.
* If the time stamp on the document is more recent than the most recent TRA segment, then a new collection needs to be
added at the front of the TRA.
TRUP will create this collection, add it to the alias and then forward the document to the collection it just created.
This can happen recursively if more than one collection needs to be created.
Each time a new collection is added, the oldest collections in the TRA are examined for possible deletion, if that has
been configured.
All this happens synchronously, potentially adding seconds to the update request and indexing latency.
Any other type of update like a commit or delete is routed by TRUP to all collections.
Generally speaking, this is not a performance concern. When Solr receives a delete or commit wherein nothing is deleted
or nothing needs to be committed, then it's pretty cheap.
== Improvement Possibilities
This is a new feature of SolrCloud that can be expected to be improved.
Some _potential_ areas for improvement that _are not implemented yet_ are:
* Searches with time filters should only go to applicable collections.
* Collections ought to be constrained by their size instead of or in addition to time.
Based on the underlying design, this would only apply to the lead collection.
* Ways to automatically optimize (or reduce the resources of) older collections that aren't expected to receive more
updates, and might have less search demand.
* New collections ought to be created preemptively, as an option, to avoid delaying a document that does not yet have
a collection to go to.
* CloudSolrClient could route documents to the correct collection based on a timestamp instead of always picking the
latest.
== Limitations & Assumptions
* Only *time* routed aliases are supported. If you instead have some other sequential number, you could fake it
as a time (e.g. convert to a timestamp assuming some epoch and increment).
The smallest possible interval is one second.
No other routing scheme is supported, although this feature was developed with considerations that it could be
extended/improved to other schemes.
* The underlying collections form a contiguous sequence without gaps. This will not be suitable when there are
large gaps in the underlying data, as Solr will insist that there be a collection for each increment. This
is due in part to Solr calculating the end time of each interval collection based on the timestamp of
the next collection, since it is otherwise not stored in any way.
* Avoid sending updates to the oldest collection if you have also configured that old collections should be
automatically deleted. It could lead to exceptions bubbling back to the indexing client.

View File

@ -0,0 +1,107 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.cloud;
import java.util.HashMap;
import java.util.Map;
import java.util.function.BiConsumer;
import java.util.function.Function;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.junit.AfterClass;
/**
 * Base class for tests that require more than one SolrCloud
 *
 * Derived tests should call {@link #doSetupClusters(String[], Function, BiConsumer)} in a {@code BeforeClass}
 * static method. This configures and starts the {@link MiniSolrCloudCluster} instances, available
 * via the {@code clusterId2cluster} variable. The clusters' shutdown is handled automatically:
 * {@link #shutdownCluster()} shuts down every registered cluster and then empties
 * {@code clusterId2cluster}, so the static map does not carry clusters over into
 * the next test class run in the same JVM.
 */
public abstract class MultiSolrCloudTestCase extends SolrTestCaseJ4 {

  /** All clusters started via {@link #doSetupClusters(String[], Function, BiConsumer)}, keyed by cluster id. */
  protected static Map<String,MiniSolrCloudCluster> clusterId2cluster = new HashMap<>();

  /**
   * Cluster factory that builds a {@link MiniSolrCloudCluster} in a fresh temp directory
   * and uploads the {@code cloud-dynamic} configset under the name {@code conf}.
   * Subclasses only decide how many nodes each cluster gets.
   */
  protected static abstract class DefaultClusterCreateFunction implements Function<String,MiniSolrCloudCluster> {

    public DefaultClusterCreateFunction() {
    }

    /**
     * @param clusterId id of the cluster about to be created
     * @return the number of nodes to start for that cluster
     */
    protected abstract int nodesPerCluster(String clusterId);

    /**
     * Builds the cluster for {@code clusterId}.
     *
     * @throws RuntimeException wrapping any exception raised while building the cluster
     */
    @Override
    public MiniSolrCloudCluster apply(String clusterId) {
      try {
        final MiniSolrCloudCluster cluster = new SolrCloudTestCase
            .Builder(nodesPerCluster(clusterId), createTempDir())
            .addConfig("conf", configset("cloud-dynamic"))
            .build();
        return cluster;
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
    }

  }

  /**
   * Cluster initializer skeleton. Subclasses implement {@code accept(clusterId, cluster)} and
   * call {@link #doAccept(String, MiniSolrCloudCluster)} once per collection they want created.
   */
  protected static abstract class DefaultClusterInitFunction implements BiConsumer<String,MiniSolrCloudCluster> {

    final private int numShards;
    final private int numReplicas;
    final private int maxShardsPerNode;

    /**
     * @param numShards number of shards for every collection created via {@code doAccept}
     * @param numReplicas number of replicas per shard
     * @param maxShardsPerNode value passed to {@code setMaxShardsPerNode} on the create request
     */
    public DefaultClusterInitFunction(int numShards, int numReplicas, int maxShardsPerNode) {
      this.numShards = numShards;
      this.numReplicas = numReplicas;
      this.maxShardsPerNode = maxShardsPerNode;
    }

    /**
     * Creates {@code collection} (configset {@code conf}) in {@code cluster} and waits for
     * recoveries to finish.
     *
     * @throws RuntimeException wrapping any exception raised by the create request or the wait
     */
    protected void doAccept(String collection, MiniSolrCloudCluster cluster) {
      try {
        CollectionAdminRequest
            .createCollection(collection, "conf", numShards, numReplicas)
            .setMaxShardsPerNode(maxShardsPerNode)
            .processAndWait(cluster.getSolrClient(), SolrCloudTestCase.DEFAULT_TIMEOUT);

        AbstractDistribZkTestBase.waitForRecoveriesToFinish(collection, cluster.getSolrClient().getZkStateReader(), false, true, SolrCloudTestCase.DEFAULT_TIMEOUT);
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
    }

  }

  /**
   * Creates and initializes one cluster per entry of {@code clusterIds}.
   *
   * @param clusterIds ids of the clusters to start; each id must not be registered already
   * @param createFunc builds the cluster for a given id
   * @param initFunc initializes a freshly built cluster (e.g. creates collections)
   * @throws Exception if cluster creation or initialization fails
   */
  protected static void doSetupClusters(final String[] clusterIds,
      final Function<String,MiniSolrCloudCluster> createFunc,
      final BiConsumer<String,MiniSolrCloudCluster> initFunc) throws Exception {

    for (final String clusterId : clusterIds) {
      assertFalse("duplicate clusterId "+clusterId, clusterId2cluster.containsKey(clusterId));
      MiniSolrCloudCluster cluster = createFunc.apply(clusterId);
      // Register before initializing: if initFunc throws, the already running
      // cluster is still known to shutdownCluster() and gets stopped.
      clusterId2cluster.put(clusterId, cluster);
      initFunc.accept(clusterId, cluster);
    }
  }

  /**
   * Shuts down every registered cluster and empties {@code clusterId2cluster}.
   * A failure to shut down one cluster does not prevent the others from being shut down;
   * the first failure is rethrown at the end with later ones attached as suppressed.
   *
   * @throws Exception the first exception raised by a cluster shutdown, if any
   */
  @AfterClass
  public static void shutdownCluster() throws Exception {
    Exception firstFailure = null;
    try {
      for (MiniSolrCloudCluster cluster : clusterId2cluster.values()) {
        try {
          cluster.shutdown();
        } catch (Exception e) {
          if (firstFailure == null) {
            firstFailure = e;
          } else {
            firstFailure.addSuppressed(e);
          }
        }
      }
    } finally {
      // The map is static and shared by all subclasses: drop the (now stopped)
      // clusters so a later test class starts from an empty registry.
      clusterId2cluster.clear();
    }
    if (firstFailure != null) {
      throw firstFailure;
    }
  }

}

View File

@ -87,7 +87,7 @@ public class SolrCloudTestCase extends SolrTestCaseJ4 {
/** /**
* Builder class for a MiniSolrCloudCluster * Builder class for a MiniSolrCloudCluster
*/ */
protected static class Builder { public static class Builder {
private final int nodeCount; private final int nodeCount;
private final Path baseDir; private final Path baseDir;
@ -187,7 +187,15 @@ public class SolrCloudTestCase extends SolrTestCaseJ4 {
* @throws Exception if an error occurs on startup * @throws Exception if an error occurs on startup
*/ */
public void configure() throws Exception { public void configure() throws Exception {
cluster = new MiniSolrCloudCluster(nodeCount, baseDir, solrxml, jettyConfig, null, securityJson); cluster = build();
}
/**
* Configure, run and return the {@link MiniSolrCloudCluster}
* @throws Exception if an error occurs on startup
*/
public MiniSolrCloudCluster build() throws Exception {
MiniSolrCloudCluster cluster = new MiniSolrCloudCluster(nodeCount, baseDir, solrxml, jettyConfig, null, securityJson);
CloudSolrClient client = cluster.getSolrClient(); CloudSolrClient client = cluster.getSolrClient();
for (Config config : configs) { for (Config config : configs) {
((ZkClientClusterStateProvider)client.getClusterStateProvider()).uploadConfig(config.path, config.name); ((ZkClientClusterStateProvider)client.getClusterStateProvider()).uploadConfig(config.path, config.name);
@ -199,6 +207,7 @@ public class SolrCloudTestCase extends SolrTestCaseJ4 {
props.setClusterProperty(entry.getKey(), entry.getValue()); props.setClusterProperty(entry.getKey(), entry.getValue());
} }
} }
return cluster;
} }
} }