commit c5883d54b9
parent 439c29ae28

SOLR-5209: Unloading or deleting the last replica of a shard now no longer cascades to remove the shard from the clusterstate.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1724098 13f79535-47bb-0310-9956-ffa450edef68
CHANGES.txt
@@ -210,6 +210,9 @@ Other Changes
 
 * SOLR-8443: Change /stream handler http param from "stream" to "expr" (Joel Bernstein, Dennis Gove)
 
+* SOLR-5209: Unloading or deleting the last replica of a shard now no longer
+  cascades to remove the shard from the clusterstate. (Christine Poerschke)
+
 ======================= 5.5.0 =======================
 
 Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release
SliceMutator.java
@@ -92,46 +92,19 @@ public class SliceMutator {
     }
 
     Map<String, Slice> newSlices = new LinkedHashMap<>();
-    boolean lastSlice = false;
 
     for (Slice slice : coll.getSlices()) {
       Replica replica = slice.getReplica(cnn);
       if (replica != null) {
         Map<String, Replica> newReplicas = slice.getReplicasCopy();
         newReplicas.remove(cnn);
-        // TODO TODO TODO!!! if there are no replicas left for the slice, and the slice has no hash range, remove it
-        // if (newReplicas.size() == 0 && slice.getRange() == null) {
-        // if there are no replicas left for the slice remove it
-        if (newReplicas.size() == 0) {
-          slice = null;
-          lastSlice = true;
-        } else {
-          slice = new Slice(slice.getName(), newReplicas, slice.getProperties());
-        }
-      }
-
-      if (slice != null) {
-        newSlices.put(slice.getName(), slice);
+        slice = new Slice(slice.getName(), newReplicas, slice.getProperties());
       }
+      newSlices.put(slice.getName(), slice);
     }
 
-    if (lastSlice) {
-      // remove all empty pre allocated slices
-      for (Slice slice : coll.getSlices()) {
-        if (slice.getReplicas().size() == 0) {
-          newSlices.remove(slice.getName());
-        }
-      }
-    }
-
-    // if there are no slices left in the collection, remove it?
-    if (newSlices.size() == 0) {
-      return new ClusterStateMutator(zkStateReader).deleteCollection(clusterState,
-          new ZkNodeProps(Utils.makeMap(NAME, collection)));
-    } else {
-      return new ZkWriteCommand(collection, coll.copyWithSlices(newSlices));
-    }
+    return new ZkWriteCommand(collection, coll.copyWithSlices(newSlices));
   }
 
   public ZkWriteCommand setShardLeader(ClusterState clusterState, ZkNodeProps message) {
     StringBuilder sb = new StringBuilder();
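
The change above is easiest to see in isolation. Below is a minimal, self-contained model of the new removeReplica behavior, written with plain Java maps and hypothetical names rather than the Solr API: the affected slice is kept (possibly with an empty replica map), and the collection is never deleted as a side effect of replica removal.

import java.util.LinkedHashMap;
import java.util.Map;

public class RemoveReplicaModel {
  // Model a collection as sliceName -> (replicaName -> state).
  static Map<String, Map<String, String>> removeReplica(
      Map<String, Map<String, String>> slices, String replicaName) {
    Map<String, Map<String, String>> newSlices = new LinkedHashMap<>();
    for (Map.Entry<String, Map<String, String>> e : slices.entrySet()) {
      Map<String, String> newReplicas = new LinkedHashMap<>(e.getValue());
      newReplicas.remove(replicaName);
      // As of SOLR-5209 the slice is kept even if newReplicas is now empty.
      newSlices.put(e.getKey(), newReplicas);
    }
    return newSlices;
  }

  public static void main(String[] args) {
    Map<String, Map<String, String>> slices = new LinkedHashMap<>();
    slices.put("shard1", new LinkedHashMap<>(Map.of("core_node1", "active")));
    // Prints {shard1={}}: the now-empty shard survives in the clusterstate.
    System.out.println(removeReplica(slices, "core_node1"));
  }
}
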
BasicDistributedZkTest.java
@@ -365,42 +365,12 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
     testUpdateProcessorsRunOnlyOnce("distrib-dup-test-chain-implicit");
 
     testStopAndStartCoresInOneInstance();
-    testFailedCoreCreateCleansUp();
     // Thread.sleep(10000000000L);
     if (DEBUG) {
       super.printLayout();
     }
   }
 
-  private void testFailedCoreCreateCleansUp() throws Exception {
-    Create createCmd = new Create();
-    createCmd.setCoreName("core1");
-    createCmd.setCollection("the_core_collection");
-    String coredataDir = createTempDir().toFile().getAbsolutePath();
-    createCmd.setDataDir(coredataDir);
-    createCmd.setNumShards(1);
-    createCmd.setSchemaName("nonexistent_schema.xml");
-
-    String url = getBaseUrl(clients.get(0));
-    try (final HttpSolrClient client = new HttpSolrClient(url)) {
-      client.request(createCmd);
-      fail("Expected SolrCore create to fail");
-    } catch (Exception e) {
-
-    }
-
-    TimeOut timeout = new TimeOut(15, TimeUnit.SECONDS);
-    while (cloudClient.getZkStateReader().getZkClient().exists("/collections/the_core_collection", true)) {
-      if (timeout.hasTimedOut()) {
-        fail(cloudClient.getZkStateReader().getZkClient().getChildren("/collections", null, true).toString() + " Collection zk node still exists");
-      }
-      Thread.sleep(100);
-    }
-
-
-    assertFalse("Collection zk node still exists", cloudClient.getZkStateReader().getZkClient().exists("/collections/the_core_collection", true));
-  }
-
   private void testShardParamVariations() throws Exception {
     SolrQuery query = new SolrQuery("*:*");
     Map<String,Long> shardCounts = new HashMap<>();
CollectionsAPIDistributedZkTest.java
@@ -509,13 +509,19 @@ public class CollectionsAPIDistributedZkTest extends AbstractFullDistribZkTestBase {
     cloudClient.getZkStateReader().updateClusterState();
 
     Collection<Slice> slices = cloudClient.getZkStateReader().getClusterState().getActiveSlices("corewithnocollection3");
-    assertNull(slices);
+    int replicaCount = 0;
+    if (slices != null) {
+      for (Slice slice : slices) {
+        replicaCount += slice.getReplicas().size();
+      }
+    }
+    assertEquals("replicaCount", 0, replicaCount);
 
     CollectionAdminRequest.List list = new CollectionAdminRequest.List();
     CollectionAdminResponse res = new CollectionAdminResponse();
     res.setResponse(makeRequest(getBaseUrl((HttpSolrClient) clients.get(1)), list));
     List<String> collections = (List<String>) res.getResponse().get("collections");
-    assertFalse(collections.contains("corewithnocollection3"));
+    assertTrue(collections.contains("corewithnocollection3"));
   }
 
   private void testNodesUsedByCreate() throws Exception {
DeleteLastCustomShardedReplicaTest.java
@@ -24,6 +24,7 @@ import org.apache.solr.client.solrj.request.QueryRequest;
 import org.apache.solr.common.cloud.DocCollection;
 import org.apache.solr.common.cloud.ImplicitDocRouter;
 import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.cloud.Slice;
 import org.apache.solr.common.cloud.ZkNodeProps;
 import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.params.MapSolrParams;
@@ -84,11 +85,12 @@ public class DeleteLastCustomShardedReplicaTest extends AbstractFullDistribZkTestBase {
           .getClusterState().getCollection(collectionName);
       Replica replica = testcoll.getSlice("a").getReplicas().iterator().next();
 
-      removeAndWaitForLastReplicaGone(client, collectionName, replica, "a");
+      removeAndWaitForReplicaGone(client, collectionName, replica, "a", replicationFactor-1);
     }
   }
 
-  protected void removeAndWaitForLastReplicaGone(CloudSolrClient client, String COLL_NAME, Replica replica, String shard)
+  protected void removeAndWaitForReplicaGone(CloudSolrClient client, String COLL_NAME, Replica replica, String shard,
+      final int expectedNumReplicasRemaining)
       throws SolrServerException, IOException, InterruptedException {
     Map m = makeMap("collection", COLL_NAME, "action", DELETEREPLICA.toLower(), "shard",
         shard, "replica", replica.getName());
@@ -102,9 +104,11 @@ public class DeleteLastCustomShardedReplicaTest extends AbstractFullDistribZkTestBase {
     while (! timeout.hasTimedOut()) {
       testcoll = getCommonCloudSolrClient().getZkStateReader()
           .getClusterState().getCollection(COLL_NAME);
-      // In case of a custom sharded collection, the last replica deletion would also lead to
+      // As of SOLR-5209 the last replica deletion no longer leads to
       // the deletion of the slice.
-      success = testcoll.getSlice(shard) == null;
+      final Slice slice = testcoll.getSlice(shard);
+      final int actualNumReplicasRemaining = (slice == null ? 0 : slice.getReplicas().size());
+      success = (actualNumReplicasRemaining == expectedNumReplicasRemaining);
       if (success) {
         log.info("replica cleaned up {}/{} core {}",
             shard + "/" + replica.getName(), replica.getStr("core"));
OverseerTest.java
@@ -680,8 +680,10 @@ public class OverseerTest extends SolrTestCaseJ4 {
       mockController.publishState(collection, core, core_node, null, numShards);
       while (version == getClusterStateVersion(zkClient));
       Thread.sleep(500);
-      assertFalse(collection+" should be gone after publishing the null state",
-          reader.getClusterState().hasCollection(collection));
+      assertTrue(collection+" should remain after removal of the last core", // as of SOLR-5209 core removal does not cascade to remove the slice and collection
+          reader.getClusterState().getCollections().contains(collection));
+      assertTrue(core_node+" should be gone after publishing the null state",
+          null == reader.getClusterState().getCollection(collection).getReplica(core_node));
     } finally {
       close(mockController);
       close(overseerClient);
@@ -1312,4 +1314,134 @@ public class OverseerTest extends SolrTestCaseJ4 {
     return zkClient;
   }
 
+  @Test
+  public void testRemovalOfLastReplica() throws Exception {
+
+    final Integer numReplicas = 1+random().nextInt(4); // between 1 and 4 replicas
+    final Integer numShards = 1+random().nextInt(4); // between 1 and 4 shards
+
+    final String zkDir = createTempDir("zkData").toFile().getAbsolutePath();
+    final ZkTestServer server = new ZkTestServer(zkDir);
+
+    SolrZkClient zkClient = null;
+    ZkStateReader zkStateReader = null;
+    SolrZkClient overseerClient = null;
+    try {
+      server.run();
+      AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
+      AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
+
+      zkClient = new SolrZkClient(server.getZkAddress(), TIMEOUT);
+      ZkController.createClusterZkNodes(zkClient);
+
+      zkStateReader = new ZkStateReader(zkClient);
+      zkStateReader.createClusterStateWatchersAndUpdate();
+
+      overseerClient = electNewOverseer(server.getZkAddress());
+
+      DistributedQueue q = Overseer.getInQueue(zkClient);
+
+      // create collection
+      {
+        final Integer maxShardsPerNode = numReplicas * numShards;
+        ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, CollectionParams.CollectionAction.CREATE.toLower(),
+            "name", collection,
+            ZkStateReader.NUM_SHARDS_PROP, numShards.toString(),
+            ZkStateReader.REPLICATION_FACTOR, "1",
+            ZkStateReader.MAX_SHARDS_PER_NODE, maxShardsPerNode.toString()
+            );
+        q.offer(Utils.toJSON(m));
+      }
+      waitForCollections(zkStateReader, collection);
+
+      // create nodes with state recovering
+      for (int rr = 1; rr <= numReplicas; ++rr) {
+        for (int ss = 1; ss <= numShards; ++ss) {
+          final int N = (numReplicas-rr)*numShards + ss;
+          ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.STATE.toLower(),
+              ZkStateReader.BASE_URL_PROP, "http://127.0.0.1/solr",
+              ZkStateReader.SHARD_ID_PROP, "shard"+ss,
+              ZkStateReader.NODE_NAME_PROP, "node"+N,
+              ZkStateReader.COLLECTION_PROP, collection,
+              ZkStateReader.CORE_NAME_PROP, "core"+N,
+              ZkStateReader.ROLES_PROP, "",
+              ZkStateReader.STATE_PROP, Replica.State.RECOVERING.toString());
+
+          q.offer(Utils.toJSON(m));
+        }
+      }
+      // verify recovering
+      for (int rr = 1; rr <= numReplicas; ++rr) {
+        for (int ss = 1; ss <= numShards; ++ss) {
+          final int N = (numReplicas-rr)*numShards + ss;
+          verifyReplicaStatus(zkStateReader, collection, "shard"+ss, "core_node"+N, Replica.State.RECOVERING);
+        }
+      }
+
+      // publish node states (active)
+      for (int rr = 1; rr <= numReplicas; ++rr) {
+        for (int ss = 1; ss <= numShards; ++ss) {
+          final int N = (numReplicas-rr)*numShards + ss;
+          ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.STATE.toLower(),
+              ZkStateReader.BASE_URL_PROP, "http://127.0.0.1/solr",
+              ZkStateReader.NODE_NAME_PROP, "node"+N,
+              ZkStateReader.COLLECTION_PROP, collection,
+              ZkStateReader.CORE_NAME_PROP, "core"+N,
+              ZkStateReader.ROLES_PROP, "",
+              ZkStateReader.STATE_PROP, Replica.State.ACTIVE.toString());
+
+          q.offer(Utils.toJSON(m));
+        }
+      }
+      // verify active
+      for (int rr = 1; rr <= numReplicas; ++rr) {
+        for (int ss = 1; ss <= numShards; ++ss) {
+          final int N = (numReplicas-rr)*numShards + ss;
+          verifyReplicaStatus(zkStateReader, collection, "shard"+ss, "core_node"+N, Replica.State.ACTIVE);
+        }
+      }
+
+      // delete node
+      for (int rr = 1; rr <= numReplicas; ++rr) {
+        for (int ss = 1; ss <= numShards; ++ss) {
+          final int N = (numReplicas-rr)*numShards + ss;
+          ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.DELETECORE.toLower(),
+              ZkStateReader.COLLECTION_PROP, collection,
+              ZkStateReader.CORE_NODE_NAME_PROP, "core_node"+N);
+
+          q.offer(Utils.toJSON(m));
+
+          {
+            int iterationsLeft = 100;
+            while (iterationsLeft-- > 0) {
+              final Slice slice = zkStateReader.getClusterState().getSlice(collection, "shard"+ss);
+              if (null == slice || null == slice.getReplicasMap().get("core_node"+N)) {
+                break;
+              }
+              if (VERBOSE) log.info("still seeing {} shard{} core_node{}, rechecking in 50ms ({} iterations left)", collection, ss, N, iterationsLeft);
+              Thread.sleep(50);
+            }
+          }
+
+          final DocCollection docCollection = zkStateReader.getClusterState().getCollection(collection);
+          assertTrue("found no "+collection, (null != docCollection));
+
+          final Slice slice = docCollection.getSlice("shard"+ss);
+          assertTrue("found no "+collection+" shard"+ss+" slice after removal of replica "+rr+" of "+numReplicas, (null != slice));
+
+          final Collection<Replica> replicas = slice.getReplicas();
+          assertEquals("wrong number of "+collection+" shard"+ss+" replicas left, replicas="+replicas, numReplicas-rr, replicas.size());
+        }
+      }
+
+    } finally {
+
+      close(overseerClient);
+      close(zkStateReader);
+      close(zkClient);
+
+      server.shutdown();
+    }
+  }
+
 }
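
A note on the core numbering used by the new test: N = (numReplicas-rr)*numShards + ss, so each round rr removes the highest-numbered remaining replica of every shard, leaving numReplicas-rr replicas per shard, which is exactly what the assertEquals at the end of the loop checks. A standalone sketch with hypothetical example values (not part of the commit) that prints the resulting deletion order:

public class ReplicaNumberingSketch {
  public static void main(String[] args) {
    final int numReplicas = 2, numShards = 3; // example values from the test's 1..4 range
    for (int rr = 1; rr <= numReplicas; ++rr) {
      for (int ss = 1; ss <= numShards; ++ss) {
        final int N = (numReplicas - rr) * numShards + ss;
        // rr=1 visits core_node4..6, rr=2 visits core_node1..3
        System.out.printf("round rr=%d, shard%d -> core_node%d%n", rr, ss, N);
      }
    }
  }
}
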
UnloadDistributedZkTest.java
@@ -149,28 +149,19 @@ public class UnloadDistributedZkTest extends BasicDistributedZkTest {
       unloadCmd.setCoreName(unloadCmdCoreName1);
       adminClient.request(unloadCmd);
 
-      // there should be only one shard
-      checkCoreNamePresenceAndSliceCount(collection, unloadCmdCoreName1, false /* shouldBePresent */, numShards-1 /* expectedSliceCount */);
+      // there should still be two shards (as of SOLR-5209)
+      checkCoreNamePresenceAndSliceCount(collection, unloadCmdCoreName1, false /* shouldBePresent */, numShards /* expectedSliceCount */);
 
       // now unload one of the other
       unloadCmd = new Unload(false);
       unloadCmd.setCoreName(unloadCmdCoreName2);
       adminClient.request(unloadCmd);
-      checkCoreNamePresenceAndSliceCount(collection, unloadCmdCoreName2, false /* shouldBePresent */, numShards-2 /* expectedSliceCount */);
+      checkCoreNamePresenceAndSliceCount(collection, unloadCmdCoreName2, false /* shouldBePresent */, numShards /* expectedSliceCount */);
     }
 
     //printLayout();
-    // the collection should be gone
-    final TimeOut timeout = new TimeOut(30, TimeUnit.SECONDS);
-    while (getCommonCloudSolrClient().getZkStateReader().getClusterState().hasCollection(collection)) {
-      if (timeout.hasTimedOut()) {
-        printLayout();
-        fail("Still found collection");
-      }
-
-      Thread.sleep(50);
-    }
-
+    // the collection should still be present (as of SOLR-5209 replica removal does not cascade to remove the slice and collection)
+    assertTrue("No longer found collection "+collection, getCommonCloudSolrClient().getZkStateReader().getClusterState().hasCollection(collection));
   }
 
   /**