mirror of https://github.com/apache/lucene.git
SOLR-5209: Unloading or deleting the last replica of a shard now no longer cascades to remove the shard from the clusterstate.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1724098 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent 439c29ae28
commit c5883d54b9
@@ -210,6 +210,9 @@ Other Changes

 * SOLR-8443: Change /stream handler http param from "stream" to "expr" (Joel Bernstein, Dennis Gove)

+* SOLR-5209: Unloading or deleting the last replica of a shard now no longer
+  cascades to remove the shard from the clusterstate. (Christine Poerschke)
+
 ======================= 5.5.0 =======================

 Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release
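A practical note on this entry: an empty shard now stays in the cluster state until it is removed explicitly. A minimal SolrJ sketch of that explicit cleanup, not part of the commit; "localhost:9983", "mycoll" and shard "a" are placeholder names, and DELETESHARD is generally only accepted for slices that are inactive or carry no hash range:

import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;

public class DeleteEmptyShard {
  public static void main(String[] args) throws Exception {
    // Placeholder ZooKeeper address, collection and shard names for this sketch.
    try (CloudSolrClient client = new CloudSolrClient("localhost:9983")) {
      CollectionAdminRequest.DeleteShard deleteShard = new CollectionAdminRequest.DeleteShard();
      deleteShard.setCollectionName("mycoll");
      deleteShard.setShardName("a");
      deleteShard.process(client); // sends action=DELETESHARD to the Collections API
    }
  }
}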
@@ -92,45 +92,18 @@ public class SliceMutator {
     }

     Map<String, Slice> newSlices = new LinkedHashMap<>();
-    boolean lastSlice = false;

     for (Slice slice : coll.getSlices()) {
       Replica replica = slice.getReplica(cnn);
       if (replica != null) {
         Map<String, Replica> newReplicas = slice.getReplicasCopy();
         newReplicas.remove(cnn);
-        // TODO TODO TODO!!! if there are no replicas left for the slice, and the slice has no hash range, remove it
-        // if (newReplicas.size() == 0 && slice.getRange() == null) {
-        // if there are no replicas left for the slice remove it
-        if (newReplicas.size() == 0) {
-          slice = null;
-          lastSlice = true;
-        } else {
-          slice = new Slice(slice.getName(), newReplicas, slice.getProperties());
-        }
-      }
-
-      if (slice != null) {
-        newSlices.put(slice.getName(), slice);
+        slice = new Slice(slice.getName(), newReplicas, slice.getProperties());
       }
+      newSlices.put(slice.getName(), slice);
     }

-    if (lastSlice) {
-      // remove all empty pre allocated slices
-      for (Slice slice : coll.getSlices()) {
-        if (slice.getReplicas().size() == 0) {
-          newSlices.remove(slice.getName());
-        }
-      }
-    }
-
-    // if there are no slices left in the collection, remove it?
-    if (newSlices.size() == 0) {
-      return new ClusterStateMutator(zkStateReader).deleteCollection(clusterState,
-          new ZkNodeProps(Utils.makeMap(NAME, collection)));
-    } else {
-      return new ZkWriteCommand(collection, coll.copyWithSlices(newSlices));
-    }
+    return new ZkWriteCommand(collection, coll.copyWithSlices(newSlices));
   }

   public ZkWriteCommand setShardLeader(ClusterState clusterState, ZkNodeProps message) {
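Net effect of the rewritten removeReplica: a slice now survives the deletion of its last replica, and an empty collection is no longer auto-deleted. A sketch, not part of the commit, of what a client would now observe (the ZooKeeper address, collection and shard names are placeholders):

import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.common.cloud.Slice;

public class EmptySliceCheck {
  public static void main(String[] args) throws Exception {
    // Placeholder names throughout; assumes shard "a" of "mycoll" just lost its last replica.
    try (CloudSolrClient client = new CloudSolrClient("localhost:9983")) {
      client.connect();
      Slice slice = client.getZkStateReader().getClusterState().getSlice("mycoll", "a");
      // Before SOLR-5209 deleting the last replica would have removed the slice
      // (and, if it was the last slice, the collection); now the empty slice remains.
      System.out.println("slice present: " + (slice != null));
      System.out.println("replicas left: " + (slice == null ? 0 : slice.getReplicas().size()));
    }
  }
}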
@@ -365,42 +365,12 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
     testUpdateProcessorsRunOnlyOnce("distrib-dup-test-chain-implicit");

     testStopAndStartCoresInOneInstance();
-    testFailedCoreCreateCleansUp();
     // Thread.sleep(10000000000L);
     if (DEBUG) {
       super.printLayout();
     }
   }

-  private void testFailedCoreCreateCleansUp() throws Exception {
-    Create createCmd = new Create();
-    createCmd.setCoreName("core1");
-    createCmd.setCollection("the_core_collection");
-    String coredataDir = createTempDir().toFile().getAbsolutePath();
-    createCmd.setDataDir(coredataDir);
-    createCmd.setNumShards(1);
-    createCmd.setSchemaName("nonexistent_schema.xml");
-
-    String url = getBaseUrl(clients.get(0));
-    try (final HttpSolrClient client = new HttpSolrClient(url)) {
-      client.request(createCmd);
-      fail("Expected SolrCore create to fail");
-    } catch (Exception e) {
-
-    }
-
-    TimeOut timeout = new TimeOut(15, TimeUnit.SECONDS);
-    while (cloudClient.getZkStateReader().getZkClient().exists("/collections/the_core_collection", true)) {
-      if (timeout.hasTimedOut()) {
-        fail(cloudClient.getZkStateReader().getZkClient().getChildren("/collections", null, true).toString() + " Collection zk node still exists");
-      }
-      Thread.sleep(100);
-    }
-
-
-    assertFalse("Collection zk node still exists", cloudClient.getZkStateReader().getZkClient().exists("/collections/the_core_collection", true));
-  }
-
   private void testShardParamVariations() throws Exception {
     SolrQuery query = new SolrQuery("*:*");
     Map<String,Long> shardCounts = new HashMap<>();
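The wait loop deleted above is an instance of a polling idiom these tests use throughout (see also the UnloadDistributedZkTest hunk below): re-check a cluster-state condition under a TimeOut budget, sleeping briefly between checks. A generic sketch of the idiom, with a hypothetical helper name and an arbitrary 30-second budget:

import java.util.concurrent.TimeUnit;

import org.apache.solr.util.TimeOut;

// Sketch only: the poll-until-condition idiom shared by the tests in this commit.
class PollingCheck {
  interface Condition {
    boolean met() throws Exception; // e.g. a cluster-state or ZK-node check
  }

  static void waitFor(String description, Condition condition) throws Exception {
    final TimeOut timeout = new TimeOut(30, TimeUnit.SECONDS); // budget is arbitrary here
    while (!condition.met()) {
      if (timeout.hasTimedOut()) {
        throw new AssertionError("timed out waiting for " + description);
      }
      Thread.sleep(50); // recheck interval, matching the tests' 50-100ms sleeps
    }
  }
}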
@@ -509,13 +509,19 @@ public class CollectionsAPIDistributedZkTest extends AbstractFullDistribZkTestBa
     cloudClient.getZkStateReader().updateClusterState();

     Collection<Slice> slices = cloudClient.getZkStateReader().getClusterState().getActiveSlices("corewithnocollection3");
-    assertNull(slices);
+    int replicaCount = 0;
+    if (slices != null) {
+      for (Slice slice : slices) {
+        replicaCount += slice.getReplicas().size();
+      }
+    }
+    assertEquals("replicaCount", 0, replicaCount);

     CollectionAdminRequest.List list = new CollectionAdminRequest.List();
     CollectionAdminResponse res = new CollectionAdminResponse();
     res.setResponse(makeRequest(getBaseUrl((HttpSolrClient) clients.get(1)), list));
     List<String> collections = (List<String>) res.getResponse().get("collections");
-    assertFalse(collections.contains("corewithnocollection3"));
+    assertTrue(collections.contains("corewithnocollection3"));
   }

   private void testNodesUsedByCreate() throws Exception {
@@ -24,6 +24,7 @@ import org.apache.solr.client.solrj.request.QueryRequest;
 import org.apache.solr.common.cloud.DocCollection;
 import org.apache.solr.common.cloud.ImplicitDocRouter;
 import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.cloud.Slice;
 import org.apache.solr.common.cloud.ZkNodeProps;
 import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.params.MapSolrParams;
@@ -84,11 +85,12 @@ public class DeleteLastCustomShardedReplicaTest extends AbstractFullDistribZkTes
           .getClusterState().getCollection(collectionName);
       Replica replica = testcoll.getSlice("a").getReplicas().iterator().next();

-      removeAndWaitForLastReplicaGone(client, collectionName, replica, "a");
+      removeAndWaitForReplicaGone(client, collectionName, replica, "a", replicationFactor-1);
     }
   }

-  protected void removeAndWaitForLastReplicaGone(CloudSolrClient client, String COLL_NAME, Replica replica, String shard)
+  protected void removeAndWaitForReplicaGone(CloudSolrClient client, String COLL_NAME, Replica replica, String shard,
+      final int expectedNumReplicasRemaining)
       throws SolrServerException, IOException, InterruptedException {
     Map m = makeMap("collection", COLL_NAME, "action", DELETEREPLICA.toLower(), "shard",
         shard, "replica", replica.getName());
@@ -102,9 +104,11 @@ public class DeleteLastCustomShardedReplicaTest extends AbstractFullDistribZkTes
     while (! timeout.hasTimedOut()) {
       testcoll = getCommonCloudSolrClient().getZkStateReader()
           .getClusterState().getCollection(COLL_NAME);
-      // In case of a custom sharded collection, the last replica deletion would also lead to
+      // As of SOLR-5209 the last replica deletion no longer leads to
       // the deletion of the slice.
-      success = testcoll.getSlice(shard) == null;
+      final Slice slice = testcoll.getSlice(shard);
+      final int actualNumReplicasRemaining = (slice == null ? 0 : slice.getReplicas().size());
+      success = (actualNumReplicasRemaining == expectedNumReplicasRemaining);
       if (success) {
         log.info("replica cleaned up {}/{} core {}",
             shard + "/" + replica.getName(), replica.getStr("core"));
@@ -680,8 +680,10 @@ public class OverseerTest extends SolrTestCaseJ4 {
       mockController.publishState(collection, core, core_node, null, numShards);
       while (version == getClusterStateVersion(zkClient));
       Thread.sleep(500);
-      assertFalse(collection+" should be gone after publishing the null state",
-          reader.getClusterState().hasCollection(collection));
+      assertTrue(collection+" should remain after removal of the last core", // as of SOLR-5209 core removal does not cascade to remove the slice and collection
+          reader.getClusterState().getCollections().contains(collection));
+      assertTrue(core_node+" should be gone after publishing the null state",
+          null == reader.getClusterState().getCollection(collection).getReplica(core_node));
     } finally {
       close(mockController);
       close(overseerClient);
@@ -1312,4 +1314,134 @@ public class OverseerTest extends SolrTestCaseJ4 {
     return zkClient;
   }

+  @Test
+  public void testRemovalOfLastReplica() throws Exception {
+
+    final Integer numReplicas = 1+random().nextInt(4); // between 1 and 4 replicas
+    final Integer numShards = 1+random().nextInt(4); // between 1 and 4 shards
+
+    final String zkDir = createTempDir("zkData").toFile().getAbsolutePath();
+    final ZkTestServer server = new ZkTestServer(zkDir);
+
+    SolrZkClient zkClient = null;
+    ZkStateReader zkStateReader = null;
+    SolrZkClient overseerClient = null;
+    try {
+      server.run();
+      AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
+      AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
+
+      zkClient = new SolrZkClient(server.getZkAddress(), TIMEOUT);
+      ZkController.createClusterZkNodes(zkClient);
+
+      zkStateReader = new ZkStateReader(zkClient);
+      zkStateReader.createClusterStateWatchersAndUpdate();
+
+      overseerClient = electNewOverseer(server.getZkAddress());
+
+      DistributedQueue q = Overseer.getInQueue(zkClient);
+
+      // create collection
+      {
+        final Integer maxShardsPerNode = numReplicas * numShards;
+        ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, CollectionParams.CollectionAction.CREATE.toLower(),
+            "name", collection,
+            ZkStateReader.NUM_SHARDS_PROP, numShards.toString(),
+            ZkStateReader.REPLICATION_FACTOR, "1",
+            ZkStateReader.MAX_SHARDS_PER_NODE, maxShardsPerNode.toString()
+            );
+        q.offer(Utils.toJSON(m));
+      }
+      waitForCollections(zkStateReader, collection);
+
+      // create nodes with state recovering
+      for (int rr = 1; rr <= numReplicas; ++rr) {
+        for (int ss = 1; ss <= numShards; ++ss) {
+          final int N = (numReplicas-rr)*numShards + ss;
+          ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.STATE.toLower(),
+              ZkStateReader.BASE_URL_PROP, "http://127.0.0.1/solr",
+              ZkStateReader.SHARD_ID_PROP, "shard"+ss,
+              ZkStateReader.NODE_NAME_PROP, "node"+N,
+              ZkStateReader.COLLECTION_PROP, collection,
+              ZkStateReader.CORE_NAME_PROP, "core"+N,
+              ZkStateReader.ROLES_PROP, "",
+              ZkStateReader.STATE_PROP, Replica.State.RECOVERING.toString());
+
+          q.offer(Utils.toJSON(m));
+        }
+      }
+      // verify recovering
+      for (int rr = 1; rr <= numReplicas; ++rr) {
+        for (int ss = 1; ss <= numShards; ++ss) {
+          final int N = (numReplicas-rr)*numShards + ss;
+          verifyReplicaStatus(zkStateReader, collection, "shard"+ss, "core_node"+N, Replica.State.RECOVERING);
+        }
+      }
+
+      // publish node states (active)
+      for (int rr = 1; rr <= numReplicas; ++rr) {
+        for (int ss = 1; ss <= numShards; ++ss) {
+          final int N = (numReplicas-rr)*numShards + ss;
+          ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.STATE.toLower(),
+              ZkStateReader.BASE_URL_PROP, "http://127.0.0.1/solr",
+              ZkStateReader.NODE_NAME_PROP, "node"+N,
+              ZkStateReader.COLLECTION_PROP, collection,
+              ZkStateReader.CORE_NAME_PROP, "core"+N,
+              ZkStateReader.ROLES_PROP, "",
+              ZkStateReader.STATE_PROP, Replica.State.ACTIVE.toString());
+
+          q.offer(Utils.toJSON(m));
+        }
+      }
+      // verify active
+      for (int rr = 1; rr <= numReplicas; ++rr) {
+        for (int ss = 1; ss <= numShards; ++ss) {
+          final int N = (numReplicas-rr)*numShards + ss;
+          verifyReplicaStatus(zkStateReader, collection, "shard"+ss, "core_node"+N, Replica.State.ACTIVE);
+        }
+      }
+
+      // delete node
+      for (int rr = 1; rr <= numReplicas; ++rr) {
+        for (int ss = 1; ss <= numShards; ++ss) {
+          final int N = (numReplicas-rr)*numShards + ss;
+          ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.DELETECORE.toLower(),
+              ZkStateReader.COLLECTION_PROP, collection,
+              ZkStateReader.CORE_NODE_NAME_PROP, "core_node"+N);
+
+          q.offer(Utils.toJSON(m));
+
+          {
+            int iterationsLeft = 100;
+            while (iterationsLeft-- > 0) {
+              final Slice slice = zkStateReader.getClusterState().getSlice(collection, "shard"+ss);
+              if (null == slice || null == slice.getReplicasMap().get("core_node"+N)) {
+                break;
+              }
+              if (VERBOSE) log.info("still seeing {} shard{} core_node{}, rechecking in 50ms ({} iterations left)", collection, ss, N, iterationsLeft);
+              Thread.sleep(50);
+            }
+          }
+
+          final DocCollection docCollection = zkStateReader.getClusterState().getCollection(collection);
+          assertTrue("found no "+collection, (null != docCollection));
+
+          final Slice slice = docCollection.getSlice("shard"+ss);
+          assertTrue("found no "+collection+" shard"+ss+" slice after removal of replica "+rr+" of "+numReplicas, (null != slice));
+
+          final Collection<Replica> replicas = slice.getReplicas();
+          assertEquals("wrong number of "+collection+" shard"+ss+" replicas left, replicas="+replicas, numReplicas-rr, replicas.size());
+        }
+      }
+
+    } finally {
+
+      close(overseerClient);
+      close(zkStateReader);
+      close(zkClient);
+
+      server.shutdown();
+    }
+  }
+
 }
@@ -149,28 +149,19 @@ public class UnloadDistributedZkTest extends BasicDistributedZkTest {
       unloadCmd.setCoreName(unloadCmdCoreName1);
       adminClient.request(unloadCmd);

-      // there should be only one shard
-      checkCoreNamePresenceAndSliceCount(collection, unloadCmdCoreName1, false /* shouldBePresent */, numShards-1 /* expectedSliceCount */);
+      // there should still be two shards (as of SOLR-5209)
+      checkCoreNamePresenceAndSliceCount(collection, unloadCmdCoreName1, false /* shouldBePresent */, numShards /* expectedSliceCount */);

       // now unload one of the other
       unloadCmd = new Unload(false);
       unloadCmd.setCoreName(unloadCmdCoreName2);
       adminClient.request(unloadCmd);
-      checkCoreNamePresenceAndSliceCount(collection, unloadCmdCoreName2, false /* shouldBePresent */, numShards-2 /* expectedSliceCount */);
+      checkCoreNamePresenceAndSliceCount(collection, unloadCmdCoreName2, false /* shouldBePresent */, numShards /* expectedSliceCount */);
     }

     //printLayout();
-    // the collection should be gone
-    final TimeOut timeout = new TimeOut(30, TimeUnit.SECONDS);
-    while (getCommonCloudSolrClient().getZkStateReader().getClusterState().hasCollection(collection)) {
-      if (timeout.hasTimedOut()) {
-        printLayout();
-        fail("Still found collection");
-      }
-
-      Thread.sleep(50);
-    }
-
+    // the collection should still be present (as of SOLR-5209 replica removal does not cascade to remove the slice and collection)
+    assertTrue("No longer found collection "+collection, getCommonCloudSolrClient().getZkStateReader().getClusterState().hasCollection(collection));
   }

   /**