SOLR-12412: Fix test failure

This commit is contained in:
Cao Manh Dat 2018-07-17 18:42:29 +07:00
parent be475bbc7e
commit 705e6f76a4
3 changed files with 75 additions and 51 deletions

View File

@ -17,6 +17,7 @@
package org.apache.solr.cloud; package org.apache.solr.cloud;
import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles;
import java.util.ArrayList; import java.util.ArrayList;
@ -34,6 +35,7 @@ import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice; import org.apache.solr.common.cloud.Slice;
import org.apache.solr.core.CoreContainer; import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.DirectoryFactory; import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.MockDirectoryFactory;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.junit.AfterClass; import org.junit.AfterClass;
import org.junit.BeforeClass; import org.junit.BeforeClass;
@ -43,64 +45,71 @@ import org.slf4j.LoggerFactory;
public class LeaderTragicEventTest extends SolrCloudTestCase { public class LeaderTragicEventTest extends SolrCloudTestCase {
private static final String COLLECTION = "collection1";
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
/**
 * Class-level setup, shown in side-by-side diff form: a statement that appears twice on a
 * line is present in both versions of the file; a statement that appears once is present in
 * only one of them.
 *
 * Sets the "solr.mscheduler" system property to the mock merge scheduler class name, then
 * configures the shared test cluster (configureCluster(2) - presumably two nodes) with the
 * "cloud-minimal" configset added under the name "config".
 */
@BeforeClass @BeforeClass
public static void setupCluster() throws Exception { public static void setupCluster() throws Exception {
System.setProperty("solr.mscheduler", "org.apache.solr.core.MockConcurrentMergeScheduler"); System.setProperty("solr.mscheduler", "org.apache.solr.core.MockConcurrentMergeScheduler");
// One version only: sets the system property that MockDirectoryFactory reads to decide
// whether to create its directories through LuceneTestCase.newMockDirectory().
System.setProperty(MockDirectoryFactory.SOLR_TESTS_USING_MOCK_DIRECTORY_WRAPPER, "true");
configureCluster(2) configureCluster(2)
.addConfig("config", TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf")) .addConfig("config", TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf"))
.configure(); .configure();
// One version only: makes the class-level COLLECTION constant the client's default
// collection; the other version sets the default collection inside each test method.
cluster.getSolrClient().setDefaultCollection(COLLECTION);
} }
/**
 * Class-level teardown, shown in side-by-side diff form (doubled statements are in both
 * versions, single statements in only one). Clears the system properties that
 * setupCluster set.
 */
@AfterClass @AfterClass
public static void cleanup() { public static void cleanup() {
System.clearProperty("solr.mscheduler"); System.clearProperty("solr.mscheduler");
// One version only: clears the mock-directory-wrapper flag set by that version's setupCluster.
System.clearProperty(MockDirectoryFactory.SOLR_TESTS_USING_MOCK_DIRECTORY_WRAPPER);
} }
/**
 * Leader-failover scenario, shown in side-by-side diff form. One column refers to the
 * class-level COLLECTION constant, the other to a local {@code collection} variable
 * ("collection1"). For most of the body the two columns are NOT row-aligned, so a single
 * line may pair unrelated statements from the two versions.
 *
 * Flow common to both versions:
 *   1. Create a collection from the "config" configset with 2 replicas (and presumably
 *      1 shard) and wait up to 120000 ms for all replicas to be active and live.
 *   2. Call corruptLeader, which returns the leader it corrupted and fills addedIds.
 *   3. Wait until "shard1" has 2 replicas and a leader whose name differs from the old one.
 *   4. Wait again for all replicas to be active and live, then assert that the replica
 *      returned by getNonLeader(shard) is on the old leader's node.
 *   5. Assert every id in addedIds can still be fetched with getById.
 *   6. Delete the collection.
 *
 * Differences visible between the columns: the version using the local variable also
 * returns false from the wait predicate while slice.getLeader() is null, and wraps steps
 * 2-5 in try/finally so the collection is deleted even if an assertion throws.
 *
 * NOTE(review): assertNotSame compares object references rather than string equality, so
 * the node-name check may be weaker than intended - confirm whether assertNotEquals was meant.
 */
@Test @Test
public void test() throws Exception { public void test() throws Exception {
final String collection = "collection1";
cluster.getSolrClient().setDefaultCollection(collection);
CollectionAdminRequest CollectionAdminRequest
.createCollection(COLLECTION, "config", 1, 2) .createCollection(collection, "config", 1, 2)
.process(cluster.getSolrClient()); .process(cluster.getSolrClient());
ClusterStateUtil.waitForAllActiveAndLiveReplicas(cluster.getSolrClient().getZkStateReader(), COLLECTION, 120000); ClusterStateUtil.waitForAllActiveAndLiveReplicas(cluster.getSolrClient().getZkStateReader(), collection, 120000);
try {
List<String> addedIds = new ArrayList<>();
Replica oldLeader = corruptLeader(collection, addedIds);
List<String> addedIds = new ArrayList<>(); waitForState("Timeout waiting for new replica become leader", collection, (liveNodes, collectionState) -> {
Replica oldLeader = corruptLeader(addedIds); Slice slice = collectionState.getSlice("shard1");
waitForState("Timeout waiting for new replica become leader", COLLECTION, (liveNodes, collectionState) -> { if (slice.getReplicas().size() != 2) return false;
Slice slice = collectionState.getSlice("shard1"); if (slice.getLeader() == null) return false;
if (slice.getLeader().getName().equals(oldLeader.getName())) return false;
if (slice.getReplicas().size() != 2) return false; return true;
if (slice.getLeader().getName().equals(oldLeader.getName())) return false; });
ClusterStateUtil.waitForAllActiveAndLiveReplicas(cluster.getSolrClient().getZkStateReader(), collection, 120000);
Slice shard = getCollectionState(collection).getSlice("shard1");
assertNotSame(shard.getLeader().getNodeName(), oldLeader.getNodeName());
assertEquals(getNonLeader(shard).getNodeName(), oldLeader.getNodeName());
return true; for (String id : addedIds) {
}); assertNotNull(cluster.getSolrClient().getById(collection,id));
ClusterStateUtil.waitForAllActiveAndLiveReplicas(cluster.getSolrClient().getZkStateReader(), COLLECTION, 120000); }
Slice shard = getCollectionState(COLLECTION).getSlice("shard1"); log.info("The test success oldLeader:{} currentState:{}", oldLeader, getCollectionState(collection));
assertNotSame(shard.getLeader().getNodeName(), oldLeader.getNodeName());
assertEquals(getNonLeader(shard).getNodeName(), oldLeader.getNodeName());
for (String id : addedIds) { } finally {
assertNotNull(cluster.getSolrClient().getById(COLLECTION,id)); CollectionAdminRequest.deleteCollection(collection).process(cluster.getSolrClient());
} }
log.info("The test success oldLeader:{} currentState:{}", oldLeader, getCollectionState(COLLECTION));
CollectionAdminRequest.deleteCollection(COLLECTION).process(cluster.getSolrClient());
} }
private Replica corruptLeader(List<String> addedIds) throws IOException { private Replica corruptLeader(String collection, List<String> addedIds) throws IOException {
DocCollection dc = getCollectionState(COLLECTION); DocCollection dc = getCollectionState(collection);
Replica oldLeader = dc.getLeader("shard1"); Replica oldLeader = dc.getLeader("shard1");
log.info("Corrupt leader : {}", oldLeader);
CoreContainer leaderCC = cluster.getReplicaJetty(oldLeader).getCoreContainer(); CoreContainer leaderCC = cluster.getReplicaJetty(oldLeader).getCoreContainer();
SolrCore leaderCore = leaderCC.getCores().iterator().next(); SolrCore leaderCore = leaderCC.getCores().iterator().next();
MockDirectoryWrapper dir = (MockDirectoryWrapper) leaderCore.getDirectoryFactory().get(leaderCore.getIndexDir(), DirectoryFactory.DirContext.DEFAULT, leaderCore.getSolrConfig().indexConfig.lockType); MockDirectoryWrapper mockDir = (MockDirectoryWrapper) leaderCore.getDirectoryFactory()
leaderCore.getDirectoryFactory().release(dir); .get(leaderCore.getIndexDir(), DirectoryFactory.DirContext.DEFAULT, leaderCore.getSolrConfig().indexConfig.lockType);
leaderCore.getDirectoryFactory().release(mockDir);
try (HttpSolrClient solrClient = new HttpSolrClient.Builder(dc.getLeader("shard1").getCoreUrl()).build()) { try (HttpSolrClient solrClient = new HttpSolrClient.Builder(dc.getLeader("shard1").getCoreUrl()).build()) {
for (int i = 0; i < 100; i++) { for (int i = 0; i < 100; i++) {
@ -110,16 +119,21 @@ public class LeaderTragicEventTest extends SolrCloudTestCase {
solrClient.commit(); solrClient.commit();
addedIds.add(i + ""); addedIds.add(i + "");
for (String file : dir.listAll()) { for (String file : mockDir.listAll()) {
if (file.contains("segments_")) continue; if (file.contains("segments_")) continue;
if (file.endsWith("si")) continue; if (file.endsWith("si")) continue;
if (file.endsWith("fnm")) continue; if (file.endsWith("fnm")) continue;
if (random().nextBoolean()) continue; if (random().nextBoolean()) continue;
dir.corruptFiles(Collections.singleton(file)); try {
mockDir.corruptFiles(Collections.singleton(file));
} catch (RuntimeException | FileNotFoundException e) {
// merges can lead to this exception
}
} }
} }
} catch (Exception e) { } catch (Exception e) {
log.info("Corrupt leader ex: ",e);
// Expected // Expected
} }
return oldLeader; return oldLeader;
@ -132,34 +146,39 @@ public class LeaderTragicEventTest extends SolrCloudTestCase {
/**
 * Checks that the leader stays leader after corruptLeader when no other replica is active,
 * shown in side-by-side diff form. One column refers to the class-level COLLECTION
 * constant, the other to a local {@code collection} variable ("collection2"). From the
 * try block onward the two columns are not row-aligned.
 *
 * Flow common to both versions:
 *   1. Pick numReplicas as 1 or 2 at random, create the collection from the "config"
 *      configset, and wait up to 120000 ms for all replicas to be active and live.
 *   2. If there are 2 replicas, stop the Jetty hosting the replica returned by
 *      getNonLeader and wait until that replica's state is no longer ACTIVE.
 *   3. Call corruptLeader with a throwaway id list, then sleep 5000 ms.
 *   4. Assert the current "shard1" leader has the same name as the corrupted leader.
 *   5. Restart the stopped Jetty, if any, and delete the collection.
 *
 * Differences visible between the columns: the version using the local variable logs the
 * Jetty base URL and collection state before stopping the Jetty, and deletes the
 * collection in a finally block so it runs even if an earlier step throws.
 */
@Test @Test
public void testOtherReplicasAreNotActive() throws Exception { public void testOtherReplicasAreNotActive() throws Exception {
final String collection = "collection2";
cluster.getSolrClient().setDefaultCollection(collection);
int numReplicas = random().nextInt(2) + 1; int numReplicas = random().nextInt(2) + 1;
// won't do anything if leader is the only one active replica in the shard // won't do anything if leader is the only one active replica in the shard
CollectionAdminRequest CollectionAdminRequest
.createCollection(COLLECTION, "config", 1, numReplicas) .createCollection(collection, "config", 1, numReplicas)
.process(cluster.getSolrClient()); .process(cluster.getSolrClient());
ClusterStateUtil.waitForAllActiveAndLiveReplicas(cluster.getSolrClient().getZkStateReader(), COLLECTION, 120000); ClusterStateUtil.waitForAllActiveAndLiveReplicas(cluster.getSolrClient().getZkStateReader(), collection, 120000);
JettySolrRunner otherReplicaJetty = null; try {
if (numReplicas == 2) { JettySolrRunner otherReplicaJetty = null;
Slice shard = getCollectionState(COLLECTION).getSlice("shard1"); if (numReplicas == 2) {
otherReplicaJetty = cluster.getReplicaJetty(getNonLeader(shard)); Slice shard = getCollectionState(collection).getSlice("shard1");
otherReplicaJetty.stop(); otherReplicaJetty = cluster.getReplicaJetty(getNonLeader(shard));
waitForState("Timeout waiting for replica get down", COLLECTION, (liveNodes, collectionState) -> getNonLeader(collectionState.getSlice("shard1")).getState() != Replica.State.ACTIVE); log.info("Stop jetty node : {} state:{}", otherReplicaJetty.getBaseUrl(), getCollectionState(collection));
otherReplicaJetty.stop();
waitForState("Timeout waiting for replica get down", collection, (liveNodes, collectionState) -> getNonLeader(collectionState.getSlice("shard1")).getState() != Replica.State.ACTIVE);
}
Replica oldLeader = corruptLeader(collection, new ArrayList<>());
//TODO better way to test this
Thread.sleep(5000);
Replica leader = getCollectionState(collection).getSlice("shard1").getLeader();
assertEquals(leader.getName(), oldLeader.getName());
if (otherReplicaJetty != null) {
// won't be able to do anything here, since this replica can't recovery from the leader
otherReplicaJetty.start();
}
} finally {
CollectionAdminRequest.deleteCollection(collection).process(cluster.getSolrClient());
} }
Replica oldLeader = corruptLeader(new ArrayList<>());
//TODO better way to test this
Thread.sleep(5000);
Replica leader = getCollectionState(COLLECTION).getSlice("shard1").getLeader();
assertEquals(leader.getName(), oldLeader.getName());
if (otherReplicaJetty != null) {
// won't be able to do anything here, since this replica can't recovery from the leader
otherReplicaJetty.start();
}
CollectionAdminRequest.deleteCollection(COLLECTION).process(cluster.getSolrClient());
} }

View File

@ -536,6 +536,7 @@ public class MiniSolrCloudCluster {
*/ */
public JettySolrRunner getReplicaJetty(Replica replica) { public JettySolrRunner getReplicaJetty(Replica replica) {
for (JettySolrRunner jetty : jettys) { for (JettySolrRunner jetty : jettys) {
if (jetty.isStopped()) continue;
if (replica.getCoreUrl().startsWith(jetty.getBaseUrl().toString())) if (replica.getCoreUrl().startsWith(jetty.getBaseUrl().toString()))
return jetty; return jetty;
} }

View File

@ -33,7 +33,9 @@ import org.apache.lucene.util.LuceneTestCase;
public class MockDirectoryFactory extends EphemeralDirectoryFactory { public class MockDirectoryFactory extends EphemeralDirectoryFactory {
public static final String SOLR_TESTS_ALLOW_READING_FILES_STILL_OPEN_FOR_WRITE = "solr.tests.allow_reading_files_still_open_for_write"; public static final String SOLR_TESTS_ALLOW_READING_FILES_STILL_OPEN_FOR_WRITE = "solr.tests.allow_reading_files_still_open_for_write";
public static final String SOLR_TESTS_USING_MOCK_DIRECTORY_WRAPPER = "solr.tests.using_mock_directory_wrapper";
private boolean allowReadingFilesStillOpenForWrite = Boolean.getBoolean(SOLR_TESTS_ALLOW_READING_FILES_STILL_OPEN_FOR_WRITE); private boolean allowReadingFilesStillOpenForWrite = Boolean.getBoolean(SOLR_TESTS_ALLOW_READING_FILES_STILL_OPEN_FOR_WRITE);
private boolean useMockDirectoryWrapper = Boolean.getBoolean(SOLR_TESTS_USING_MOCK_DIRECTORY_WRAPPER);
@Override @Override
protected LockFactory createLockFactory(String rawLockType) throws IOException { protected LockFactory createLockFactory(String rawLockType) throws IOException {
@ -42,7 +44,9 @@ public class MockDirectoryFactory extends EphemeralDirectoryFactory {
@Override @Override
protected Directory create(String path, LockFactory lockFactory, DirContext dirContext) throws IOException { protected Directory create(String path, LockFactory lockFactory, DirContext dirContext) throws IOException {
Directory dir = LuceneTestCase.newDirectory(); // we ignore the given lock factory Directory dir;
if (useMockDirectoryWrapper) dir = LuceneTestCase.newMockDirectory();
else dir = LuceneTestCase.newDirectory(); // we ignore the given lock factory
Directory cdir = reduce(dir); Directory cdir = reduce(dir);
cdir = reduce(cdir); cdir = reduce(cdir);