SOLR-13074: MoveReplicaHDFSTest leaks threads, falls into an endless loop, logging like crazy (Kevin Risden)

Signed-off-by: Kevin Risden <krisden@apache.org>
This commit is contained in:
Kevin Risden 2019-02-02 14:40:44 -05:00
parent 9a6f942f82
commit 9753e00294
No known key found for this signature in database
GPG Key ID: 040FAE3292C5F73F
5 changed files with 37 additions and 112 deletions

View File

@ -105,6 +105,8 @@ Other Changes
* SOLR-13060: Improve HdfsAutoAddReplicasIntegrationTest and HdfsCollectionsAPIDistributedZkTest (Kevin Risden) * SOLR-13060: Improve HdfsAutoAddReplicasIntegrationTest and HdfsCollectionsAPIDistributedZkTest (Kevin Risden)
* SOLR-13074: MoveReplicaHDFSTest leaks threads, falls into an endless loop, logging like crazy (Kevin Risden)
================== 8.0.0 ================== ================== 8.0.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release. Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

View File

@ -40,8 +40,7 @@ import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
@ThreadLeakFilters(defaultFilters = true, filters = { @ThreadLeakFilters(defaultFilters = true, filters = {
BadHdfsThreadsFilter.class, // hdfs currently leaks thread(s) BadHdfsThreadsFilter.class // hdfs currently leaks thread(s)
MoveReplicaHDFSTest.ForkJoinThreadsFilter.class
}) })
@Nightly // test is too long for non nightly @Nightly // test is too long for non nightly
public class MoveReplicaHDFSFailoverTest extends SolrCloudTestCase { public class MoveReplicaHDFSFailoverTest extends SolrCloudTestCase {

View File

@ -16,75 +16,51 @@
*/ */
package org.apache.solr.cloud; package org.apache.solr.cloud;
import java.io.IOException;
import com.carrotsearch.randomizedtesting.ThreadFilter;
import com.carrotsearch.randomizedtesting.annotations.Nightly; import com.carrotsearch.randomizedtesting.annotations.Nightly;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite; import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
import org.apache.lucene.util.TimeUnits; import org.apache.lucene.util.TimeUnits;
import org.apache.solr.cloud.hdfs.HdfsTestUtil; import org.apache.solr.cloud.hdfs.HdfsTestUtil;
import org.apache.solr.common.cloud.ZkConfigManager;
import org.apache.solr.util.BadHdfsThreadsFilter; import org.apache.solr.util.BadHdfsThreadsFilter;
import org.junit.AfterClass; import org.junit.AfterClass;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
/** @Slow
* @Nightly
*/
@ThreadLeakFilters(defaultFilters = true, filters = { @ThreadLeakFilters(defaultFilters = true, filters = {
BadHdfsThreadsFilter.class, // hdfs currently leaks thread(s) BadHdfsThreadsFilter.class // hdfs currently leaks thread(s)
MoveReplicaHDFSTest.ForkJoinThreadsFilter.class
}) })
@Nightly // test is too long for non nightly
@TimeoutSuite(millis = TimeUnits.HOUR) @TimeoutSuite(millis = TimeUnits.HOUR)
@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-13060")
public class MoveReplicaHDFSTest extends MoveReplicaTest { public class MoveReplicaHDFSTest extends MoveReplicaTest {
private static MiniDFSCluster dfsCluster; private static MiniDFSCluster dfsCluster;
@BeforeClass @BeforeClass
public static void setupClass() throws Exception { public static void setupClass() throws Exception {
System.setProperty("solr.hdfs.blockcache.enabled", "false"); System.setProperty("solr.hdfs.blockcache.blocksperbank", "512");
System.setProperty("tests.hdfs.numdatanodes", "1");
dfsCluster = HdfsTestUtil.setupClass(createTempDir().toFile().getAbsolutePath()); dfsCluster = HdfsTestUtil.setupClass(createTempDir().toFile().getAbsolutePath());
ZkConfigManager configManager = new ZkConfigManager(zkClient());
configManager.uploadConfigDir(configset("cloud-hdfs"), "conf1");
System.setProperty("solr.hdfs.home", HdfsTestUtil.getDataDir(dfsCluster, "data"));
} }
@AfterClass @AfterClass
public static void teardownClass() throws Exception { public static void teardownClass() throws Exception {
try { try {
IOUtils.close( HdfsTestUtil.teardownClass(dfsCluster);
() -> {
try {
if (cluster != null) cluster.shutdown();
} catch (Exception e) {
throw new IOException("Could not shut down the cluster.", e);
}
},
() -> {
try {
if (dfsCluster != null) HdfsTestUtil.teardownClass(dfsCluster);
} catch (Exception e) {
throw new IOException("Could not shut down dfs cluster.", e);
}
}
);
} finally { } finally {
cluster = null;
dfsCluster = null; dfsCluster = null;
System.setProperty("solr.hdfs.blockcache.blocksperbank", "512");
System.setProperty("tests.hdfs.numdatanodes", "1");
} }
} }
@Override
protected String getConfigSet() {
return "cloud-hdfs";
}
@Test @Test
// 12-Jun-2018 @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") //2018-03-10
public void testNormalMove() throws Exception { public void testNormalMove() throws Exception {
inPlaceMove = false; inPlaceMove = false;
test(); test();
@ -108,13 +84,5 @@ public class MoveReplicaHDFSTest extends MoveReplicaTest {
public void testFailedMove() throws Exception { public void testFailedMove() throws Exception {
super.testFailedMove(); super.testFailedMove();
} }
public static class ForkJoinThreadsFilter implements ThreadFilter {
@Override
public boolean reject(Thread t) {
String name = t.getName();
return name.startsWith("ForkJoinPool.commonPool");
}
}
} }

View File

@ -22,7 +22,6 @@ import java.lang.invoke.MethodHandles;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
@ -41,46 +40,38 @@ import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Replica; import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice; import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.params.CollectionParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.util.IdUtils; import org.apache.solr.util.IdUtils;
import org.apache.solr.util.LogLevel;
import org.junit.After; import org.junit.After;
import org.junit.Before; import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@LogLevel("org.apache.solr.cloud=DEBUG;org.apache.solr.cloud.autoscaling=DEBUG;")
public class MoveReplicaTest extends SolrCloudTestCase { public class MoveReplicaTest extends SolrCloudTestCase {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
// used by MoveReplicaHDFSTest // used by MoveReplicaHDFSTest
protected boolean inPlaceMove = true; protected boolean inPlaceMove = true;
@BeforeClass protected String getConfigSet() {
public static void setupCluster() throws Exception { return "cloud-dynamic";
}
protected String getSolrXml() {
return "solr.xml";
} }
@Before @Before
public void beforeTest() throws Exception { public void beforeTest() throws Exception {
inPlaceMove = true; inPlaceMove = true;
configureCluster(4) configureCluster(4)
.addConfig("conf1", TEST_PATH().resolve("configsets").resolve("cloud-dynamic").resolve("conf")) .addConfig("conf1", configset(getConfigSet()))
.addConfig("conf2", configset(getConfigSet()))
.withSolrXml(TEST_PATH().resolve("solr.xml"))
.configure(); .configure();
NamedList<Object> overSeerStatus = cluster.getSolrClient().request(CollectionAdminRequest.getOverseerStatus()); NamedList<Object> overSeerStatus = cluster.getSolrClient().request(CollectionAdminRequest.getOverseerStatus());
JettySolrRunner overseerJetty = null; JettySolrRunner overseerJetty = null;
String overseerLeader = (String) overSeerStatus.get("leader"); String overseerLeader = (String) overSeerStatus.get("leader");
for (int i = 0; i < cluster.getJettySolrRunners().size(); i++) { for (JettySolrRunner jetty : cluster.getJettySolrRunners()) {
JettySolrRunner jetty = cluster.getJettySolrRunner(i);
if (jetty.getNodeName().equals(overseerLeader)) { if (jetty.getNodeName().equals(overseerLeader)) {
overseerJetty = jetty; overseerJetty = jetty;
break; break;
@ -93,7 +84,11 @@ public class MoveReplicaTest extends SolrCloudTestCase {
@After @After
public void afterTest() throws Exception { public void afterTest() throws Exception {
cluster.shutdown(); try {
shutdownCluster();
} finally {
super.tearDown();
}
} }
@Test @Test
@ -147,7 +142,7 @@ public class MoveReplicaTest extends SolrCloudTestCase {
success = true; success = true;
break; break;
} }
assertFalse(rsp.getRequestStatus() == RequestStatusState.FAILED); assertNotSame(rsp.getRequestStatus(), RequestStatusState.FAILED);
Thread.sleep(500); Thread.sleep(500);
} }
assertTrue(success); assertTrue(success);
@ -292,7 +287,7 @@ public class MoveReplicaTest extends SolrCloudTestCase {
boolean success = true; boolean success = true;
for (int i = 0; i < 200; i++) { for (int i = 0; i < 200; i++) {
CollectionAdminRequest.RequestStatusResponse rsp = requestStatus.process(cloudClient); CollectionAdminRequest.RequestStatusResponse rsp = requestStatus.process(cloudClient);
assertTrue(rsp.getRequestStatus().toString(), rsp.getRequestStatus() != RequestStatusState.COMPLETED); assertNotSame(rsp.getRequestStatus().toString(), rsp.getRequestStatus(), RequestStatusState.COMPLETED);
if (rsp.getRequestStatus() == RequestStatusState.FAILED) { if (rsp.getRequestStatus() == RequestStatusState.FAILED) {
success = false; success = false;
break; break;
@ -306,46 +301,11 @@ public class MoveReplicaTest extends SolrCloudTestCase {
} }
private CollectionAdminRequest.MoveReplica createMoveReplicaRequest(String coll, Replica replica, String targetNode, String shardId) { private CollectionAdminRequest.MoveReplica createMoveReplicaRequest(String coll, Replica replica, String targetNode, String shardId) {
if (random().nextBoolean()) {
return new CollectionAdminRequest.MoveReplica(coll, shardId, targetNode, replica.getNodeName()); return new CollectionAdminRequest.MoveReplica(coll, shardId, targetNode, replica.getNodeName());
} else {
// for backcompat testing of SOLR-11068
// todo remove in solr 8.0
return new BackCompatMoveReplicaRequest(coll, shardId, targetNode, replica.getNodeName());
}
} }
private CollectionAdminRequest.MoveReplica createMoveReplicaRequest(String coll, Replica replica, String targetNode) { private CollectionAdminRequest.MoveReplica createMoveReplicaRequest(String coll, Replica replica, String targetNode) {
if (random().nextBoolean()) {
return new CollectionAdminRequest.MoveReplica(coll, replica.getName(), targetNode); return new CollectionAdminRequest.MoveReplica(coll, replica.getName(), targetNode);
} else {
// for backcompat testing of SOLR-11068
// todo remove in solr 8.0
return new BackCompatMoveReplicaRequest(coll, replica.getName(), targetNode);
}
}
/**
* Added for backcompat testing
* todo remove in solr 8.0
*/
static class BackCompatMoveReplicaRequest extends CollectionAdminRequest.MoveReplica {
public BackCompatMoveReplicaRequest(String collection, String replica, String targetNode) {
super(collection, replica, targetNode);
}
public BackCompatMoveReplicaRequest(String collection, String shard, String sourceNode, String targetNode) {
super(collection, shard, sourceNode, targetNode);
}
@Override
public SolrParams getParams() {
ModifiableSolrParams params = (ModifiableSolrParams) super.getParams();
if (randomlyMoveReplica) {
params.set(CollectionParams.FROM_NODE, sourceNode);
}
return params;
}
} }
private Replica getRandomReplica(String coll, CloudSolrClient cloudClient) { private Replica getRandomReplica(String coll, CloudSolrClient cloudClient) {
@ -369,9 +329,8 @@ public class MoveReplicaTest extends SolrCloudTestCase {
return status.getCoreStatus().size(); return status.getCoreStatus().size();
} else { } else {
int size = 0; int size = 0;
Iterator<Map.Entry<String, NamedList<Object>>> it = status.getCoreStatus().iterator(); for (Map.Entry<String, NamedList<Object>> stringNamedListEntry : status.getCoreStatus()) {
while (it.hasNext()) { String coll = (String) stringNamedListEntry.getValue().findRecursive("cloud", "collection");
String coll = (String)it.next().getValue().findRecursive("cloud", "collection");
if (collectionName.equals(coll)) { if (collectionName.equals(coll)) {
size++; size++;
} }

View File

@ -17,14 +17,12 @@
package org.apache.solr.cloud.hdfs; package org.apache.solr.cloud.hdfs;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.embedded.JettySolrRunner; import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.client.solrj.request.CollectionAdminRequest; import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.cloud.MoveReplicaHDFSTest;
import org.apache.solr.cloud.SolrCloudTestCase; import org.apache.solr.cloud.SolrCloudTestCase;
import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.cloud.Replica; import org.apache.solr.common.cloud.Replica;
@ -34,8 +32,7 @@ import org.junit.AfterClass;
import org.junit.BeforeClass; import org.junit.BeforeClass;
@ThreadLeakFilters(defaultFilters = true, filters = { @ThreadLeakFilters(defaultFilters = true, filters = {
BadHdfsThreadsFilter.class, // hdfs currently leaks thread(s) BadHdfsThreadsFilter.class // hdfs currently leaks thread(s)
MoveReplicaHDFSTest.ForkJoinThreadsFilter.class
}) })
public class HDFSCollectionsAPITest extends SolrCloudTestCase { public class HDFSCollectionsAPITest extends SolrCloudTestCase {