SOLR-7311: Add infrastructure and tests to make sure Solr works well in the face of HDFS NameNode high availability and failover.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1675883 13f79535-47bb-0310-9956-ffa450edef68
Mark Robert Miller 2015-04-24 15:08:11 +00:00
parent 1a778cd068
commit ed9e1fc1b5
12 changed files with 201 additions and 55 deletions

View File: HdfsUtil.java

@@ -26,26 +26,34 @@ import org.apache.solr.common.SolrException.ErrorCode;

public class HdfsUtil {

+  // Allows tests to easily add additional conf
+  public static Configuration TEST_CONF = null;
+
  private static final String[] HADOOP_CONF_FILES = {"core-site.xml",
      "hdfs-site.xml", "mapred-site.xml", "yarn-site.xml", "hadoop-site.xml"};

  public static void addHdfsResources(Configuration conf, String confDir) {
    if (confDir != null && confDir.length() > 0) {
      File confDirFile = new File(confDir);
      if (!confDirFile.exists()) {
        throw new SolrException(ErrorCode.SERVER_ERROR, "Resource directory does not exist: " + confDirFile.getAbsolutePath());
      }
      if (!confDirFile.isDirectory()) {
        throw new SolrException(ErrorCode.SERVER_ERROR, "Specified resource directory is not a directory: " + confDirFile.getAbsolutePath());
      }
      if (!confDirFile.canRead()) {
        throw new SolrException(ErrorCode.SERVER_ERROR, "Resource directory must be readable by the Solr process: " + confDirFile.getAbsolutePath());
      }
      for (String file : HADOOP_CONF_FILES) {
        if (new File(confDirFile, file).exists()) {
          conf.addResource(new Path(confDir, file));
        }
      }
    }

+    if (TEST_CONF != null) {
+      conf.addResource(TEST_CONF);
+    }
  }
}
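The TEST_CONF hook is how the HA test setup injects NameNode failover settings into every Configuration that Solr builds from a conf dir. A minimal sketch of the intended usage, assuming test code that wants to layer extra properties on top of the hadoop-site files; the failover key and the hadoopConfDir value are illustrative, not part of this commit (HdfsTestUtil injects the real HA keys via HATestUtil.setFailoverConfigurations):

import org.apache.hadoop.conf.Configuration;
import org.apache.solr.util.HdfsUtil;

// ...inside a test method:
Configuration extra = new Configuration();
extra.set("dfs.client.failover.max.attempts", "10"); // example HA client setting
HdfsUtil.TEST_CONF = extra; // merged into every conf built by addHdfsResources

Configuration conf = new Configuration();
HdfsUtil.addHdfsResources(conf, hadoopConfDir); // hadoopConfDir: hypothetical conf dir with hdfs-site.xml etc.
// conf now holds the conf-dir resources plus everything from TEST_CONF.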

View File: HdfsCollectionsAPIDistributedZkTest.java

@@ -41,17 +41,13 @@ public class HdfsCollectionsAPIDistributedZkTest extends CollectionsAPIDistributedZkTest {

  @BeforeClass
  public static void setupClass() throws Exception {
    dfsCluster = HdfsTestUtil.setupClass(createTempDir().toFile().getAbsolutePath());
-    System.setProperty("solr.hdfs.home", dfsCluster.getURI().toString() + "/solr");
-    System.setProperty("solr.hdfs.blockcache.enabled", "false");
  }

  @AfterClass
  public static void teardownClass() throws Exception {
    assertEquals(0, HdfsUpdateLog.INIT_FAILED_LOGS_COUNT.get());
    HdfsTestUtil.teardownClass(dfsCluster);
-    System.clearProperty("solr.hdfs.home");
-    System.clearProperty("solr.hdfs.blockcache.enabled");
    dfsCluster = null;
  }

View File: HdfsNNFailoverTest.java (new)

@@ -0,0 +1,79 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.cloud.hdfs;

import java.io.IOException;

import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.solr.cloud.BasicDistributedZkTest;
import org.apache.solr.util.BadHdfsThreadsFilter;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;

import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;

@Slow
@ThreadLeakFilters(defaultFilters = true, filters = {
    BadHdfsThreadsFilter.class // hdfs currently leaks thread(s)
})
public class HdfsNNFailoverTest extends BasicDistributedZkTest {
  private static final String COLLECTION = "collection";

  private static MiniDFSCluster dfsCluster;

  @BeforeClass
  public static void setupClass() throws Exception {
    dfsCluster = HdfsTestUtil.setupClass(createTempDir().toFile().getAbsolutePath(), false, true);
  }

  @AfterClass
  public static void teardownClass() throws Exception {
    HdfsTestUtil.teardownClass(dfsCluster);
    dfsCluster = null;
  }

  @Override
  protected String getDataDir(String dataDir) throws IOException {
    return HdfsTestUtil.getDataDir(dfsCluster, dataDir);
  }

  public HdfsNNFailoverTest() {
    super();
    sliceCount = 1;
    fixShardCount(TEST_NIGHTLY ? 7 : random().nextInt(2) + 1);
  }

  protected String getSolrXml() {
    return "solr-no-core.xml";
  }

  @Test
  public void test() throws Exception {
    createCollection(COLLECTION, 1, 1, 1);
    waitForRecoveriesToFinish(COLLECTION, false);
    // TODO: SOLR-7360 Enable HDFS NameNode failover testing.
    // dfsCluster.transitionToStandby(0);
    // dfsCluster.transitionToActive(1);
  }
}
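Until SOLR-7360 lands, the test above only creates a collection against the HA cluster. A rough sketch of what the enabled failover path could look like, using the MiniDFSCluster transition API that the commented-out lines reference; the indexing helper calls are illustrative, not part of this commit:

  @Test
  public void testFailover() throws Exception {
    createCollection(COLLECTION, 1, 1, 1);
    waitForRecoveriesToFinish(COLLECTION, false);

    // Flip the active NameNode; clients configured through the failover
    // settings injected via HdfsUtil.TEST_CONF should ride this out.
    dfsCluster.transitionToStandby(0);
    dfsCluster.transitionToActive(1);

    // Writing after the transition is what actually exercises failover
    // (index_specific/commit are helpers from the distributed test base class).
    index_specific(0, "id", "1", "text_t", "some text");
    commit();
  }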

View File: HdfsTestUtil.java

@@ -14,10 +14,13 @@ import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.MiniDFSNNTopology;
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
+import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.util.IOUtils;
+import org.apache.solr.util.HdfsUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -41,6 +44,10 @@ import org.slf4j.LoggerFactory;

public class HdfsTestUtil {
  private static Logger log = LoggerFactory.getLogger(HdfsTestUtil.class);

+  private static final String LOGICAL_HOSTNAME = "ha-nn-uri-%d";
+
+  private static final boolean HA_TESTING_ENABLED = false; // SOLR-XXX
+
  private static Locale savedLocale;

  private static Map<MiniDFSCluster,Timer> timers = new ConcurrentHashMap<>();
@@ -50,17 +57,23 @@ public class HdfsTestUtil {

  private static FileSystem badTlogOutStreamFs;

  public static MiniDFSCluster setupClass(String dir) throws Exception {
-    return setupClass(dir, true);
+    return setupClass(dir, true, true);
  }

-  public static MiniDFSCluster setupClass(String dir, boolean safeModeTesting) throws Exception {
+  public static MiniDFSCluster setupClass(String dir, boolean haTesting) throws Exception {
+    return setupClass(dir, haTesting, true);
+  }
+
+  public static MiniDFSCluster setupClass(String dir, boolean safeModeTesting, boolean haTesting) throws Exception {
    LuceneTestCase.assumeFalse("HDFS tests were disabled by -Dtests.disableHdfs",
        Boolean.parseBoolean(System.getProperty("tests.disableHdfs", "false")));

    savedLocale = Locale.getDefault();
    // TODO: we HACK around HADOOP-9643
    Locale.setDefault(Locale.ENGLISH);

+    if (!HA_TESTING_ENABLED) haTesting = false;

    int dataNodes = 2;

    Configuration conf = new Configuration();
@@ -78,17 +91,35 @@ public class HdfsTestUtil {

    System.setProperty("solr.hdfs.blockcache.global", Boolean.toString(LuceneTestCase.random().nextBoolean()));

-    final MiniDFSCluster dfsCluster = new MiniDFSCluster(conf, dataNodes, true, null);
+    final MiniDFSCluster dfsCluster;
+    if (!haTesting) {
+      dfsCluster = new MiniDFSCluster(conf, dataNodes, true, null);
+      System.setProperty("solr.hdfs.home", getDataDir(dfsCluster, "solr_hdfs_home"));
+    } else {
+      dfsCluster = new MiniDFSCluster.Builder(conf)
+          .nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(dataNodes)
+          .build();
+
+      Configuration haConfig = getClientConfiguration(dfsCluster);
+      HdfsUtil.TEST_CONF = haConfig;
+      System.setProperty("solr.hdfs.home", getDataDir(dfsCluster, "solr_hdfs_home"));
+    }

    dfsCluster.waitActive();

-    System.setProperty("solr.hdfs.home", getDataDir(dfsCluster, "solr_hdfs_home"));
+    if (haTesting) dfsCluster.transitionToActive(0);

-    int rndMode = LuceneTestCase.random().nextInt(10);
-    if (safeModeTesting && rndMode > 4) {
+    int rndMode = LuceneTestCase.random().nextInt(3);
+    if (safeModeTesting && rndMode == 1) {
      NameNodeAdapter.enterSafeMode(dfsCluster.getNameNode(), false);

      int rnd = LuceneTestCase.random().nextInt(10000);
      Timer timer = new Timer();
      timers.put(dfsCluster, timer);
      timer.schedule(new TimerTask() {

        @Override
@@ -96,9 +127,27 @@ public class HdfsTestUtil {

          NameNodeAdapter.leaveSafeMode(dfsCluster.getNameNode());
        }
      }, rnd);
+    } else if (haTesting && rndMode == 2) {
+      int rnd = LuceneTestCase.random().nextInt(30000);
+      Timer timer = new Timer();
+      timers.put(dfsCluster, timer);
+      timer.schedule(new TimerTask() {
+
+        @Override
+        public void run() {
+          // TODO: randomly transition to standby
+          // try {
+          //   dfsCluster.transitionToStandby(0);
+          //   dfsCluster.transitionToActive(1);
+          // } catch (IOException e) {
+          //   throw new RuntimeException();
+          // }
+        }
+      }, rnd);
    } else {
      // TODO: we could do much better at testing this
      // force a lease recovery by creating a tlog file and not closing it
      URI uri = dfsCluster.getURI();
@@ -112,6 +161,16 @@ public class HdfsTestUtil {

    return dfsCluster;
  }

+  public static Configuration getClientConfiguration(MiniDFSCluster dfsCluster) {
+    if (dfsCluster.getNameNodeInfos().length > 1) {
+      Configuration conf = new Configuration();
+      HATestUtil.setFailoverConfigurations(dfsCluster, conf);
+      return conf;
+    } else {
+      return new Configuration();
+    }
+  }
+
  public static void teardownClass(MiniDFSCluster dfsCluster) throws Exception {
@@ -156,12 +215,21 @@ public class HdfsTestUtil {

    if (dataDir == null) {
      return null;
    }

-    URI uri = dfsCluster.getURI();
-    String dir = uri.toString()
-        + "/"
+    String dir = "/"
        + new File(dataDir).toString().replaceAll(":", "_")
        .replaceAll("/", "_").replaceAll(" ", "_");
-    return dir;
+    return getURI(dfsCluster) + dir;
  }

+  public static String getURI(MiniDFSCluster dfsCluster) {
+    if (dfsCluster.getNameNodeInfos().length > 1) {
+      String logicalName = String.format(Locale.ENGLISH, LOGICAL_HOSTNAME, dfsCluster.getInstanceId()); // NOTE: hdfs uses default locale
+      return "hdfs://" + logicalName;
+    } else {
+      URI uri = dfsCluster.getURI(0);
+      return uri.toString();
+    }
+  }
}
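The getURI/getClientConfiguration pair is the pattern every test below switches to: in HA mode getURI returns a logical URI such as hdfs://ha-nn-uri-0 that only resolves through the failover proxy settings carried in the client Configuration, so the two must travel together. A minimal sketch of that pattern, assuming a dfsCluster already running from HdfsTestUtil.setupClass:

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;

// Sketch: open a FileSystem that works for both single-NN and HA clusters.
static FileSystem openFs(MiniDFSCluster dfsCluster) throws Exception {
  Configuration conf = HdfsTestUtil.getClientConfiguration(dfsCluster);
  conf.setBoolean("fs.hdfs.impl.disable.cache", true); // don't share cached clients across tests
  // Any call on the returned FileSystem now goes through the failover-aware client.
  return FileSystem.get(new URI(HdfsTestUtil.getURI(dfsCluster)), conf);
}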

View File: HdfsThreadLeakTest.java

@@ -45,7 +45,7 @@ public class HdfsThreadLeakTest extends SolrTestCaseJ4 {

  @BeforeClass
  public static void beforeClass() throws Exception {
-    dfsCluster = HdfsTestUtil.setupClass(createTempDir().toFile().getAbsolutePath(), false);
+    dfsCluster = HdfsTestUtil.setupClass(createTempDir().toFile().getAbsolutePath(), false, false);
  }

  @AfterClass
@@ -66,8 +66,8 @@ public class HdfsThreadLeakTest extends SolrTestCaseJ4 {

  @Test
  public void testBasic() throws IOException {
-    URI uri = dfsCluster.getURI();
-    Path path = new Path(uri.toString());
+    String uri = HdfsTestUtil.getURI(dfsCluster);
+    Path path = new Path(uri);
    Configuration conf = new Configuration();
    conf.setBoolean("fs.hdfs.impl.disable.cache", true);
    FileSystem fs = FileSystem.get(path.toUri(), conf);

View File: HdfsWriteToMultipleCollectionsTest.java

@@ -64,13 +64,11 @@ public class HdfsWriteToMultipleCollectionsTest extends BasicDistributedZkTest {

  public static void setupClass() throws Exception {
    schemaString = "schema15.xml"; // we need a string id
    dfsCluster = HdfsTestUtil.setupClass(createTempDir().toFile().getAbsolutePath());
-    System.setProperty(SOLR_HDFS_HOME, dfsCluster.getURI().toString() + "/solr");
  }

  @AfterClass
  public static void teardownClass() throws Exception {
    HdfsTestUtil.teardownClass(dfsCluster);
-    System.clearProperty(SOLR_HDFS_HOME);
    dfsCluster = null;
  }

View File: StressHdfsTest.java

@@ -65,13 +65,11 @@ public class StressHdfsTest extends BasicDistributedZkTest {

  @BeforeClass
  public static void setupClass() throws Exception {
    dfsCluster = HdfsTestUtil.setupClass(createTempDir().toFile().getAbsolutePath());
-    System.setProperty("solr.hdfs.home", dfsCluster.getURI().toString() + "/solr");
  }

  @AfterClass
  public static void teardownClass() throws Exception {
    HdfsTestUtil.teardownClass(dfsCluster);
-    System.clearProperty("solr.hdfs.home");
    dfsCluster = null;
  }
@@ -217,9 +215,9 @@ public class StressHdfsTest extends BasicDistributedZkTest {

    // check that all dirs are gone
    for (String dataDir : dataDirs) {
-      Configuration conf = new Configuration();
+      Configuration conf = HdfsTestUtil.getClientConfiguration(dfsCluster);
      conf.setBoolean("fs.hdfs.impl.disable.cache", true);
-      FileSystem fs = FileSystem.get(new URI(dataDir), conf);
+      FileSystem fs = FileSystem.get(new URI(HdfsTestUtil.getURI(dfsCluster)), conf);
      assertFalse(
          "Data directory exists after collection removal : " + dataDir,
          fs.exists(new Path(dataDir)));

View File: HdfsDirectoryFactoryTest.java

@@ -61,7 +61,7 @@ public class HdfsDirectoryFactoryTest extends SolrTestCaseJ4 {

    // test sys prop config
-    System.setProperty("solr.hdfs.home", dfsCluster.getURI().toString() + "/solr1");
+    System.setProperty("solr.hdfs.home", HdfsTestUtil.getURI(dfsCluster) + "/solr1");
    hdfsFactory.init(new NamedList<>());

    String dataHome = hdfsFactory.getDataHome(new MockCoreDescriptor());

@@ -72,7 +72,7 @@ public class HdfsDirectoryFactoryTest extends SolrTestCaseJ4 {

    // test init args config
    NamedList<Object> nl = new NamedList<>();
-    nl.add("solr.hdfs.home", dfsCluster.getURI().toString() + "/solr2");
+    nl.add("solr.hdfs.home", HdfsTestUtil.getURI(dfsCluster) + "/solr2");
    hdfsFactory.init(nl);

    dataHome = hdfsFactory.getDataHome(new MockCoreDescriptor());

@@ -80,7 +80,7 @@ public class HdfsDirectoryFactoryTest extends SolrTestCaseJ4 {

    // test sys prop and init args config - init args wins
-    System.setProperty("solr.hdfs.home", dfsCluster.getURI().toString() + "/solr1");
+    System.setProperty("solr.hdfs.home", HdfsTestUtil.getURI(dfsCluster) + "/solr1");
    hdfsFactory.init(nl);

    dataHome = hdfsFactory.getDataHome(new MockCoreDescriptor());

@@ -95,7 +95,7 @@ public class HdfsDirectoryFactoryTest extends SolrTestCaseJ4 {

    System.setProperty(HdfsDirectoryFactory.CONFIG_DIRECTORY, confDir.toString());

-    Directory dir = hdfsFactory.create(dfsCluster.getURI().toString() + "/solr", NoLockFactory.INSTANCE, DirContext.DEFAULT);
+    Directory dir = hdfsFactory.create(HdfsTestUtil.getURI(dfsCluster) + "/solr", NoLockFactory.INSTANCE, DirContext.DEFAULT);
    try {
      assertEquals(confDir.toString(), hdfsFactory.getConfDir());
    } finally {

View File: TestRecoveryHdfs.java

@@ -78,12 +78,11 @@ public class TestRecoveryHdfs extends SolrTestCaseJ4 {

  @BeforeClass
  public static void beforeClass() throws Exception {
    dfsCluster = HdfsTestUtil.setupClass(createTempDir().toFile().getAbsolutePath());
-    System.setProperty("solr.hdfs.home", dfsCluster.getURI().toString() + "/solr");
-    hdfsUri = dfsCluster.getFileSystem().getUri().toString();
+    hdfsUri = HdfsTestUtil.getURI(dfsCluster);

    try {
      URI uri = new URI(hdfsUri);
-      Configuration conf = new Configuration();
+      Configuration conf = HdfsTestUtil.getClientConfiguration(dfsCluster);
      conf.setBoolean("fs.hdfs.impl.disable.cache", true);
      fs = FileSystem.get(uri, conf);
    } catch (IOException | URISyntaxException e) {

View File: HdfsDirectoryTest.java

@@ -72,10 +72,10 @@ public class HdfsDirectoryTest extends SolrTestCaseJ4 {

  public void setUp() throws Exception {
    super.setUp();

-    Configuration conf = new Configuration();
+    Configuration conf = HdfsTestUtil.getClientConfiguration(dfsCluster);
    conf.set("dfs.permissions.enabled", "false");

-    directory = new HdfsDirectory(new Path(dfsCluster.getURI().toString() + createTempDir().toFile().getAbsolutePath() + "/hdfs"), NoLockFactory.INSTANCE, conf);
+    directory = new HdfsDirectory(new Path(HdfsTestUtil.getURI(dfsCluster) + createTempDir().toFile().getAbsolutePath() + "/hdfs"), NoLockFactory.INSTANCE, conf);

    random = random();
  }

View File: HdfsLockFactoryTest.java

@@ -65,9 +65,9 @@ public class HdfsLockFactoryTest extends SolrTestCaseJ4 {

  @Test
  public void testBasic() throws IOException {
-    URI uri = dfsCluster.getURI();
-    Path lockPath = new Path(uri.toString(), "/basedir/lock");
-    Configuration conf = new Configuration();
+    String uri = HdfsTestUtil.getURI(dfsCluster);
+    Path lockPath = new Path(uri, "/basedir/lock");
+    Configuration conf = HdfsTestUtil.getClientConfiguration(dfsCluster);
    HdfsDirectory dir = new HdfsDirectory(lockPath, conf);
    Lock lock = dir.makeLock("testlock");
    boolean success = lock.obtain();

View File: TestHdfsUpdateLog.java

@@ -51,11 +51,11 @@ public class TestHdfsUpdateLog extends SolrTestCaseJ4 {

  @BeforeClass
  public static void beforeClass() throws Exception {
    dfsCluster = HdfsTestUtil.setupClass(createTempDir().toFile().getAbsolutePath());
-    hdfsUri = dfsCluster.getFileSystem().getUri().toString();
+    hdfsUri = HdfsTestUtil.getURI(dfsCluster);

    try {
      URI uri = new URI(hdfsUri);
-      Configuration conf = new Configuration();
+      Configuration conf = HdfsTestUtil.getClientConfiguration(dfsCluster);
      conf.setBoolean("fs.hdfs.impl.disable.cache", true);
      fs = FileSystem.get(uri, conf);
    } catch (IOException e) {