Merge r1455389 through r1457712 from trunk.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-2802@1457716 13f79535-47bb-0310-9956-ffa450edef68
Tsz-wo Sze 2013-03-18 11:45:07 +00:00
commit a5a66330a8
97 changed files with 1972 additions and 523 deletions

View File

@ -0,0 +1,40 @@
#!/bin/bash
##
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##
# script to find hanging tests in Jenkins build output
# usage: ./findHangingTest.sh <url of Jenkins build console>
#
curl -k -o jenkins.out "$1"
expecting=Running
while read line; do
  if [[ "$line" =~ "Running org.apache.hadoop" ]]; then
    if [[ "$expecting" =~ "Running" ]]; then
      expecting=Tests
    else
      echo "Hanging test: $prevLine"
    fi
  fi
  if [[ "$line" =~ "Tests run" ]]; then
    expecting=Running
  fi
  if [[ "$line" =~ "Forking command line" ]]; then
    a=$line   # skip fork lines so prevLine keeps pointing at the last test class
  else
    prevLine=$line
  fi
done < jenkins.out
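
For context, a sketch of the Surefire console pattern the loop keys on; the test class names below are hypothetical. A "Running ..." line that is not followed by a "Tests run: ..." line before the next "Running ..." line indicates a hung test class.

Running org.apache.hadoop.example.TestAlpha
Tests run: 4, Failures: 0, Errors: 0, Skipped: 0
Running org.apache.hadoop.example.TestBeta
Running org.apache.hadoop.example.TestGamma
Tests run: 2, Failures: 0, Errors: 0, Skipped: 0

Given output like this, the script would print "Hanging test: Running org.apache.hadoop.example.TestBeta", since TestBeta never produced a "Tests run" line before the next class started.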

View File

@ -436,8 +436,8 @@ checkJavadocWarnings () {
echo ""
echo "There appear to be $javadocWarnings javadoc warnings generated by the patched build."
#There are 6 warnings that are caused by using sun internal APIs.
OK_JAVADOC_WARNINGS=6;
#There are 11 warnings that are caused by using sun internal APIs.
OK_JAVADOC_WARNINGS=11;
### if current warnings greater than OK_JAVADOC_WARNINGS
if [[ $javadocWarnings -ne $OK_JAVADOC_WARNINGS ]] ; then
JIRA_COMMENT="$JIRA_COMMENT

View File

@ -115,6 +115,14 @@
<groupId>net.java.dev.jets3t</groupId>
<artifactId>jets3t</artifactId>
</exclusion>
<exclusion>
<groupId>com.jcraft</groupId>
<artifactId>jsch</artifactId>
</exclusion>
<exclusion>
<groupId>commons-el</groupId>
<artifactId>commons-el</artifactId>
</exclusion>
</exclusions>
</dependency>
@ -131,6 +139,34 @@
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
</exclusion>
<exclusion>
<groupId>org.mortbay.jetty</groupId>
<artifactId>jetty</artifactId>
</exclusion>
<exclusion>
<groupId>org.mortbay.jetty</groupId>
<artifactId>jetty-util</artifactId>
</exclusion>
<exclusion>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-core</artifactId>
</exclusion>
<exclusion>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-server</artifactId>
</exclusion>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
</exclusion>
<exclusion>
<groupId>javax.servlet.jsp</groupId>
<artifactId>jsp-api</artifactId>
</exclusion>
<exclusion>
<groupId>tomcat</groupId>
<artifactId>jasper-runtime</artifactId>
</exclusion>
</exclusions>
</dependency>
@ -171,6 +207,10 @@
<groupId>jline</groupId>
<artifactId>jline</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
</exclusions>
</dependency>
@ -207,6 +247,18 @@
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
</exclusion>
<exclusion>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-core</artifactId>
</exclusion>
<exclusion>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-json</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
</exclusions>
</dependency>
@ -247,6 +299,14 @@
<groupId>com.google.inject.extensions</groupId>
<artifactId>guice-servlet</artifactId>
</exclusion>
<exclusion>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-json</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
</exclusions>
</dependency>
@ -271,8 +331,25 @@
<groupId>com.google.inject.extensions</groupId>
<artifactId>guice-servlet</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-annotations</artifactId>
<scope>compile</scope>
<exclusions>
<exclusion>
<groupId>jdk.tools</groupId>
<artifactId>jdk.tools</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
</project>

View File

@ -38,4 +38,24 @@
</dependency>
</dependencies>
<profiles>
<profile>
<id>os.linux</id>
<activation>
<os>
<family>!Mac</family>
</os>
</activation>
<dependencies>
<dependency>
<groupId>jdk.tools</groupId>
<artifactId>jdk.tools</artifactId>
<version>1.6</version>
<scope>system</scope>
<systemPath>${java.home}/../lib/tools.jar</systemPath>
</dependency>
</dependencies>
</profile>
</profiles>
</project>

View File

@ -340,6 +340,19 @@ Trunk (Unreleased)
HADOOP-9264. Port change to use Java untar API on Windows from
branch-1-win to trunk. (Chris Nauroth via suresh)
HADOOP-9393. TestRPC fails with JDK7. (Andrew Wang via atm)
HADOOP-9394. Port findHangingTest.sh from HBase to Hadoop. (Andrew Wang
via atm)
HADOOP-9099. NetUtils.normalizeHostName fails on domains where
UnknownHost resolves to an IP address. (Ivan Mitic via suresh)
HADOOP-9397. Incremental dist tar build fails (Chris Nauroth via jlowe)
HADOOP-9405. TestGridmixSummary#testExecutionSummarizer is broken. (Andrew
Wang via atm)
OPTIMIZATIONS
HADOOP-7761. Improve the performance of raw comparisons. (todd)
@ -468,6 +481,11 @@ Trunk (Unreleased)
HADOOP-9364. PathData#expandAsGlob does not return correct results for
absolute paths on Windows. (Ivan Mitic via suresh)
HADOOP-8973. DiskChecker cannot reliably detect an inaccessible disk on
Windows with NTFS ACLs. (Chris Nauroth via suresh)
HADOOP-9388. TestFsShellCopy fails on Windows. (Ivan Mitic via suresh)
Release 2.0.5-beta - UNRELEASED
INCOMPATIBLE CHANGES
@ -493,6 +511,9 @@ Release 2.0.5-beta - UNRELEASED
HADOOP-9343. Allow additional exceptions through the RPC layer. (sseth)
HADOOP-9318. When exiting on a signal, print the signal name first. (Colin
Patrick McCabe via atm)
OPTIMIZATIONS
BUG FIXES
@ -545,6 +566,11 @@ Release 2.0.5-beta - UNRELEASED
HADOOP-9379. capture the ulimit info after printing the log to the
console. (Arpit Gupta via suresh)
HADOOP-9399. protoc maven plugin doesn't work on mvn 3.0.2 (todd)
HADOOP-9407. commons-daemon 1.0.3 dependency has bad group id causing
build issues. (Sangjin Lee via suresh)
Release 2.0.4-alpha - UNRELEASED
INCOMPATIBLE CHANGES
@ -557,6 +583,14 @@ Release 2.0.4-alpha - UNRELEASED
BUG FIXES
HADOOP-9406. hadoop-client leaks dependency on JDK tools jar. (tucu)
HADOOP-9301. hadoop client servlet/jsp/jetty/tomcat JARs creating
conflicts in Oozie & HttpFS. (tucu)
HADOOP-9408. misleading description for net.topology.table.file.name
property in core-default.xml. (rajeshbabu via suresh)
Release 2.0.3-alpha - 2013-02-06
INCOMPATIBLE CHANGES
@ -2798,6 +2832,9 @@ Release 0.23.0 - 2011-11-01
HADOOP-7797. Fix top-level pom.xml to refer to correct staging maven
repository. (omalley via acmurthy)
HADOOP-7101. UserGroupInformation.getCurrentUser() fails when called from
non-Hadoop JAAS context. (todd)
Release 0.22.1 - Unreleased
INCOMPATIBLE CHANGES
@ -3255,9 +3292,6 @@ Release 0.22.0 - 2011-11-29
HADOOP-7093. Servlets should default to text/plain (todd)
HADOOP-7101. UserGroupInformation.getCurrentUser() fails when called from
non-Hadoop JAAS context. (todd)
HADOOP-7089. Fix link resolution logic in hadoop-config.sh. (eli)
HADOOP-7046. Fix Findbugs warning in Configuration. (Po Cheung via shv)

View File

@ -223,6 +223,10 @@
<groupId>jline</groupId>
<artifactId>jline</artifactId>
</exclusion>
<exclusion>
<groupId>org.jboss.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
<exclusion>
<!-- otherwise seems to drag in junit 3.8.1 via jline -->
<groupId>junit</groupId>

View File

@ -18,7 +18,6 @@
package org.apache.hadoop.fs.shell;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
@ -224,7 +223,13 @@ abstract class CommandWithDestination extends FsCommand {
*/
protected void copyFileToTarget(PathData src, PathData target) throws IOException {
src.fs.setVerifyChecksum(verifyChecksum);
copyStreamToTarget(src.fs.open(src.path), target);
InputStream in = null;
try {
in = src.fs.open(src.path);
copyStreamToTarget(in, target);
} finally {
IOUtils.closeStream(in);
}
}
/**

View File

@ -0,0 +1,93 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.util;
import sun.misc.Signal;
import sun.misc.SignalHandler;
import org.apache.commons.logging.Log;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
/**
* This class logs a message whenever we're about to exit on a UNIX signal.
* This is helpful for determining the root cause of a process' exit.
* For example, if the process exited because the system administrator
* ran a standard "kill," you would see 'RECEIVED SIGNAL 15: SIGTERM' in the log.
*/
@InterfaceAudience.Private
@InterfaceStability.Unstable
public enum SignalLogger {
INSTANCE;
private boolean registered = false;
/**
* Our signal handler.
*/
private static class Handler implements SignalHandler {
final private org.apache.commons.logging.Log LOG;
final private SignalHandler prevHandler;
Handler(String name, Log LOG) {
this.LOG = LOG;
prevHandler = Signal.handle(new Signal(name), this);
}
/**
* Handle an incoming signal.
*
* @param signal The incoming signal
*/
@Override
public void handle(Signal signal) {
LOG.error("RECEIVED SIGNAL " + signal.getNumber() +
": SIG" + signal.getName());
prevHandler.handle(signal);
}
}
/**
* Register some signal handlers.
*
* @param LOG The commons-logging Log to use in the signal handlers.
*/
public void register(final Log LOG) {
if (registered) {
throw new IllegalStateException("Can't re-install the signal handlers.");
}
registered = true;
StringBuilder bld = new StringBuilder();
bld.append("registered UNIX signal handlers for [");
final String SIGNALS[] = { "TERM", "HUP", "INT" };
String separator = "";
for (String signalName : SIGNALS) {
try {
new Handler(signalName, LOG);
bld.append(separator);
bld.append(signalName);
separator = ", ";
} catch (Exception e) {
LOG.debug(e);
}
}
bld.append("]");
LOG.info(bld.toString());
}
}
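
Assuming a daemon registers the handlers at startup (as the StringUtils hunk below does), the resulting log lines would look roughly like the following; the timestamps and logger name are illustrative.

2013-03-18 11:45:07,123 INFO org.apache.hadoop.hdfs.server.namenode.NameNode: registered UNIX signal handlers for [TERM, HUP, INT]
2013-03-18 11:52:31,456 ERROR org.apache.hadoop.hdfs.server.namenode.NameNode: RECEIVED SIGNAL 15: SIGTERM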

View File

@ -35,6 +35,7 @@ import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.SystemUtils;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.Path;
@ -613,6 +614,13 @@ public class StringUtils {
)
);
if (SystemUtils.IS_OS_UNIX) {
try {
SignalLogger.INSTANCE.register(LOG);
} catch (Throwable t) {
LOG.warn("failed to register any UNIX signal loggers: ", t);
}
}
ShutdownHookManager.get().addShutdownHook(
new Runnable() {
@Override

View File

@ -699,7 +699,7 @@
<name>net.topology.table.file.name</name>
<value></value>
<description> The file name for a topology file, which is used when the
net.topology.script.file.name property is set to
net.topology.node.switch.mapping.impl property is set to
org.apache.hadoop.net.TableMapping. The file format is a two column text
file, with columns separated by whitespace. The first column is a DNS or
IP address and the second column specifies the rack where the address maps.
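
For illustration, a minimal sketch of the two-column table format described above; the addresses and rack names are hypothetical.

192.168.100.11           /rack1
192.168.100.12           /rack1
datanode3.example.com    /rack2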

View File

@ -834,23 +834,27 @@ public class TestRPC {
TestProtocol.class, new TestImpl(), ADDRESS, 0, 5, true, conf, null
);
server.start();
InetSocketAddress addr = NetUtils.getConnectAddress(server);
final TestProtocol proxy = (TestProtocol) RPC.getProxy(
TestProtocol.class, TestProtocol.versionID, addr, conf);
// Connect to the server
proxy.ping();
// Interrupt self, try another call
Thread.currentThread().interrupt();
try {
InetSocketAddress addr = NetUtils.getConnectAddress(server);
final TestProtocol proxy = (TestProtocol) RPC.getProxy(
TestProtocol.class, TestProtocol.versionID, addr, conf);
// Connect to the server
proxy.ping();
fail("Interruption did not cause IPC to fail");
} catch (IOException ioe) {
if (!ioe.toString().contains("InterruptedException")) {
throw ioe;
// Interrupt self, try another call
Thread.currentThread().interrupt();
try {
proxy.ping();
fail("Interruption did not cause IPC to fail");
} catch (IOException ioe) {
if (!ioe.toString().contains("InterruptedException")) {
throw ioe;
}
// clear interrupt status for future tests
Thread.interrupted();
}
// clear interrupt status for future tests
Thread.interrupted();
} finally {
server.stop();
}
}
@ -862,59 +866,62 @@ public class TestRPC {
);
server.start();
int numConcurrentRPC = 200;
InetSocketAddress addr = NetUtils.getConnectAddress(server);
final CyclicBarrier barrier = new CyclicBarrier(numConcurrentRPC);
final CountDownLatch latch = new CountDownLatch(numConcurrentRPC);
final AtomicBoolean leaderRunning = new AtomicBoolean(true);
final AtomicReference<Throwable> error = new AtomicReference<Throwable>();
Thread leaderThread = null;
for (int i = 0; i < numConcurrentRPC; i++) {
final int num = i;
final TestProtocol proxy = (TestProtocol) RPC.getProxy(
TestProtocol.class, TestProtocol.versionID, addr, conf);
Thread rpcThread = new Thread(new Runnable() {
@Override
public void run() {
try {
barrier.await();
while (num == 0 || leaderRunning.get()) {
try {
int numConcurrentRPC = 200;
InetSocketAddress addr = NetUtils.getConnectAddress(server);
final CyclicBarrier barrier = new CyclicBarrier(numConcurrentRPC);
final CountDownLatch latch = new CountDownLatch(numConcurrentRPC);
final AtomicBoolean leaderRunning = new AtomicBoolean(true);
final AtomicReference<Throwable> error = new AtomicReference<Throwable>();
Thread leaderThread = null;
for (int i = 0; i < numConcurrentRPC; i++) {
final int num = i;
final TestProtocol proxy = (TestProtocol) RPC.getProxy(
TestProtocol.class, TestProtocol.versionID, addr, conf);
Thread rpcThread = new Thread(new Runnable() {
@Override
public void run() {
try {
barrier.await();
while (num == 0 || leaderRunning.get()) {
proxy.slowPing(false);
}
proxy.slowPing(false);
} catch (Exception e) {
if (num == 0) {
leaderRunning.set(false);
} else {
error.set(e);
}
LOG.error(e);
} finally {
latch.countDown();
}
proxy.slowPing(false);
} catch (Exception e) {
if (num == 0) {
leaderRunning.set(false);
} else {
error.set(e);
}
LOG.error(e);
} finally {
latch.countDown();
}
});
rpcThread.start();
if (leaderThread == null) {
leaderThread = rpcThread;
}
});
rpcThread.start();
if (leaderThread == null) {
leaderThread = rpcThread;
}
// let threads get past the barrier
Thread.sleep(1000);
// stop a single thread
while (leaderRunning.get()) {
leaderThread.interrupt();
}
latch.await();
// should not cause any other thread to get an error
assertTrue("rpc got exception " + error.get(), error.get() == null);
} finally {
server.stop();
}
// let threads get past the barrier
Thread.sleep(1000);
// stop a single thread
while (leaderRunning.get()) {
leaderThread.interrupt();
}
latch.await();
// should not cause any other thread to get an error
assertTrue("rpc got exception " + error.get(), error.get() == null);
}
public static void main(String[] args) throws Exception {

View File

@ -605,7 +605,7 @@ public class TestNetUtils {
@Test
public void testNormalizeHostName() {
List<String> hosts = Arrays.asList(new String[] {"127.0.0.1",
"localhost", "3w.org", "UnknownHost"});
"localhost", "3w.org", "UnknownHost123"});
List<String> normalizedHosts = NetUtils.normalizeHostNames(hosts);
// when ipaddress is normalized, same address is expected in return
assertEquals(normalizedHosts.get(0), hosts.get(0));

View File

@ -162,6 +162,9 @@ public abstract class GenericTestUtils {
private final CountDownLatch waitLatch = new CountDownLatch(1);
private final CountDownLatch resultLatch = new CountDownLatch(1);
private final AtomicInteger fireCounter = new AtomicInteger(0);
private final AtomicInteger resultCounter = new AtomicInteger(0);
// Result fields set after proceed() is called.
private volatile Throwable thrown;
private volatile Object returnValue;
@ -188,6 +191,7 @@ public abstract class GenericTestUtils {
@Override
public Object answer(InvocationOnMock invocation) throws Throwable {
LOG.info("DelayAnswer firing fireLatch");
fireCounter.getAndIncrement();
fireLatch.countDown();
try {
LOG.info("DelayAnswer waiting on waitLatch");
@ -208,6 +212,7 @@ public abstract class GenericTestUtils {
thrown = t;
throw t;
} finally {
resultCounter.incrementAndGet();
resultLatch.countDown();
}
}
@ -235,6 +240,14 @@ public abstract class GenericTestUtils {
public Object getReturnValue() {
return returnValue;
}
public int getFireCount() {
return fireCounter.get();
}
public int getResultCount() {
return resultCounter.get();
}
}
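
A hedged sketch of how a test might use the new counters to assert that a mocked call is in flight. The DelayAnswer(Log) constructor and the waitForCall()/proceed() methods are assumed from how the class is used elsewhere in Hadoop's tests; the test class and service interface are hypothetical.

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.test.GenericTestUtils;
import org.junit.Assert;
import org.junit.Test;
import org.mockito.Mockito;

public class DelayAnswerCounterSketch {
  private static final Log LOG = LogFactory.getLog(DelayAnswerCounterSketch.class);

  // Hypothetical collaborator whose call we want to observe mid-flight.
  interface SlowService { void slowCall(); }

  @Test
  public void testCallInFlight() throws Exception {
    final SlowService svc = Mockito.mock(SlowService.class);
    GenericTestUtils.DelayAnswer delayer = new GenericTestUtils.DelayAnswer(LOG);
    Mockito.doAnswer(delayer).when(svc).slowCall();

    Thread caller = new Thread(new Runnable() {
      @Override
      public void run() {
        svc.slowCall();
      }
    });
    caller.start();

    delayer.waitForCall();                            // assumed API: waits until answer() has fired
    Assert.assertEquals(1, delayer.getFireCount());   // the call entered the mocked method...
    Assert.assertEquals(0, delayer.getResultCount()); // ...but has not completed yet
    delayer.proceed();                                // assumed API: let the blocked call finish
    caller.join();
    Assert.assertEquals(1, delayer.getResultCount());
  }
}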
/**

View File

@ -0,0 +1,42 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.util;
import org.apache.commons.lang.SystemUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.junit.Assert;
import org.junit.Assume;
import org.junit.Test;
public class TestSignalLogger {
public static final Log LOG = LogFactory.getLog(TestSignalLogger.class);
@Test(timeout=60000)
public void testInstall() throws Exception {
Assume.assumeTrue(SystemUtils.IS_OS_UNIX);
SignalLogger.INSTANCE.register(LOG);
try {
SignalLogger.INSTANCE.register(LOG);
Assert.fail("expected IllegalStateException from double registration");
} catch (IllegalStateException e) {
// fall through
}
}
}

View File

@ -152,7 +152,7 @@
}
run tar cf hadoop-${project.version}.tar hadoop-${project.version}
run gzip hadoop-${project.version}.tar
run gzip -f hadoop-${project.version}.tar
echo
echo "Hadoop dist tar available at: ${project.build.directory}/hadoop-${project.version}.tar.gz"
echo

View File

@ -232,7 +232,7 @@ public class Server {
* path.
*/
private String checkAbsolutePath(String value, String name) {
if (!value.startsWith("/")) {
if (!new File(value).isAbsolute()) {
throw new IllegalArgumentException(
MessageFormat.format("[{0}] must be an absolute path [{1}]", name, value));
}
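
For illustration, a small Java sketch of why the startsWith("/") test is replaced: on Windows an absolute path need not begin with "/", so the old check rejected valid values. The class name and paths are hypothetical.

import java.io.File;

public class AbsolutePathCheckSketch {
  public static void main(String[] args) {
    String value = "C:\\hadoop\\temp";
    System.out.println(value.startsWith("/"));         // false everywhere -- the old check would reject it
    System.out.println(new File(value).isAbsolute());  // true on Windows, false on Unix
    // For Unix-style paths such as "/var/lib/hadoop" the two checks agree,
    // so File#isAbsolute is the portable test.
  }
}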

View File

@ -364,7 +364,7 @@ public abstract class BaseTestHttpFSWith extends HFSTestCase {
}
}
private void testSetPermission() throws Exception {
protected void testSetPermission() throws Exception {
FileSystem fs = FileSystem.get(getProxiedFSConf());
Path path = new Path(getProxiedFSTestDir(), "foodir");
fs.mkdirs(path);

View File

@ -20,8 +20,13 @@ package org.apache.hadoop.fs.http.client;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.test.TestDirHelper;
import org.junit.Assert;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
@ -65,21 +70,31 @@ public class TestHttpFSFileSystemLocalFileSystem extends BaseTestHttpFSWith {
}
protected Path addPrefix(Path path) {
URI uri = path.toUri();
try {
if (uri.getAuthority() != null) {
uri = new URI(uri.getScheme(),
uri.getAuthority(), PATH_PREFIX + uri.getPath());
}
else {
if (uri.getPath().startsWith("/")) {
uri = new URI(PATH_PREFIX + uri.getPath());
}
}
} catch (URISyntaxException ex) {
throw new RuntimeException("It should not happen: " + ex.toString(), ex);
}
return new Path(uri);
return Path.mergePaths(new Path(PATH_PREFIX), path);
}
@Override
protected void testSetPermission() throws Exception {
if (Path.WINDOWS) {
FileSystem fs = FileSystem.get(getProxiedFSConf());
Path path = new Path(getProxiedFSTestDir(), "foodir");
fs.mkdirs(path);
fs = getHttpFSFileSystem();
FsPermission permission1 = new FsPermission(FsAction.READ_WRITE, FsAction.NONE, FsAction.NONE);
fs.setPermission(path, permission1);
fs.close();
fs = FileSystem.get(getProxiedFSConf());
FileStatus status1 = fs.getFileStatus(path);
fs.close();
FsPermission permission2 = status1.getPermission();
Assert.assertEquals(permission2, permission1);
// sticky bit not supported on Windows with local file system, so the
// subclass skips that part of the test
} else {
super.testSetPermission();
}
}
}

View File

@ -42,6 +42,7 @@ import org.apache.hadoop.test.HTestCase;
import org.apache.hadoop.test.TestDir;
import org.apache.hadoop.test.TestDirHelper;
import org.apache.hadoop.test.TestException;
import org.apache.hadoop.util.Shell;
import org.apache.hadoop.util.StringUtils;
import org.junit.Test;
@ -50,21 +51,24 @@ public class TestServer extends HTestCase {
@Test
@TestDir
public void constructorsGetters() throws Exception {
Server server = new Server("server", "/a", "/b", "/c", "/d", new Configuration(false));
assertEquals(server.getHomeDir(), "/a");
assertEquals(server.getConfigDir(), "/b");
assertEquals(server.getLogDir(), "/c");
assertEquals(server.getTempDir(), "/d");
Server server = new Server("server", getAbsolutePath("/a"),
getAbsolutePath("/b"), getAbsolutePath("/c"), getAbsolutePath("/d"),
new Configuration(false));
assertEquals(server.getHomeDir(), getAbsolutePath("/a"));
assertEquals(server.getConfigDir(), getAbsolutePath("/b"));
assertEquals(server.getLogDir(), getAbsolutePath("/c"));
assertEquals(server.getTempDir(), getAbsolutePath("/d"));
assertEquals(server.getName(), "server");
assertEquals(server.getPrefix(), "server");
assertEquals(server.getPrefixedName("name"), "server.name");
assertNotNull(server.getConfig());
server = new Server("server", "/a", "/b", "/c", "/d");
assertEquals(server.getHomeDir(), "/a");
assertEquals(server.getConfigDir(), "/b");
assertEquals(server.getLogDir(), "/c");
assertEquals(server.getTempDir(), "/d");
server = new Server("server", getAbsolutePath("/a"), getAbsolutePath("/b"),
getAbsolutePath("/c"), getAbsolutePath("/d"));
assertEquals(server.getHomeDir(), getAbsolutePath("/a"));
assertEquals(server.getConfigDir(), getAbsolutePath("/b"));
assertEquals(server.getLogDir(), getAbsolutePath("/c"));
assertEquals(server.getTempDir(), getAbsolutePath("/d"));
assertEquals(server.getName(), "server");
assertEquals(server.getPrefix(), "server");
assertEquals(server.getPrefixedName("name"), "server.name");
@ -793,4 +797,14 @@ public class TestServer extends HTestCase {
server.destroy();
}
/**
* Creates an absolute path by appending the given relative path to the test
* root.
*
* @param relativePath String relative path
* @return String absolute path formed by appending relative path to test root
*/
private static String getAbsolutePath(String relativePath) {
return new File(TestDirHelper.getTestDir(), relativePath).getAbsolutePath();
}
}

View File

@ -50,7 +50,10 @@ public class TestHostnameFilter extends HTestCase {
@Override
public void doFilter(ServletRequest servletRequest, ServletResponse servletResponse)
throws IOException, ServletException {
assertTrue(HostnameFilter.get().contains("localhost"));
// Hostname was set to "localhost", but may get resolved automatically to
// "127.0.0.1" depending on OS.
assertTrue(HostnameFilter.get().contains("localhost") ||
HostnameFilter.get().contains("127.0.0.1"));
invoked.set(true);
}
};

View File

@ -63,7 +63,7 @@ public class TestDirHelper implements MethodRule {
static {
try {
TEST_DIR_ROOT = System.getProperty(TEST_DIR_PROP, new File("target").getAbsolutePath());
if (!TEST_DIR_ROOT.startsWith("/")) {
if (!new File(TEST_DIR_ROOT).isAbsolute()) {
System.err.println(MessageFormat.format("System property [{0}]=[{1}] must be set to an absolute path",
TEST_DIR_PROP, TEST_DIR_ROOT));
System.exit(-1);

View File

@ -82,7 +82,8 @@ public class TestHdfsHelper extends TestDirHelper {
private Path resetHdfsTestDir(Configuration conf) {
Path testDir = new Path("./" + TEST_DIR_ROOT, testName + "-" + counter.getAndIncrement());
Path testDir = new Path("/tmp/" + testName + "-" +
counter.getAndIncrement());
try {
// currentUser
FileSystem fs = FileSystem.get(conf);

View File

@ -309,6 +309,9 @@ Trunk (Unreleased)
HDFS-4391. TestDataTransferKeepalive fails when tests are executed in a
certain order. (Andrew Wang via atm)
HDFS-4586. TestDataDirs.testGetDataDirsFromURIs fails when all directories
in dfs.datanode.data.dir are invalid. (Ivan Mitic via atm)
BREAKDOWN OF HADOOP-8562 SUBTASKS AND RELATED JIRAS
HDFS-4145. Merge hdfs cmd line scripts from branch-1-win. (David Lao,
@ -327,6 +330,16 @@ Trunk (Unreleased)
HDFS-4572. Fix TestJournal failures on Windows. (Arpit Agarwal via suresh)
HDFS-4287. HTTPFS tests fail on Windows. (Chris Nauroth via suresh)
HDFS-4593. TestSaveNamespace fails on Windows. (Arpit Agarwal via suresh)
HDFS-4582. TestHostsFiles fails on Windows. (Ivan Mitic via suresh)
HDFS-4603. TestMiniDFSCluster fails on Windows. (Ivan Mitic via suresh)
HDFS-4604. TestJournalNode fails on Windows. (Ivan Mitic via suresh)
Release 2.0.5-beta - UNRELEASED
INCOMPATIBLE CHANGES
@ -386,6 +399,31 @@ Release 2.0.5-beta - UNRELEASED
HDFS-4571. WebHDFS should not set the service hostname on the server side.
(tucu)
HDFS-4013. TestHftpURLTimeouts throws NPE. (Chao Shi via suresh)
HDFS-4592. Default values for access time precision are out of sync between
hdfs-default.xml and the code. (atm)
HDFS-4522. LightWeightGSet expects incrementing a volatile to be atomic.
(Colin Patrick McCabe via atm)
HDFS-4484. libwebhdfs compilation broken with gcc 4.6.2. (Colin Patrick
McCabe via atm)
HDFS-4595. When short circuit read fails, DFSClient does not fall back
to regular reads. (suresh)
HDFS-4583. TestNodeCount fails. (Ivan Mitic via suresh)
HDFS-4591. HA clients can fail to fail over while Standby NN is performing
long checkpoint. (atm)
HDFS-3277. fail over to loading a different FSImage if the first one we
try to load is corrupt. (Colin Patrick McCabe and Andrew Wang via atm)
HDFS-4596. Shutting down namenode during checkpointing can lead to md5sum
error. (Andrew Wang via atm)
Release 2.0.4-alpha - UNRELEASED
INCOMPATIBLE CHANGES
@ -2394,6 +2432,12 @@ Release 0.23.7 - UNRELEASED
HDFS-4577. Webhdfs operations should declare if authentication is required
(daryn via kihwal)
HDFS-3344. Unreliable corrupt blocks counting in TestProcessCorruptBlocks
(kihwal)
HDFS-3367. WebHDFS doesn't use the logged in user when opening
connections (daryn)
Release 0.23.6 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -48,6 +48,7 @@ add_executable(test_libwebhdfs_ops
)
target_link_libraries(test_libwebhdfs_ops
webhdfs
native_mini_dfs
)
add_executable(test_libwebhdfs_read
@ -69,4 +70,6 @@ add_executable(test_libwebhdfs_threaded
)
target_link_libraries(test_libwebhdfs_threaded
webhdfs
native_mini_dfs
pthread
)

View File

@ -257,7 +257,8 @@ int main(int argc, char **argv)
const char* path[] = {"/foo", "/foo/bar", "foobar", "//foo/bar//foobar",
"foo//bar", "foo/bar///", "/", "////"};
for (int i = 0; i < 8; i++) {
int i;
for (i = 0; i < 8; i++) {
fprintf(stderr, "hdfsSetWorkingDirectory: %s, %s\n",
((result = hdfsSetWorkingDirectory(fs, path[i])) ?
"Failed!" : "Success!"),
@ -281,8 +282,8 @@ int main(int argc, char **argv)
fprintf(stderr, "Name: %s, ", fileInfo->mName);
fprintf(stderr, "Type: %c, ", (char)(fileInfo->mKind));
fprintf(stderr, "Replication: %d, ", fileInfo->mReplication);
fprintf(stderr, "BlockSize: %lld, ", fileInfo->mBlockSize);
fprintf(stderr, "Size: %lld, ", fileInfo->mSize);
fprintf(stderr, "BlockSize: %"PRId64", ", fileInfo->mBlockSize);
fprintf(stderr, "Size: %"PRId64", ", fileInfo->mSize);
fprintf(stderr, "LastMod: %s", ctime(&fileInfo->mLastMod));
fprintf(stderr, "Owner: %s, ", fileInfo->mOwner);
fprintf(stderr, "Group: %s, ", fileInfo->mGroup);
@ -305,8 +306,8 @@ int main(int argc, char **argv)
fprintf(stderr, "Name: %s, ", fileList[i].mName);
fprintf(stderr, "Type: %c, ", (char)fileList[i].mKind);
fprintf(stderr, "Replication: %d, ", fileList[i].mReplication);
fprintf(stderr, "BlockSize: %lld, ", fileList[i].mBlockSize);
fprintf(stderr, "Size: %lld, ", fileList[i].mSize);
fprintf(stderr, "BlockSize: %"PRId64", ", fileList[i].mBlockSize);
fprintf(stderr, "Size: %"PRId64", ", fileList[i].mSize);
fprintf(stderr, "LastMod: %s", ctime(&fileList[i].mLastMod));
fprintf(stderr, "Owner: %s, ", fileList[i].mOwner);
fprintf(stderr, "Group: %s, ", fileList[i].mGroup);

View File

@ -23,6 +23,7 @@ import java.io.FileInputStream;
import java.io.IOException;
import java.net.Socket;
import java.nio.ByteBuffer;
import java.security.PrivilegedExceptionAction;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
@ -31,6 +32,7 @@ import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo;
import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
@ -41,6 +43,7 @@ import org.apache.hadoop.hdfs.server.datanode.BlockMetadataHeader;
import org.apache.hadoop.hdfs.util.DirectBufferPool;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.DataChecksum;
@ -86,11 +89,21 @@ class BlockReaderLocal implements BlockReader {
}
private synchronized ClientDatanodeProtocol getDatanodeProxy(
DatanodeInfo node, Configuration conf, int socketTimeout,
boolean connectToDnViaHostname) throws IOException {
UserGroupInformation ugi, final DatanodeInfo node,
final Configuration conf, final int socketTimeout,
final boolean connectToDnViaHostname) throws IOException {
if (proxy == null) {
proxy = DFSUtil.createClientDatanodeProtocolProxy(node, conf,
socketTimeout, connectToDnViaHostname);
try {
proxy = ugi.doAs(new PrivilegedExceptionAction<ClientDatanodeProtocol>() {
@Override
public ClientDatanodeProtocol run() throws Exception {
return DFSUtil.createClientDatanodeProtocolProxy(node, conf,
socketTimeout, connectToDnViaHostname);
}
});
} catch (InterruptedException e) {
LOG.warn("encountered exception ", e);
}
}
return proxy;
}
@ -154,17 +167,18 @@ class BlockReaderLocal implements BlockReader {
/**
* The only way this object can be instantiated.
*/
static BlockReaderLocal newBlockReader(Configuration conf, String file,
ExtendedBlock blk, Token<BlockTokenIdentifier> token, DatanodeInfo node,
int socketTimeout, long startOffset, long length,
boolean connectToDnViaHostname) throws IOException {
static BlockReaderLocal newBlockReader(UserGroupInformation ugi,
Configuration conf, String file, ExtendedBlock blk,
Token<BlockTokenIdentifier> token, DatanodeInfo node, int socketTimeout,
long startOffset, long length, boolean connectToDnViaHostname)
throws IOException {
LocalDatanodeInfo localDatanodeInfo = getLocalDatanodeInfo(node
.getIpcPort());
// check the cache first
BlockLocalPathInfo pathinfo = localDatanodeInfo.getBlockLocalPathInfo(blk);
if (pathinfo == null) {
pathinfo = getBlockPathInfo(blk, node, conf, socketTimeout, token,
pathinfo = getBlockPathInfo(ugi, blk, node, conf, socketTimeout, token,
connectToDnViaHostname);
}
@ -241,13 +255,13 @@ class BlockReaderLocal implements BlockReader {
return ldInfo;
}
private static BlockLocalPathInfo getBlockPathInfo(ExtendedBlock blk,
DatanodeInfo node, Configuration conf, int timeout,
private static BlockLocalPathInfo getBlockPathInfo(UserGroupInformation ugi,
ExtendedBlock blk, DatanodeInfo node, Configuration conf, int timeout,
Token<BlockTokenIdentifier> token, boolean connectToDnViaHostname)
throws IOException {
throws IOException {
LocalDatanodeInfo localDatanodeInfo = getLocalDatanodeInfo(node.getIpcPort());
BlockLocalPathInfo pathinfo = null;
ClientDatanodeProtocol proxy = localDatanodeInfo.getDatanodeProxy(node,
ClientDatanodeProtocol proxy = localDatanodeInfo.getDatanodeProxy(ugi, node,
conf, timeout, connectToDnViaHostname);
try {
// make RPC to local datanode to find local pathnames of blocks

View File

@ -415,6 +415,7 @@ public class DFSClient implements java.io.Closeable {
"null URI");
NameNodeProxies.ProxyAndInfo<ClientProtocol> proxyInfo =
NameNodeProxies.createProxy(conf, nameNodeUri, ClientProtocol.class);
this.dtService = proxyInfo.getDelegationTokenService();
this.namenode = proxyInfo.getProxy();
}
@ -794,12 +795,13 @@ public class DFSClient implements java.io.Closeable {
/**
* Get {@link BlockReader} for short circuited local reads.
*/
static BlockReader getLocalBlockReader(Configuration conf,
String src, ExtendedBlock blk, Token<BlockTokenIdentifier> accessToken,
DatanodeInfo chosenNode, int socketTimeout, long offsetIntoBlock,
boolean connectToDnViaHostname) throws InvalidToken, IOException {
static BlockReader getLocalBlockReader(UserGroupInformation ugi,
Configuration conf, String src, ExtendedBlock blk,
Token<BlockTokenIdentifier> accessToken, DatanodeInfo chosenNode,
int socketTimeout, long offsetIntoBlock, boolean connectToDnViaHostname)
throws InvalidToken, IOException {
try {
return BlockReaderLocal.newBlockReader(conf, src, blk, accessToken,
return BlockReaderLocal.newBlockReader(ugi, conf, src, blk, accessToken,
chosenNode, socketTimeout, offsetIntoBlock, blk.getNumBytes()
- offsetIntoBlock, connectToDnViaHostname);
} catch (RemoteException re) {
@ -1621,7 +1623,7 @@ public class DFSClient implements java.io.Closeable {
* @param socketFactory to create sockets to connect to DNs
* @param socketTimeout timeout to use when connecting and waiting for a response
* @param encryptionKey the key needed to communicate with DNs in this cluster
* @param connectToDnViaHostname {@see #connectToDnViaHostname()}
* @param connectToDnViaHostname {@link #connectToDnViaHostname()}
* @return The checksum
*/
static MD5MD5CRC32FileChecksum getFileChecksum(String src,
@ -2323,6 +2325,12 @@ public class DFSClient implements java.io.Closeable {
}
void disableShortCircuit() {
LOG.info("Short circuit is disabled");
shortCircuitLocalReads = false;
}
@VisibleForTesting
boolean getShortCircuitLocalReads() {
return shortCircuitLocalReads;
}
}

View File

@ -460,6 +460,10 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable
" for " + blk);
}
return chosenNode;
} catch (AccessControlException ex) {
DFSClient.LOG.warn("Short circuit access failed " + ex);
dfsClient.disableShortCircuit();
continue;
} catch (IOException ex) {
if (ex instanceof InvalidEncryptionKeyException && refetchEncryptionKey > 0) {
DFSClient.LOG.info("Will fetch a new encryption key and retry, "
@ -806,7 +810,7 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable
// we want to remember what we have tried
addIntoCorruptedBlockMap(block.getBlock(), chosenNode, corruptedBlockMap);
} catch (AccessControlException ex) {
DFSClient.LOG.warn("Short circuit access failed ", ex);
DFSClient.LOG.warn("Short circuit access failed " + ex);
dfsClient.disableShortCircuit();
continue;
} catch (IOException e) {
@ -885,9 +889,9 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable
// Can't local read a block under construction, see HDFS-2757
if (dfsClient.shouldTryShortCircuitRead(dnAddr) &&
!blockUnderConstruction()) {
return DFSClient.getLocalBlockReader(dfsClient.conf, src, block,
blockToken, chosenNode, dfsClient.hdfsTimeout, startOffset,
dfsClient.connectToDnViaHostname());
return DFSClient.getLocalBlockReader(dfsClient.ugi, dfsClient.conf,
src, block, blockToken, chosenNode, dfsClient.hdfsTimeout,
startOffset, dfsClient.connectToDnViaHostname());
}
IOException err = null;
@ -1027,8 +1031,8 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable
* only report if the total number of replicas is 1. We do not
* report otherwise, since this may be because the client is a handicapped client
* (one that cannot read).
* @param corruptedBlockMap, map of corrupted blocks
* @param dataNodeCount, number of data nodes who contains the block replicas
* @param corruptedBlockMap map of corrupted blocks
* @param dataNodeCount number of data nodes who contains the block replicas
*/
private void reportCheckSumFailure(
Map<ExtendedBlock, Set<DatanodeInfo>> corruptedBlockMap,

View File

@ -70,6 +70,8 @@ import org.apache.hadoop.security.token.SecretManager.InvalidToken;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.Progressable;
import com.google.common.annotations.VisibleForTesting;
/****************************************************************
* Implementation of the abstract FileSystem for the DFS system.
@ -567,9 +569,8 @@ public class DistributedFileSystem extends FileSystem {
return "DFS[" + dfs + "]";
}
/** @deprecated DFSClient should not be accessed directly. */
@InterfaceAudience.Private
@Deprecated
@VisibleForTesting
public DFSClient getClient() {
return dfs;
}

View File

@ -35,6 +35,7 @@ import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.source.JvmMetrics;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.util.DiskChecker;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
@ -82,7 +83,6 @@ public class JournalNode implements Tool, Configurable {
return journal;
}
@Override
public void setConf(Configuration conf) {
this.conf = conf;
@ -97,21 +97,9 @@ public class JournalNode implements Tool, Configurable {
"Journal dir '" + dir + "' should be an absolute path");
}
if (!dir.exists() && !dir.mkdirs()) {
throw new IOException("Could not create journal dir '" +
dir + "'");
} else if (!dir.isDirectory()) {
throw new IOException("Journal directory '" + dir + "' is not " +
"a directory");
}
if (!dir.canWrite()) {
throw new IOException("Unable to write to journal dir '" +
dir + "'");
}
DiskChecker.checkDir(dir);
}
@Override
public Configuration getConf() {
return conf;

View File

@ -78,6 +78,7 @@ public class DelegationTokenSecretManager
@Override //SecretManager
public void checkAvailableForRead() throws StandbyException {
namesystem.checkOperation(OperationCategory.READ);
namesystem.readLock();
try {
namesystem.checkOperation(OperationCategory.READ);

View File

@ -62,6 +62,7 @@ import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState;
import org.apache.hadoop.hdfs.server.namenode.FSClusterStats;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.Namesystem;
import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory;
import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
import org.apache.hadoop.hdfs.server.protocol.BlockCommand;
import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations;
@ -874,9 +875,10 @@ public class BlockManager {
*/
public BlocksWithLocations getBlocks(DatanodeID datanode, long size
) throws IOException {
namesystem.checkOperation(OperationCategory.READ);
namesystem.readLock();
try {
namesystem.checkSuperuserPrivilege();
namesystem.checkOperation(OperationCategory.READ);
return getBlocksWithLocations(datanode, size);
} finally {
namesystem.readUnlock();
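
The DelegationTokenSecretManager and BlockManager hunks above, and the many FSNamesystem hunks below, all apply the same shape: check the operation category before taking the namesystem lock, so a standby NameNode can throw StandbyException (letting HA clients fail over) without first waiting on a lock that a long-running checkpoint may hold, then re-check once the lock is acquired. A minimal sketch of the pattern, with an illustrative method name:

// Illustrative only -- mirrors the pattern added throughout this change.
void someReadOperation() throws IOException {
  checkOperation(OperationCategory.READ);    // fail fast on a standby, before blocking on the lock
  readLock();
  try {
    checkOperation(OperationCategory.READ);  // re-check: the HA state may have changed while waiting
    // ... perform the read under the lock ...
  } finally {
    readUnlock();
  }
}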

View File

@ -1617,6 +1617,21 @@ public class DataNode extends Configured
}
}
// Small wrapper around the DiskChecker class that provides means to mock
// DiskChecker static methods and unittest DataNode#getDataDirsFromURIs.
static class DataNodeDiskChecker {
private FsPermission expectedPermission;
public DataNodeDiskChecker(FsPermission expectedPermission) {
this.expectedPermission = expectedPermission;
}
public void checkDir(LocalFileSystem localFS, Path path)
throws DiskErrorException, IOException {
DiskChecker.checkDir(localFS, path, expectedPermission);
}
}
/**
* Make an instance of DataNode after ensuring that at least one of the
* given data directories (and their parent directories, if necessary)
@ -1635,7 +1650,10 @@ public class DataNode extends Configured
FsPermission permission = new FsPermission(
conf.get(DFS_DATANODE_DATA_DIR_PERMISSION_KEY,
DFS_DATANODE_DATA_DIR_PERMISSION_DEFAULT));
ArrayList<File> dirs = getDataDirsFromURIs(dataDirs, localFS, permission);
DataNodeDiskChecker dataNodeDiskChecker =
new DataNodeDiskChecker(permission);
ArrayList<File> dirs =
getDataDirsFromURIs(dataDirs, localFS, dataNodeDiskChecker);
DefaultMetricsSystem.initialize("DataNode");
assert dirs.size() > 0 : "number of data directories should be > 0";
@ -1644,7 +1662,8 @@ public class DataNode extends Configured
// DataNode ctor expects AbstractList instead of List or Collection...
static ArrayList<File> getDataDirsFromURIs(Collection<URI> dataDirs,
LocalFileSystem localFS, FsPermission permission) throws IOException {
LocalFileSystem localFS, DataNodeDiskChecker dataNodeDiskChecker)
throws IOException {
ArrayList<File> dirs = new ArrayList<File>();
StringBuilder invalidDirs = new StringBuilder();
for (URI dirURI : dataDirs) {
@ -1656,7 +1675,7 @@ public class DataNode extends Configured
// drop any (illegal) authority in the URI for backwards compatibility
File dir = new File(dirURI.getPath());
try {
DiskChecker.checkDir(localFS, new Path(dir.toURI()), permission);
dataNodeDiskChecker.checkDir(localFS, new Path(dir.toURI()));
dirs.add(dir);
} catch (IOException ioe) {
LOG.warn("Invalid " + DFS_DATANODE_DATA_DIR_KEY + " "

View File

@ -44,4 +44,5 @@ class CheckpointFaultInjector {
return false;
}
public void afterMD5Rename() throws IOException {}
}

View File

@ -50,6 +50,7 @@ import org.apache.hadoop.hdfs.server.common.Storage.FormatConfirmable;
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
import org.apache.hadoop.hdfs.server.common.Storage.StorageState;
import org.apache.hadoop.hdfs.server.common.Util;
import org.apache.hadoop.hdfs.server.namenode.FSImageStorageInspector.FSImageFile;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
import org.apache.hadoop.hdfs.server.protocol.CheckpointCommand;
@ -582,11 +583,11 @@ public class FSImage implements Closeable {
boolean loadFSImage(FSNamesystem target, MetaRecoveryContext recovery)
throws IOException {
FSImageStorageInspector inspector = storage.readAndInspectDirs();
FSImageFile imageFile = null;
isUpgradeFinalized = inspector.isUpgradeFinalized();
FSImageStorageInspector.FSImageFile imageFile
= inspector.getLatestImage();
List<FSImageFile> imageFiles = inspector.getLatestImages();
boolean needToSave = inspector.needToSave();
Iterable<EditLogInputStream> editStreams = null;
@ -599,7 +600,8 @@ public class FSImage implements Closeable {
// we better be able to load all the edits. If we're the standby NN, it's
// OK to not be able to read all of edits right now.
long toAtLeastTxId = editLog.isOpenForWrite() ? inspector.getMaxSeenTxId() : 0;
editStreams = editLog.selectInputStreams(imageFile.getCheckpointTxId() + 1,
editStreams = editLog.selectInputStreams(
imageFiles.get(0).getCheckpointTxId() + 1,
toAtLeastTxId, recovery, false);
} else {
editStreams = FSImagePreTransactionalStorageInspector
@ -612,7 +614,6 @@ public class FSImage implements Closeable {
elis.setMaxOpSize(maxOpSize);
}
LOG.debug("Planning to load image :\n" + imageFile);
for (EditLogInputStream l : editStreams) {
LOG.debug("Planning to load edit log stream: " + l);
}
@ -620,34 +621,21 @@ public class FSImage implements Closeable {
LOG.info("No edit log streams selected.");
}
try {
StorageDirectory sdForProperties = imageFile.sd;
storage.readProperties(sdForProperties);
if (LayoutVersion.supports(Feature.TXID_BASED_LAYOUT,
getLayoutVersion())) {
// For txid-based layout, we should have a .md5 file
// next to the image file
loadFSImage(imageFile.getFile(), target, recovery);
} else if (LayoutVersion.supports(Feature.FSIMAGE_CHECKSUM,
getLayoutVersion())) {
// In 0.22, we have the checksum stored in the VERSION file.
String md5 = storage.getDeprecatedProperty(
NNStorage.DEPRECATED_MESSAGE_DIGEST_PROPERTY);
if (md5 == null) {
throw new InconsistentFSStateException(sdForProperties.getRoot(),
"Message digest property " +
NNStorage.DEPRECATED_MESSAGE_DIGEST_PROPERTY +
" not set for storage directory " + sdForProperties.getRoot());
}
loadFSImage(imageFile.getFile(), new MD5Hash(md5), target, recovery);
} else {
// We don't have any record of the md5sum
loadFSImage(imageFile.getFile(), null, target, recovery);
for (int i = 0; i < imageFiles.size(); i++) {
try {
imageFile = imageFiles.get(i);
loadFSImageFile(target, recovery, imageFile);
break;
} catch (IOException ioe) {
LOG.error("Failed to load image from " + imageFile, ioe);
target.clear();
imageFile = null;
}
} catch (IOException ioe) {
}
// Failed to load any images, error out
if (imageFile == null) {
FSEditLog.closeAllStreams(editStreams);
throw new IOException("Failed to load image from " + imageFile, ioe);
throw new IOException("Failed to load an FSImage file!");
}
long txnsAdvanced = loadEdits(editStreams, target, recovery);
needToSave |= needsResaveBasedOnStaleCheckpoint(imageFile.getFile(),
@ -656,6 +644,35 @@ public class FSImage implements Closeable {
return needToSave;
}
void loadFSImageFile(FSNamesystem target, MetaRecoveryContext recovery,
FSImageFile imageFile) throws IOException {
LOG.debug("Planning to load image :\n" + imageFile);
StorageDirectory sdForProperties = imageFile.sd;
storage.readProperties(sdForProperties);
if (LayoutVersion.supports(Feature.TXID_BASED_LAYOUT,
getLayoutVersion())) {
// For txid-based layout, we should have a .md5 file
// next to the image file
loadFSImage(imageFile.getFile(), target, recovery);
} else if (LayoutVersion.supports(Feature.FSIMAGE_CHECKSUM,
getLayoutVersion())) {
// In 0.22, we have the checksum stored in the VERSION file.
String md5 = storage.getDeprecatedProperty(
NNStorage.DEPRECATED_MESSAGE_DIGEST_PROPERTY);
if (md5 == null) {
throw new InconsistentFSStateException(sdForProperties.getRoot(),
"Message digest property " +
NNStorage.DEPRECATED_MESSAGE_DIGEST_PROPERTY +
" not set for storage directory " + sdForProperties.getRoot());
}
loadFSImage(imageFile.getFile(), new MD5Hash(md5), target, recovery);
} else {
// We don't have any record of the md5sum
loadFSImage(imageFile.getFile(), null, target, recovery);
}
}
public void initEditLog() {
Preconditions.checkState(getNamespaceID() != 0,
"Must know namespace ID before initting edit log");
@ -1131,7 +1148,7 @@ public class FSImage implements Closeable {
*/
public synchronized void saveDigestAndRenameCheckpointImage(
long txid, MD5Hash digest) throws IOException {
renameCheckpoint(txid);
// Write and rename MD5 file
List<StorageDirectory> badSds = Lists.newArrayList();
for (StorageDirectory sd : storage.dirIterable(NameNodeDirType.IMAGE)) {
@ -1144,6 +1161,10 @@ public class FSImage implements Closeable {
}
storage.reportErrorsOnDirectories(badSds);
CheckpointFaultInjector.getInstance().afterMD5Rename();
// Rename image from tmp file
renameCheckpoint(txid);
// So long as this is the newest image available,
// advertise it as such to other checkpointers
// from now on

View File

@ -300,8 +300,8 @@ public class FSImageFormat {
loadSecretManagerState(in);
// make sure to read to the end of file
int eof = in.read();
assert eof == -1 : "Should have reached the end of image file " + curFile;
boolean eof = (in.read() == -1);
assert eof : "Should have reached the end of image file " + curFile;
} finally {
in.close();
}

View File

@ -25,6 +25,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
@ -146,7 +147,7 @@ class FSImagePreTransactionalStorageInspector extends FSImageStorageInspector {
}
@Override
FSImageFile getLatestImage() throws IOException {
List<FSImageFile> getLatestImages() throws IOException {
// We should have at least one image and one edits dirs
if (latestNameSD == null)
throw new IOException("Image file is not found in " + imageDirs);
@ -176,9 +177,12 @@ class FSImagePreTransactionalStorageInspector extends FSImageStorageInspector {
needToSaveAfterRecovery = doRecovery();
return new FSImageFile(latestNameSD,
FSImageFile file = new FSImageFile(latestNameSD,
NNStorage.getStorageFile(latestNameSD, NameNodeFile.IMAGE),
HdfsConstants.INVALID_TXID);
LinkedList<FSImageFile> ret = new LinkedList<FSImageFile>();
ret.add(file);
return ret;
}
@Override

View File

@ -19,6 +19,8 @@ package org.apache.hadoop.hdfs.server.namenode;
import java.io.File;
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
@ -45,7 +47,7 @@ abstract class FSImageStorageInspector {
* Get the image files which should be loaded into the filesystem.
* @throws IOException if not enough files are available (eg no image found in any directory)
*/
abstract FSImageFile getLatestImage() throws IOException;
abstract List<FSImageFile> getLatestImages() throws IOException;
/**
* Get the minimum tx id which should be loaded with this set of images.

View File

@ -22,6 +22,7 @@ import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -108,24 +109,31 @@ class FSImageTransactionalStorageInspector extends FSImageStorageInspector {
}
/**
* @return the image that has the most recent associated transaction ID.
* If there are multiple storage directories which contain equal images
* the storage directory that was inspected first will be preferred.
* @return the image files that have the most recent associated
* transaction IDs. If there are multiple storage directories which
* contain equal images, we'll return them all.
*
* @throws FileNotFoundException if no images are found.
*/
@Override
FSImageFile getLatestImage() throws IOException {
if (foundImages.isEmpty()) {
throw new FileNotFoundException("No valid image files found");
}
FSImageFile ret = null;
List<FSImageFile> getLatestImages() throws IOException {
LinkedList<FSImageFile> ret = new LinkedList<FSImageFile>();
for (FSImageFile img : foundImages) {
if (ret == null || img.txId > ret.txId) {
ret = img;
if (ret.isEmpty()) {
ret.add(img);
} else {
FSImageFile cur = ret.getFirst();
if (cur.txId == img.txId) {
ret.add(img);
} else if (cur.txId < img.txId) {
ret.clear();
ret.add(img);
}
}
}
if (ret.isEmpty()) {
throw new FileNotFoundException("No valid image files found");
}
return ret;
}

View File

@ -34,6 +34,7 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_KEY
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOGGERS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT;
@ -603,7 +604,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
this.maxFsObjects = conf.getLong(DFS_NAMENODE_MAX_OBJECTS_KEY,
DFS_NAMENODE_MAX_OBJECTS_DEFAULT);
this.accessTimePrecision = conf.getLong(DFS_NAMENODE_ACCESSTIME_PRECISION_KEY, 0);
this.accessTimePrecision = conf.getLong(DFS_NAMENODE_ACCESSTIME_PRECISION_KEY,
DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT);
this.supportAppends = conf.getBoolean(DFS_SUPPORT_APPEND_KEY, DFS_SUPPORT_APPEND_DEFAULT);
LOG.info("Append Enabled: " + supportAppends);
@ -1137,8 +1139,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
*/
void metaSave(String filename) throws IOException {
checkSuperuserPrivilege();
checkOperation(OperationCategory.UNCHECKED);
writeLock();
try {
checkOperation(OperationCategory.UNCHECKED);
File file = new File(System.getProperty("hadoop.log.dir"), filename);
PrintWriter out = new PrintWriter(new BufferedWriter(
new OutputStreamWriter(new FileOutputStream(file, true), Charsets.UTF_8)));
@ -1212,6 +1216,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
UnresolvedLinkException, IOException {
HdfsFileStatus resultingStat = null;
FSPermissionChecker pc = getPermissionChecker();
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);
@ -1249,6 +1254,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
UnresolvedLinkException, IOException {
HdfsFileStatus resultingStat = null;
FSPermissionChecker pc = getPermissionChecker();
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);
@ -1359,13 +1365,20 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
throws FileNotFoundException, UnresolvedLinkException, IOException {
for (int attempt = 0; attempt < 2; attempt++) {
if (attempt == 0) { // first attempt is with readlock
boolean isReadOp = (attempt == 0);
if (isReadOp) { // first attempt is with readlock
checkOperation(OperationCategory.READ);
readLock();
} else { // second attempt is with write lock
checkOperation(OperationCategory.WRITE);
writeLock(); // writelock is needed to set accesstime
}
try {
checkOperation(OperationCategory.READ);
if (isReadOp) {
checkOperation(OperationCategory.READ);
} else {
checkOperation(OperationCategory.WRITE);
}
// if the namenode is in safemode, then do not update access time
if (isInSafeMode()) {
@ -1380,7 +1393,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
if (now <= inode.getAccessTime() + getAccessTimePrecision()) {
// if we have to set access time but we only have the readlock, then
// restart this entire operation with the writeLock.
if (attempt == 0) {
if (isReadOp) {
continue;
}
}
@ -1392,7 +1405,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
return blockManager.createLocatedBlocks(inode.getBlocks(), fileSize,
inode.isUnderConstruction(), offset, length, needBlockToken);
} finally {
if (attempt == 0) {
if (isReadOp) {
readUnlock();
} else {
writeUnlock();
@ -1448,6 +1461,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
HdfsFileStatus resultingStat = null;
FSPermissionChecker pc = getPermissionChecker();
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);
@ -1595,6 +1609,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
}
HdfsFileStatus resultingStat = null;
FSPermissionChecker pc = getPermissionChecker();
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);
@ -1636,6 +1651,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
throws IOException, UnresolvedLinkException {
HdfsFileStatus resultingStat = null;
FSPermissionChecker pc = getPermissionChecker();
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);
@ -1711,6 +1727,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
blockManager.verifyReplication(src, replication, null);
final boolean isFile;
FSPermissionChecker pc = getPermissionChecker();
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);
@ -1741,6 +1758,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
long getPreferredBlockSize(String filename)
throws IOException, UnresolvedLinkException {
FSPermissionChecker pc = getPermissionChecker();
checkOperation(OperationCategory.READ);
readLock();
try {
checkOperation(OperationCategory.READ);
@ -1803,6 +1821,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
boolean skipSync = false;
final HdfsFileStatus stat;
FSPermissionChecker pc = getPermissionChecker();
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);
@ -1995,6 +2014,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
throws IOException {
boolean skipSync = false;
FSPermissionChecker pc = getPermissionChecker();
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);
@ -2132,6 +2152,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
}
LocatedBlock lb = null;
FSPermissionChecker pc = getPermissionChecker();
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);
@ -2198,8 +2219,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
}
// Part I. Analyze the state of the file with respect to the input data.
checkOperation(OperationCategory.READ);
readLock();
try {
checkOperation(OperationCategory.READ);
LocatedBlock[] onRetryBlock = new LocatedBlock[1];
final INode[] inodes = analyzeFileState(
src, fileId, clientName, previous, onRetryBlock).getINodes();
@ -2226,8 +2249,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
// Allocate a new block, add it to the INode and the BlocksMap.
Block newBlock = null;
long offset;
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);
// Run the full analysis again, since things could have changed
// while chooseTarget() was executing.
LocatedBlock[] onRetryBlock = new LocatedBlock[1];
@ -2379,9 +2404,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
final DatanodeDescriptor clientnode;
final long preferredblocksize;
final List<DatanodeDescriptor> chosen;
checkOperation(OperationCategory.READ);
readLock();
try {
checkOperation(OperationCategory.WRITE);
checkOperation(OperationCategory.READ);
//check safe mode
if (isInSafeMode()) {
throw new SafeModeException("Cannot add datanode; src=" + src
@ -2421,6 +2447,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
boolean abandonBlock(ExtendedBlock b, String src, String holder)
throws LeaseExpiredException, FileNotFoundException,
UnresolvedLinkException, IOException {
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);
@ -2498,6 +2525,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
throws SafeModeException, UnresolvedLinkException, IOException {
checkBlock(last);
boolean success = false;
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);
@ -2669,6 +2697,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
" to " + dst);
}
FSPermissionChecker pc = getPermissionChecker();
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);
@ -2725,6 +2754,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
+ src + " to " + dst);
}
FSPermissionChecker pc = getPermissionChecker();
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);
@ -2811,6 +2841,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
IOException {
BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
FSPermissionChecker pc = getPermissionChecker();
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);
@ -2939,6 +2970,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
StandbyException, IOException {
HdfsFileStatus stat = null;
FSPermissionChecker pc = getPermissionChecker();
checkOperation(OperationCategory.READ);
readLock();
try {
checkOperation(OperationCategory.READ);
@ -2981,6 +3013,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
NameNode.stateChangeLog.debug("DIR* NameSystem.mkdirs: " + src);
}
FSPermissionChecker pc = getPermissionChecker();
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);
@ -3041,6 +3074,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
FileNotFoundException, UnresolvedLinkException, StandbyException {
FSPermissionChecker pc = new FSPermissionChecker(fsOwnerShortUserName,
supergroup);
checkOperation(OperationCategory.READ);
readLock();
try {
checkOperation(OperationCategory.READ);
@ -3061,6 +3095,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
void setQuota(String path, long nsQuota, long dsQuota)
throws IOException, UnresolvedLinkException {
checkSuperuserPrivilege();
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);
@ -3084,6 +3119,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
void fsync(String src, String clientName, long lastBlockLength)
throws IOException, UnresolvedLinkException {
NameNode.stateChangeLog.info("BLOCK* fsync: " + src + " for " + clientName);
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);
@ -3293,6 +3329,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
String[] newtargetstorages)
throws IOException, UnresolvedLinkException {
String src = "";
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);
@ -3397,6 +3434,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
* Renew the lease(s) held by the given client
*/
void renewLease(String holder) throws IOException {
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);
@ -3438,6 +3476,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
throws AccessControlException, UnresolvedLinkException, IOException {
DirectoryListing dl;
FSPermissionChecker pc = getPermissionChecker();
checkOperation(OperationCategory.READ);
readLock();
try {
checkOperation(OperationCategory.READ);
@ -3734,10 +3773,12 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
}
DatanodeInfo[] datanodeReport(final DatanodeReportType type
) throws AccessControlException {
) throws AccessControlException, StandbyException {
checkSuperuserPrivilege();
checkOperation(OperationCategory.UNCHECKED);
readLock();
try {
checkOperation(OperationCategory.UNCHECKED);
final DatanodeManager dm = getBlockManager().getDatanodeManager();
final List<DatanodeDescriptor> results = dm.getDatanodeListForReport(type);
@ -3761,8 +3802,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
*/
void saveNamespace() throws AccessControlException, IOException {
checkSuperuserPrivilege();
checkOperation(OperationCategory.UNCHECKED);
readLock();
try {
checkOperation(OperationCategory.UNCHECKED);
if (!isInSafeMode()) {
throw new IOException("Safe mode should be turned ON " +
"in order to create namespace image.");
@ -3780,10 +3823,13 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
*
* @throws AccessControlException if superuser privilege is violated.
*/
boolean restoreFailedStorage(String arg) throws AccessControlException {
boolean restoreFailedStorage(String arg) throws AccessControlException,
StandbyException {
checkSuperuserPrivilege();
checkOperation(OperationCategory.UNCHECKED);
writeLock();
try {
checkOperation(OperationCategory.UNCHECKED);
// if it is disabled - enable it and vice versa.
if(arg.equals("check"))
@ -3804,6 +3850,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
void finalizeUpgrade() throws IOException {
checkSuperuserPrivilege();
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);
@ -4543,6 +4590,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
CheckpointSignature rollEditLog() throws IOException {
checkSuperuserPrivilege();
checkOperation(OperationCategory.JOURNAL);
writeLock();
try {
checkOperation(OperationCategory.JOURNAL);
@ -4560,6 +4608,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
NamenodeRegistration bnReg, // backup node
NamenodeRegistration nnReg) // active name-node
throws IOException {
checkOperation(OperationCategory.CHECKPOINT);
writeLock();
try {
checkOperation(OperationCategory.CHECKPOINT);
@ -4578,6 +4627,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
void endCheckpoint(NamenodeRegistration registration,
CheckpointSignature sig) throws IOException {
checkOperation(OperationCategory.CHECKPOINT);
readLock();
try {
checkOperation(OperationCategory.CHECKPOINT);
@ -4866,6 +4916,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
* Client is reporting some bad block locations.
*/
void reportBadBlocks(LocatedBlock[] blocks) throws IOException {
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);
@ -4900,6 +4951,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
LocatedBlock updateBlockForPipeline(ExtendedBlock block,
String clientName) throws IOException {
LocatedBlock locatedBlock;
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);
@ -4931,6 +4983,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
void updatePipeline(String clientName, ExtendedBlock oldBlock,
ExtendedBlock newBlock, DatanodeID[] newNodes)
throws IOException {
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);
@ -5058,8 +5111,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
*/
void releaseBackupNode(NamenodeRegistration registration)
throws IOException {
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);
if(getFSImage().getStorage().getNamespaceID()
!= registration.getNamespaceID())
throw new IOException("Incompatible namespaceIDs: "
@ -5098,6 +5153,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
Collection<CorruptFileBlockInfo> listCorruptFileBlocks(String path,
String[] cookieTab) throws IOException {
checkSuperuserPrivilege();
checkOperation(OperationCategory.READ);
readLock();
try {
checkOperation(OperationCategory.READ);
@ -5190,6 +5246,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
Token<DelegationTokenIdentifier> getDelegationToken(Text renewer)
throws IOException {
Token<DelegationTokenIdentifier> token;
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);
@ -5236,6 +5293,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
long renewDelegationToken(Token<DelegationTokenIdentifier> token)
throws InvalidToken, IOException {
long expiryTime;
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);
@ -5268,6 +5326,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
*/
void cancelDelegationToken(Token<DelegationTokenIdentifier> token)
throws IOException {
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);

View File

@ -339,7 +339,6 @@ class NameNodeRpcServer implements NamenodeProtocols {
throw new IllegalArgumentException(
"Unexpected not positive size: "+size);
}
namesystem.checkOperation(OperationCategory.READ);
namesystem.checkSuperuserPrivilege();
return namesystem.getBlockManager().getBlocks(datanode, size);
}
@ -709,7 +708,6 @@ class NameNodeRpcServer implements NamenodeProtocols {
@Override // ClientProtocol
public DatanodeInfo[] getDatanodeReport(DatanodeReportType type)
throws IOException {
namesystem.checkOperation(OperationCategory.UNCHECKED);
DatanodeInfo results[] = namesystem.datanodeReport(type);
if (results == null ) {
throw new IOException("Cannot find datanode report");
@ -734,19 +732,16 @@ class NameNodeRpcServer implements NamenodeProtocols {
@Override // ClientProtocol
public boolean restoreFailedStorage(String arg) throws IOException {
namesystem.checkOperation(OperationCategory.UNCHECKED);
return namesystem.restoreFailedStorage(arg);
}
@Override // ClientProtocol
public void saveNamespace() throws IOException {
namesystem.checkOperation(OperationCategory.UNCHECKED);
namesystem.saveNamespace();
}
@Override // ClientProtocol
public long rollEdits() throws AccessControlException, IOException {
namesystem.checkOperation(OperationCategory.JOURNAL);
CheckpointSignature sig = namesystem.rollEditLog();
return sig.getCurSegmentTxId();
}
@ -791,7 +786,6 @@ class NameNodeRpcServer implements NamenodeProtocols {
@Override // ClientProtocol
public void metaSave(String filename) throws IOException {
namesystem.checkOperation(OperationCategory.UNCHECKED);
namesystem.metaSave(filename);
}

View File

@ -18,7 +18,9 @@
package org.apache.hadoop.hdfs.server.namenode;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory;
import org.apache.hadoop.hdfs.util.RwLock;
import org.apache.hadoop.ipc.StandbyException;
import org.apache.hadoop.security.AccessControlException;
/** Namesystem operations. */
@ -38,4 +40,6 @@ public interface Namesystem extends RwLock, SafeMode {
public boolean isGenStampInFuture(long generationStamp);
public void adjustSafeModeBlockTotals(int deltaSafe, int deltaTotal);
public void checkOperation(OperationCategory read) throws StandbyException;
}

View File

@ -64,9 +64,17 @@ public interface HAContext {
void writeUnlock();
/**
* Verify that the given operation category is allowed in the
* current state. This is to allow NN implementations (eg BackupNode)
* to override it with node-specific handling.
* Verify that the given operation category is allowed in the current state.
* This is to allow NN implementations (eg BackupNode) to override it with
* node-specific handling.
*
* If the operation which is being checked will be taking the FSNS lock, it's
* advisable to check the operation category both immediately before and after
* taking the lock. This is because clients rely on the StandbyException
* thrown by this method in order to trigger client failover, and if a client
* first tries to contact the Standby NN, it could block for a long time if
* the Standby is holding the lock for a while, e.g. when performing a
* checkpoint. See HDFS-4591 for more details.
*/
void checkOperation(OperationCategory op) throws StandbyException;
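Editor's note: taken together with the FSNamesystem hunks earlier in this change, the comment above describes a double-check pattern — checkOperation() is called once before blocking on the FSNS lock, so a client that reaches a standby fails over promptly even while the StandbyCheckpointer holds the lock, and once again after acquiring it, in case the HA state changed while waiting. A small standalone sketch of the shape, with hypothetical names standing in for the real HA machinery:

    import java.util.concurrent.locks.ReentrantReadWriteLock;

    // Hypothetical illustration only; StandbyException and the active flag stand in for HDFS's HA state.
    class DoubleCheckedService {
        static class StandbyException extends Exception {}

        private final ReentrantReadWriteLock fsLock = new ReentrantReadWriteLock();
        private volatile boolean active = true;

        private void checkOperation() throws StandbyException {
            if (!active) {
                throw new StandbyException();     // clients use this to trigger failover
            }
        }

        void mutate(Runnable edit) throws StandbyException {
            checkOperation();                     // fail fast, before blocking on the lock
            fsLock.writeLock().lock();
            try {
                checkOperation();                 // re-check: state may have flipped while we waited
                edit.run();
            } finally {
                fsLock.writeLock().unlock();
            }
        }
    }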

View File

@ -72,7 +72,7 @@ public class LightWeightGSet<K, E extends K> implements GSet<K, E> {
/** Modification version for fail-fast.
* @see ConcurrentModificationException
*/
private volatile int modification = 0;
private int modification = 0;
/**
* @param recommended_length Recommended size of the internal array.

View File

@ -29,6 +29,7 @@ import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
@ -376,17 +377,6 @@ public class WebHdfsFileSystem extends FileSystem
return url;
}
private HttpURLConnection getHttpUrlConnection(URL url)
throws IOException, AuthenticationException {
final HttpURLConnection conn;
if (ugi.hasKerberosCredentials()) {
conn = new AuthenticatedURL(AUTH).openConnection(url, authToken);
} else {
conn = (HttpURLConnection)url.openConnection();
}
return conn;
}
/**
* Run a http operation.
* Connect to the http server, validate response, and obtain the JSON output.
@ -431,6 +421,48 @@ public class WebHdfsFileSystem extends FileSystem
this.conn = conn;
}
private HttpURLConnection getHttpUrlConnection(final URL url)
throws IOException, AuthenticationException {
UserGroupInformation connectUgi = ugi.getRealUser();
if (connectUgi == null) {
connectUgi = ugi;
}
try {
return connectUgi.doAs(
new PrivilegedExceptionAction<HttpURLConnection>() {
@Override
public HttpURLConnection run() throws IOException {
return openHttpUrlConnection(url);
}
});
} catch (IOException ioe) {
Throwable cause = ioe.getCause();
if (cause != null && cause instanceof AuthenticationException) {
throw (AuthenticationException)cause;
}
throw ioe;
} catch (InterruptedException e) {
throw new IOException(e);
}
}
private HttpURLConnection openHttpUrlConnection(final URL url)
throws IOException {
final HttpURLConnection conn;
try {
if (op.getRequireAuth()) {
LOG.debug("open AuthenticatedURL connection");
conn = new AuthenticatedURL(AUTH).openConnection(url, authToken);
} else {
LOG.debug("open URL connection");
conn = (HttpURLConnection)url.openConnection();
}
} catch (AuthenticationException e) {
throw new IOException(e);
}
return conn;
}
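Editor's note: the rewritten getHttpUrlConnection() runs the connection setup inside doAs() so that, when the caller is a proxy (impersonated) user, the authenticated handshake happens as the real user, and it unwraps a doAs-wrapped AuthenticationException back into its original type. A compact sketch of the same real-user/doAs idiom, using a hypothetical helper name and assuming the standard UserGroupInformation API:

    import java.io.IOException;
    import java.security.PrivilegedExceptionAction;
    import org.apache.hadoop.security.UserGroupInformation;

    class DoAsSketch {
        // Run an I/O action as the real (proxying) user when one exists; hypothetical helper,
        // not the WebHdfsFileSystem implementation.
        static <T> T runAsRealUser(UserGroupInformation ugi, PrivilegedExceptionAction<T> action)
                throws IOException {
            UserGroupInformation connectUgi = ugi.getRealUser();
            if (connectUgi == null) {
                connectUgi = ugi;                 // not a proxy user: run as ourselves
            }
            try {
                return connectUgi.doAs(action);
            } catch (InterruptedException e) {
                throw new IOException(e);         // narrow the checked-exception surface for callers
            }
        }
    }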
private void init() throws IOException {
checkRetry = !redirected;
try {

View File

@ -0,0 +1,64 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs;
import java.util.ArrayList;
import java.util.List;
import org.apache.log4j.AppenderSkeleton;
import org.apache.log4j.spi.LoggingEvent;
import org.apache.log4j.spi.ThrowableInformation;
/**
* Used to verify that certain exceptions or messages are present in log output.
*/
public class LogVerificationAppender extends AppenderSkeleton {
private final List<LoggingEvent> log = new ArrayList<LoggingEvent>();
@Override
public boolean requiresLayout() {
return false;
}
@Override
protected void append(final LoggingEvent loggingEvent) {
log.add(loggingEvent);
}
@Override
public void close() {
}
public List<LoggingEvent> getLog() {
return new ArrayList<LoggingEvent>(log);
}
public int countExceptionsWithMessage(final String text) {
int count = 0;
for (LoggingEvent e: getLog()) {
ThrowableInformation t = e.getThrowableInformation();
if (t != null) {
String m = t.getThrowable().getMessage();
if (m.contains(text)) {
count++;
}
}
}
return count;
}
}
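Editor's note: a condensed example of how the tests later in this change (TestDFSUpgradeFromImage, TestReplicationPolicy, TestStartup) drive this appender; the logged message text and the removeAppender() cleanup step are illustrative additions, not copied from the hunks.

    import static org.junit.Assert.assertEquals;
    import java.io.IOException;
    import org.apache.log4j.Logger;
    import org.junit.Test;

    public class LogVerificationAppenderExample {
        @Test
        public void countsLoggedExceptions() {
            // Attach to the root logger so events from any class are captured.
            final LogVerificationAppender appender = new LogVerificationAppender();
            final Logger root = Logger.getRootLogger();
            root.addAppender(appender);
            try {
                Logger.getLogger(getClass()).error("image load failed",
                    new IOException("fsimage is corrupt with MD5 checksum of 0000"));
            } finally {
                root.removeAppender(appender);    // keep later tests' logging clean
            }
            assertEquals(1,
                appender.countExceptionsWithMessage("is corrupt with MD5 checksum of"));
        }
    }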

View File

@ -45,6 +45,7 @@ import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil;
import org.apache.hadoop.util.StringUtils;
import org.apache.log4j.Logger;
import org.junit.Test;
/**
@ -293,6 +294,11 @@ public class TestDFSUpgradeFromImage {
new File(baseDir, "name2/current/VERSION"),
"imageMD5Digest", "22222222222222222222222222222222");
// Attach our own log appender so we can verify output
final LogVerificationAppender appender = new LogVerificationAppender();
final Logger logger = Logger.getRootLogger();
logger.addAppender(appender);
// Upgrade should now fail
try {
upgradeAndVerify(new MiniDFSCluster.Builder(upgradeConf).
@ -300,9 +306,12 @@ public class TestDFSUpgradeFromImage {
fail("Upgrade did not fail with bad MD5");
} catch (IOException ioe) {
String msg = StringUtils.stringifyException(ioe);
if (!msg.contains("is corrupt with MD5 checksum")) {
if (!msg.contains("Failed to load an FSImage file")) {
throw ioe;
}
int md5failures = appender.countExceptionsWithMessage(
" is corrupt with MD5 checksum of ");
assertEquals("Upgrade did not fail with bad MD5", 1, md5failures);
}
}

View File

@ -19,6 +19,7 @@
package org.apache.hadoop.hdfs;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
@ -39,7 +40,7 @@ import org.junit.Test;
public class TestHftpURLTimeouts {
@BeforeClass
public static void setup() {
URLUtils.SOCKET_TIMEOUT = 1;
URLUtils.SOCKET_TIMEOUT = 5;
}
@Test
@ -116,6 +117,7 @@ public class TestHftpURLTimeouts {
conns.add(fs.openConnection("/", ""));
} catch (SocketTimeoutException ste) {
String message = ste.getMessage();
assertNotNull(message);
// https will get a read timeout due to SSL negotiation, but
// a normal http will not, so need to ignore SSL read timeouts
// until a connect timeout occurs

View File

@ -65,7 +65,7 @@ public class TestMiniDFSCluster {
*
* @throws Throwable on a failure
*/
@Test
@Test(timeout=100000)
public void testClusterWithoutSystemProperties() throws Throwable {
System.clearProperty(MiniDFSCluster.PROP_TEST_BUILD_DATA);
Configuration conf = new HdfsConfiguration();
@ -74,7 +74,8 @@ public class TestMiniDFSCluster {
conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, c1Path);
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
try {
assertEquals(c1Path+"/data", cluster.getDataDirectory());
assertEquals(new File(c1Path + "/data"),
new File(cluster.getDataDirectory()));
} finally {
cluster.shutdown();
}
@ -84,7 +85,7 @@ public class TestMiniDFSCluster {
* Bring up two clusters and assert that they are in different directories.
* @throws Throwable on a failure
*/
@Test
@Test(timeout=100000)
public void testDualClusters() throws Throwable {
File testDataCluster2 = new File(testDataPath, CLUSTER_2);
File testDataCluster3 = new File(testDataPath, CLUSTER_3);
@ -95,7 +96,7 @@ public class TestMiniDFSCluster {
MiniDFSCluster cluster3 = null;
try {
String dataDir2 = cluster2.getDataDirectory();
assertEquals(c2Path + "/data", dataDir2);
assertEquals(new File(c2Path + "/data"), new File(dataDir2));
//change the data dir
conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR,
testDataCluster3.getAbsolutePath());
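Editor's note: the assertions above now compare java.io.File objects instead of raw path strings, since File normalizes the separator and keeps the check stable on Windows, where getDataDirectory() reports backslash-separated paths. A tiny illustration, assuming a POSIX-style expected string:

    import java.io.File;

    class PathCompareSketch {
        public static void main(String[] args) {
            String expected = "c1/data";                       // hypothetical expected path
            String actual   = "c1" + File.separator + "data";  // what the cluster might report
            System.out.println(expected.equals(actual));                     // false on Windows
            System.out.println(new File(expected).equals(new File(actual))); // true on both platforms
        }
    }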

View File

@ -18,9 +18,11 @@
package org.apache.hadoop.hdfs;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.assertFalse;
import java.io.EOFException;
import java.io.IOException;
import java.net.URI;
import java.nio.ByteBuffer;
import java.security.PrivilegedExceptionAction;
@ -32,6 +34,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.client.HdfsDataInputStream;
import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo;
import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol;
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
@ -85,9 +88,20 @@ public class TestShortCircuitLocalRead {
}
}
}
private static String getCurrentUser() throws IOException {
return UserGroupInformation.getCurrentUser().getShortUserName();
}
static void checkFileContent(FileSystem fs, Path name, byte[] expected,
int readOffset) throws IOException {
/** Check file content, reading as user {@code readingUser} */
static void checkFileContent(URI uri, Path name, byte[] expected,
int readOffset, String readingUser, Configuration conf,
boolean shortCircuitFails)
throws IOException, InterruptedException {
// Ensure short circuit is enabled
DistributedFileSystem fs = getFileSystem(readingUser, uri, conf);
assertTrue(fs.getClient().getShortCircuitLocalReads());
FSDataInputStream stm = fs.open(name);
byte[] actual = new byte[expected.length-readOffset];
stm.readFully(readOffset, actual);
@ -112,6 +126,11 @@ public class TestShortCircuitLocalRead {
nread += nbytes;
}
checkData(actual, readOffset, expected, "Read 3");
if (shortCircuitFails) {
// short circuit should be disabled due to failure
assertFalse(fs.getClient().getShortCircuitLocalReads());
}
stm.close();
}
@ -123,11 +142,15 @@ public class TestShortCircuitLocalRead {
return arr;
}
/**
* Verifies that reading a file with the direct read(ByteBuffer) api gives the expected set of bytes.
*/
static void checkFileContentDirect(FileSystem fs, Path name, byte[] expected,
int readOffset) throws IOException {
/** Check the file content, reading as user {@code readingUser} */
static void checkFileContentDirect(URI uri, Path name, byte[] expected,
int readOffset, String readingUser, Configuration conf,
boolean shortCircuitFails)
throws IOException, InterruptedException {
// Ensure short circuit is enabled
DistributedFileSystem fs = getFileSystem(readingUser, uri, conf);
assertTrue(fs.getClient().getShortCircuitLocalReads());
HdfsDataInputStream stm = (HdfsDataInputStream)fs.open(name);
ByteBuffer actual = ByteBuffer.allocateDirect(expected.length - readOffset);
@ -157,21 +180,33 @@ public class TestShortCircuitLocalRead {
nread += nbytes;
}
checkData(arrayFromByteBuffer(actual), readOffset, expected, "Read 3");
if (shortCircuitFails) {
// short circuit should be disabled due to failure
assertFalse(fs.getClient().getShortCircuitLocalReads());
}
stm.close();
}
public void doTestShortCircuitRead(boolean ignoreChecksum, int size,
int readOffset) throws IOException, InterruptedException {
String shortCircuitUser = getCurrentUser();
doTestShortCircuitRead(ignoreChecksum, size, readOffset, shortCircuitUser,
shortCircuitUser, false);
}
/**
* Test that file data can be read by reading the block file
* directly from the local store.
*/
public void doTestShortCircuitRead(boolean ignoreChecksum, int size,
int readOffset) throws IOException {
int readOffset, String shortCircuitUser, String readingUser,
boolean shortCircuitFails) throws IOException, InterruptedException {
Configuration conf = new Configuration();
conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY,
ignoreChecksum);
conf.set(DFSConfigKeys.DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY,
UserGroupInformation.getCurrentUser().getShortUserName());
shortCircuitUser);
if (simulatedStorage) {
SimulatedFSDataset.setFactory(conf);
}
@ -184,53 +219,88 @@ public class TestShortCircuitLocalRead {
assertTrue("/ should be a directory", fs.getFileStatus(path)
.isDirectory() == true);
byte[] fileData = AppendTestUtil.randomBytes(seed, size);
// create a new file in home directory. Do not close it.
Path file1 = new Path("filelocal.dat");
byte[] fileData = AppendTestUtil.randomBytes(seed, size);
Path file1 = fs.makeQualified(new Path("filelocal.dat"));
FSDataOutputStream stm = createFile(fs, file1, 1);
// write to file
stm.write(fileData);
stm.close();
checkFileContent(fs, file1, fileData, readOffset);
checkFileContentDirect(fs, file1, fileData, readOffset);
URI uri = cluster.getURI();
checkFileContent(uri, file1, fileData, readOffset, readingUser, conf,
shortCircuitFails);
checkFileContentDirect(uri, file1, fileData, readOffset, readingUser,
conf, shortCircuitFails);
} finally {
fs.close();
cluster.shutdown();
}
}
@Test
public void testFileLocalReadNoChecksum() throws IOException {
@Test(timeout=10000)
public void testFileLocalReadNoChecksum() throws Exception {
doTestShortCircuitRead(true, 3*blockSize+100, 0);
}
@Test
public void testFileLocalReadChecksum() throws IOException {
@Test(timeout=10000)
public void testFileLocalReadChecksum() throws Exception {
doTestShortCircuitRead(false, 3*blockSize+100, 0);
}
@Test
public void testSmallFileLocalRead() throws IOException {
@Test(timeout=10000)
public void testSmallFileLocalRead() throws Exception {
doTestShortCircuitRead(false, 13, 0);
doTestShortCircuitRead(false, 13, 5);
doTestShortCircuitRead(true, 13, 0);
doTestShortCircuitRead(true, 13, 5);
}
@Test
public void testReadFromAnOffset() throws IOException {
/**
* Try a short circuit read from a reader that is not allowed to
* use short circuit. The test ensures the reader falls back to
* non-short-circuit reads when short circuit is disallowed.

*/
@Test(timeout=10000)
public void testLocalReadFallback() throws Exception {
doTestShortCircuitRead(true, 13, 0, getCurrentUser(), "notallowed", true);
}
@Test(timeout=10000)
public void testReadFromAnOffset() throws Exception {
doTestShortCircuitRead(false, 3*blockSize+100, 777);
doTestShortCircuitRead(true, 3*blockSize+100, 777);
}
@Test
public void testLongFile() throws IOException {
@Test(timeout=10000)
public void testLongFile() throws Exception {
doTestShortCircuitRead(false, 10*blockSize+100, 777);
doTestShortCircuitRead(true, 10*blockSize+100, 777);
}
@Test
private ClientDatanodeProtocol getProxy(UserGroupInformation ugi,
final DatanodeID dnInfo, final Configuration conf) throws IOException,
InterruptedException {
return ugi.doAs(new PrivilegedExceptionAction<ClientDatanodeProtocol>() {
@Override
public ClientDatanodeProtocol run() throws Exception {
return DFSUtil.createClientDatanodeProtocolProxy(dnInfo, conf, 60000,
false);
}
});
}
private static DistributedFileSystem getFileSystem(String user, final URI uri,
final Configuration conf) throws InterruptedException, IOException {
UserGroupInformation ugi = UserGroupInformation.createRemoteUser(user);
return ugi.doAs(new PrivilegedExceptionAction<DistributedFileSystem>() {
@Override
public DistributedFileSystem run() throws Exception {
return (DistributedFileSystem)FileSystem.get(uri, conf);
}
});
}
@Test(timeout=10000)
public void testGetBlockLocalPathInfo() throws IOException, InterruptedException {
final Configuration conf = new Configuration();
conf.set(DFSConfigKeys.DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY,
@ -253,15 +323,7 @@ public class TestShortCircuitLocalRead {
ExtendedBlock blk = new ExtendedBlock(lb.get(0).getBlock());
Token<BlockTokenIdentifier> token = lb.get(0).getBlockToken();
final DatanodeInfo dnInfo = lb.get(0).getLocations()[0];
ClientDatanodeProtocol proxy = aUgi1
.doAs(new PrivilegedExceptionAction<ClientDatanodeProtocol>() {
@Override
public ClientDatanodeProtocol run() throws Exception {
return DFSUtil.createClientDatanodeProtocolProxy(dnInfo, conf,
60000, false);
}
});
ClientDatanodeProtocol proxy = getProxy(aUgi1, dnInfo, conf);
// This should succeed
BlockLocalPathInfo blpi = proxy.getBlockLocalPathInfo(blk, token);
Assert.assertEquals(
@ -269,14 +331,7 @@ public class TestShortCircuitLocalRead {
blpi.getBlockPath());
// Try with the other allowed user
proxy = aUgi2
.doAs(new PrivilegedExceptionAction<ClientDatanodeProtocol>() {
@Override
public ClientDatanodeProtocol run() throws Exception {
return DFSUtil.createClientDatanodeProtocolProxy(dnInfo, conf,
60000, false);
}
});
proxy = getProxy(aUgi2, dnInfo, conf);
// This should succeed as well
blpi = proxy.getBlockLocalPathInfo(blk, token);
@ -287,14 +342,7 @@ public class TestShortCircuitLocalRead {
// Now try with a disallowed user
UserGroupInformation bUgi = UserGroupInformation
.createRemoteUser("notalloweduser");
proxy = bUgi
.doAs(new PrivilegedExceptionAction<ClientDatanodeProtocol>() {
@Override
public ClientDatanodeProtocol run() throws Exception {
return DFSUtil.createClientDatanodeProtocolProxy(dnInfo, conf,
60000, false);
}
});
proxy = getProxy(bUgi, dnInfo, conf);
try {
proxy.getBlockLocalPathInfo(blk, token);
Assert.fail("The call should have failed as " + bUgi.getShortUserName()
@ -309,14 +357,14 @@ public class TestShortCircuitLocalRead {
}
}
@Test
@Test(timeout=10000)
public void testSkipWithVerifyChecksum() throws IOException {
int size = blockSize;
Configuration conf = new Configuration();
conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, false);
conf.set(DFSConfigKeys.DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY,
UserGroupInformation.getCurrentUser().getShortUserName());
getCurrentUser());
if (simulatedStorage) {
SimulatedFSDataset.setFactory(conf);
}
@ -356,7 +404,7 @@ public class TestShortCircuitLocalRead {
}
/**
* Test to run benchmarks between shortcircuit read vs regular read with
* Test to run benchmarks between short circuit read vs regular read with
* specified number of threads simultaneously reading.
* <br>
* Run this using the following command:
@ -374,7 +422,7 @@ public class TestShortCircuitLocalRead {
int threadCount = Integer.valueOf(args[2]);
// Setup create a file
Configuration conf = new Configuration();
final Configuration conf = new Configuration();
conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, shortcircuit);
conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY,
checksum);
@ -400,9 +448,13 @@ public class TestShortCircuitLocalRead {
public void run() {
for (int i = 0; i < iteration; i++) {
try {
checkFileContent(fs, file1, dataToWrite, 0);
String user = getCurrentUser();
checkFileContent(fs.getUri(), file1, dataToWrite, 0, user, conf,
true);
} catch (IOException e) {
e.printStackTrace();
} catch (InterruptedException e) {
e.printStackTrace();
}
}
}

View File

@ -46,6 +46,7 @@ import org.apache.hadoop.metrics2.MetricsRecordBuilder;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.test.MetricsAsserts;
import org.apache.hadoop.util.Shell;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
@ -65,6 +66,8 @@ public class TestJournalNode {
private Configuration conf = new Configuration();
private IPCLoggerChannel ch;
private String journalId;
private File TEST_BUILD_DATA =
new File(System.getProperty("test.build.data", "build/test/data"));
static {
// Avoid an error when we double-initialize JvmMetrics
@ -96,7 +99,7 @@ public class TestJournalNode {
jn.stop(0);
}
@Test
@Test(timeout=100000)
public void testJournal() throws Exception {
MetricsRecordBuilder metrics = MetricsAsserts.getMetrics(
journal.getMetricsForTests().getName());
@ -129,7 +132,7 @@ public class TestJournalNode {
}
@Test
@Test(timeout=100000)
public void testReturnsSegmentInfoAtEpochTransition() throws Exception {
ch.newEpoch(1).get();
ch.setEpoch(1);
@ -157,7 +160,7 @@ public class TestJournalNode {
assertEquals(1, response.getLastSegmentTxId());
}
@Test
@Test(timeout=100000)
public void testHttpServer() throws Exception {
InetSocketAddress addr = jn.getBoundHttpAddress();
assertTrue(addr.getPort() > 0);
@ -210,7 +213,7 @@ public class TestJournalNode {
* Test that the JournalNode performs correctly as a Paxos
* <em>Acceptor</em> process.
*/
@Test
@Test(timeout=100000)
public void testAcceptRecoveryBehavior() throws Exception {
// We need to run newEpoch() first, or else we have no way to distinguish
// different proposals for the same decision.
@ -270,20 +273,27 @@ public class TestJournalNode {
}
}
@Test
@Test(timeout=100000)
public void testFailToStartWithBadConfig() throws Exception {
Configuration conf = new Configuration();
conf.set(DFSConfigKeys.DFS_JOURNALNODE_EDITS_DIR_KEY, "non-absolute-path");
assertJNFailsToStart(conf, "should be an absolute path");
// Existing file which is not a directory
conf.set(DFSConfigKeys.DFS_JOURNALNODE_EDITS_DIR_KEY, "/dev/null");
assertJNFailsToStart(conf, "is not a directory");
File existingFile = new File(TEST_BUILD_DATA, "testjournalnodefile");
assertTrue(existingFile.createNewFile());
try {
conf.set(DFSConfigKeys.DFS_JOURNALNODE_EDITS_DIR_KEY,
existingFile.getAbsolutePath());
assertJNFailsToStart(conf, "Not a directory");
} finally {
existingFile.delete();
}
// Directory which cannot be created
conf.set(DFSConfigKeys.DFS_JOURNALNODE_EDITS_DIR_KEY, "/proc/does-not-exist");
assertJNFailsToStart(conf, "Could not create");
conf.set(DFSConfigKeys.DFS_JOURNALNODE_EDITS_DIR_KEY,
Shell.WINDOWS ? "\\\\cannotBeCreated" : "/proc/does-not-exist");
assertJNFailsToStart(conf, "Can not create directory");
}
private static void assertJNFailsToStart(Configuration conf,

View File

@ -104,7 +104,7 @@ public class TestNodeCount {
while (iter.hasNext()) {
DatanodeDescriptor dn = iter.next();
Collection<Block> blocks = bm.excessReplicateMap.get(dn.getStorageID());
if (blocks == null || !blocks.contains(block) ) {
if (blocks == null || !blocks.contains(block.getLocalBlock()) ) {
nonExcessDN = dn;
break;
}

View File

@ -36,6 +36,7 @@ import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.LogVerificationAppender;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
@ -45,7 +46,6 @@ import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.net.NetworkTopology;
import org.apache.hadoop.net.Node;
import org.apache.hadoop.util.Time;
import org.apache.log4j.AppenderSkeleton;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.log4j.spi.LoggingEvent;
@ -419,7 +419,7 @@ public class TestReplicationPolicy {
(HdfsConstants.MIN_BLOCKS_FOR_WRITE-1)*BLOCK_SIZE, 0L, 0, 0);
}
final TestAppender appender = new TestAppender();
final LogVerificationAppender appender = new LogVerificationAppender();
final Logger logger = Logger.getRootLogger();
logger.addAppender(appender);
@ -446,28 +446,6 @@ public class TestReplicationPolicy {
HdfsConstants.MIN_BLOCKS_FOR_WRITE*BLOCK_SIZE, 0L, 0, 0);
}
}
class TestAppender extends AppenderSkeleton {
private final List<LoggingEvent> log = new ArrayList<LoggingEvent>();
@Override
public boolean requiresLayout() {
return false;
}
@Override
protected void append(final LoggingEvent loggingEvent) {
log.add(loggingEvent);
}
@Override
public void close() {
}
public List<LoggingEvent> getLog() {
return new ArrayList<LoggingEvent>(log);
}
}
private boolean containsWithinRange(DatanodeDescriptor target,
DatanodeDescriptor[] nodes, int startIndex, int endIndex) {

View File

@ -27,33 +27,26 @@ import java.util.List;
import org.junit.Test;
import static org.junit.Assert.*;
import static org.mockito.Mockito.*;
import static org.apache.hadoop.test.MockitoMaker.*;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.server.datanode.DataNode.DataNodeDiskChecker;
public class TestDataDirs {
@Test public void testGetDataDirsFromURIs() throws Throwable {
File localDir = make(stub(File.class).returning(true).from.exists());
when(localDir.mkdir()).thenReturn(true);
FsPermission normalPerm = new FsPermission("700");
FsPermission badPerm = new FsPermission("000");
FileStatus stat = make(stub(FileStatus.class)
.returning(normalPerm, normalPerm, badPerm).from.getPermission());
when(stat.isDirectory()).thenReturn(true);
LocalFileSystem fs = make(stub(LocalFileSystem.class)
.returning(stat).from.getFileStatus(any(Path.class)));
when(fs.pathToFile(any(Path.class))).thenReturn(localDir);
@Test (timeout = 10000)
public void testGetDataDirsFromURIs() throws Throwable {
DataNodeDiskChecker diskChecker = mock(DataNodeDiskChecker.class);
doThrow(new IOException()).doThrow(new IOException()).doNothing()
.when(diskChecker).checkDir(any(LocalFileSystem.class), any(Path.class));
LocalFileSystem fs = mock(LocalFileSystem.class);
Collection<URI> uris = Arrays.asList(new URI("file:/p1/"),
new URI("file:/p2/"), new URI("file:/p3/"));
List<File> dirs = DataNode.getDataDirsFromURIs(uris, fs, normalPerm);
verify(fs, times(2)).setPermission(any(Path.class), eq(normalPerm));
verify(fs, times(6)).getFileStatus(any(Path.class));
assertEquals("number of valid data dirs", dirs.size(), 1);
List<File> dirs = DataNode.getDataDirsFromURIs(uris, fs, diskChecker);
assertEquals("number of valid data dirs", 1, dirs.size());
String validDir = dirs.iterator().next().getPath();
assertEquals("p3 should be valid", new File("/p3").getPath(), validDir);
}
}
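Editor's note: the rewritten test leans on Mockito's consecutive stubbing — the first two checkDir() calls throw, the third succeeds, so only the last URI survives. A minimal standalone sketch of the same idiom against a hypothetical Checker interface (not the DataNodeDiskChecker API):

    import static org.mockito.Mockito.anyString;
    import static org.mockito.Mockito.doNothing;
    import static org.mockito.Mockito.doThrow;
    import static org.mockito.Mockito.mock;
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class ConsecutiveStubbingSketch {
        interface Checker { void check(String dir) throws IOException; }

        static List<String> validDirs(List<String> dirs, Checker checker) {
            List<String> ok = new ArrayList<String>();
            for (String d : dirs) {
                try {
                    checker.check(d);
                    ok.add(d);                      // keep directories that pass the check
                } catch (IOException e) {
                    // skip directories that fail, as getDataDirsFromURIs does
                }
            }
            return ok;
        }

        public static void main(String[] args) throws IOException {
            Checker checker = mock(Checker.class);
            doThrow(new IOException()).doThrow(new IOException()).doNothing()
                .when(checker).check(anyString());
            System.out.println(validDirs(Arrays.asList("/p1", "/p2", "/p3"), checker)); // [/p3]
        }
    }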

View File

@ -274,15 +274,15 @@ public abstract class FSImageTestUtil {
for (File dir : dirs) {
FSImageTransactionalStorageInspector inspector =
inspectStorageDirectory(dir, NameNodeDirType.IMAGE);
FSImageFile latestImage = inspector.getLatestImage();
assertNotNull("No image in " + dir, latestImage);
long thisTxId = latestImage.getCheckpointTxId();
List<FSImageFile> latestImages = inspector.getLatestImages();
assert(!latestImages.isEmpty());
long thisTxId = latestImages.get(0).getCheckpointTxId();
if (imageTxId != -1 && thisTxId != imageTxId) {
fail("Storage directory " + dir + " does not have the same " +
"last image index " + imageTxId + " as another");
}
imageTxId = thisTxId;
imageFiles.add(inspector.getLatestImage().getFile());
imageFiles.add(inspector.getLatestImages().get(0).getFile());
}
assertFileContentsSame(imageFiles.toArray(new File[0]));
@ -426,7 +426,7 @@ public abstract class FSImageTestUtil {
new FSImageTransactionalStorageInspector();
inspector.inspectDirectory(sd);
return inspector.getLatestImage().getFile();
return inspector.getLatestImages().get(0).getFile();
}
/**
@ -441,8 +441,8 @@ public abstract class FSImageTestUtil {
new FSImageTransactionalStorageInspector();
inspector.inspectDirectory(sd);
FSImageFile latestImage = inspector.getLatestImage();
return (latestImage == null) ? null : latestImage.getFile();
List<FSImageFile> latestImages = inspector.getLatestImages();
return (latestImages.isEmpty()) ? null : latestImages.get(0).getFile();
}
/**

View File

@ -231,7 +231,7 @@ public class TestCheckpoint {
/*
* Simulate exception during edit replay.
*/
@Test(timeout=5000)
@Test(timeout=30000)
public void testReloadOnEditReplayFailure () throws IOException {
Configuration conf = new HdfsConfiguration();
FSDataOutputStream fos = null;
@ -1411,6 +1411,60 @@ public class TestCheckpoint {
}
}
/**
* Test NN restart if a failure happens in between creating the fsimage
* MD5 file and renaming the fsimage.
*/
@Test(timeout=30000)
public void testFailureBeforeRename () throws IOException {
Configuration conf = new HdfsConfiguration();
FSDataOutputStream fos = null;
SecondaryNameNode secondary = null;
MiniDFSCluster cluster = null;
FileSystem fs = null;
NameNode namenode = null;
try {
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDatanodes)
.build();
cluster.waitActive();
namenode = cluster.getNameNode();
fs = cluster.getFileSystem();
secondary = startSecondaryNameNode(conf);
fos = fs.create(new Path("tmpfile0"));
fos.write(new byte[] { 0, 1, 2, 3 });
secondary.doCheckpoint();
fos.write(new byte[] { 0, 1, 2, 3 });
fos.hsync();
// Cause merge to fail in next checkpoint.
Mockito.doThrow(new IOException(
"Injecting failure after MD5Rename"))
.when(faultInjector).afterMD5Rename();
try {
secondary.doCheckpoint();
fail("Fault injection failed.");
} catch (IOException ioe) {
// This is expected.
}
Mockito.reset(faultInjector);
// Namenode should still restart successfully
cluster.restartNameNode();
} finally {
if (secondary != null) {
secondary.shutdown();
}
if (fs != null) {
fs.close();
}
if (cluster != null) {
cluster.shutdown();
}
Mockito.reset(faultInjector);
}
}
/**
* Test case where two secondary namenodes are checkpointing the same
* NameNode. This differs from {@link #testMultipleSecondaryNamenodes()}

View File

@ -57,7 +57,7 @@ public class TestFSImageStorageInspector {
inspector.inspectDirectory(mockDir);
assertEquals(2, inspector.foundImages.size());
FSImageFile latestImage = inspector.getLatestImage();
FSImageFile latestImage = inspector.getLatestImages().get(0);
assertEquals(456, latestImage.txId);
assertSame(mockDir, latestImage.sd);
assertTrue(inspector.isUpgradeFinalized());

View File

@ -120,12 +120,13 @@ public class TestHostsFiles {
InetSocketAddress nnHttpAddress = cluster.getNameNode().getHttpAddress();
LOG.info("nnaddr = '" + nnHttpAddress + "'");
URL nnjsp = new URL("http://" + nnHttpAddress.getHostName() + ":" + nnHttpAddress.getPort() + "/dfshealth.jsp");
String nnHostName = nnHttpAddress.getHostName();
URL nnjsp = new URL("http://" + nnHostName + ":" + nnHttpAddress.getPort() + "/dfshealth.jsp");
LOG.info("fetching " + nnjsp);
String dfshealthPage = StringEscapeUtils.unescapeHtml(DFSTestUtil.urlGet(nnjsp));
LOG.info("got " + dfshealthPage);
assertTrue("dfshealth should contain localhost, got:" + dfshealthPage,
dfshealthPage.contains("localhost"));
assertTrue("dfshealth should contain " + nnHostName + ", got:" + dfshealthPage,
dfshealthPage.contains(nnHostName));
} finally {
cluster.shutdown();

View File

@ -158,7 +158,7 @@ public class TestProcessCorruptBlocks {
* (corrupt replica should be removed since number of good
* replicas (1) is equal to replication factor (1))
*/
@Test
@Test(timeout=20000)
public void testWithReplicationFactorAsOne() throws Exception {
Configuration conf = new HdfsConfiguration();
conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
@ -183,9 +183,14 @@ public class TestProcessCorruptBlocks {
namesystem.setReplication(fileName.toString(), (short) 1);
// wait (up to 10 seconds) until the corrupt replica has been removed.
try {
Thread.sleep(3000);
} catch (InterruptedException ignored) {
for (int i = 0; i < 10; i++) {
try {
Thread.sleep(1000);
} catch (InterruptedException ignored) {
}
if (countReplicas(namesystem, block).corruptReplicas() == 0) {
break;
}
}
assertEquals(1, countReplicas(namesystem, block).liveReplicas());
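Editor's note: the fixed 3-second sleep is replaced by a bounded poll, so the test finishes as soon as the corrupt replica count drops to zero and only waits the full budget on failure. The same loop, extracted into a reusable helper for clarity (a hypothetical utility, not part of Hadoop's test framework):

    import java.util.function.BooleanSupplier;

    class WaitUtil {
        // Poll until the condition holds or the timeout elapses; returns whether it ever held.
        static boolean waitFor(BooleanSupplier condition, long intervalMs, long timeoutMs)
                throws InterruptedException {
            long deadline = System.currentTimeMillis() + timeoutMs;
            while (System.currentTimeMillis() < deadline) {
                if (condition.getAsBoolean()) {
                    return true;
                }
                Thread.sleep(intervalMs);
            }
            return condition.getAsBoolean();   // one final check at the deadline
        }
    }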

View File

@ -41,6 +41,7 @@ import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.fs.permission.PermissionStatus;
import org.apache.hadoop.hdfs.DFSConfigKeys;
@ -219,7 +220,7 @@ public class TestSaveNamespace {
* Verify that a saveNamespace command brings faulty directories
* in fs.name.dir and fs.edit.dir back online.
*/
@Test
@Test (timeout=30000)
public void testReinsertnamedirsInSavenamespace() throws Exception {
// create a configuration with the key to restore error
// directories in fs.name.dir
@ -237,10 +238,13 @@ public class TestSaveNamespace {
FSImage spyImage = spy(originalImage);
fsn.dir.fsImage = spyImage;
FileSystem fs = FileSystem.getLocal(conf);
File rootDir = storage.getStorageDir(0).getRoot();
rootDir.setExecutable(false);
rootDir.setWritable(false);
rootDir.setReadable(false);
Path rootPath = new Path(rootDir.getPath(), "current");
final FsPermission permissionNone = new FsPermission((short) 0);
final FsPermission permissionAll = new FsPermission(
FsAction.ALL, FsAction.READ_EXECUTE, FsAction.READ_EXECUTE);
fs.setPermission(rootPath, permissionNone);
try {
doAnEdit(fsn, 1);
@ -257,9 +261,7 @@ public class TestSaveNamespace {
" bad directories.",
storage.getRemovedStorageDirs().size() == 1);
rootDir.setExecutable(true);
rootDir.setWritable(true);
rootDir.setReadable(true);
fs.setPermission(rootPath, permissionAll);
// The next call to savenamespace should try inserting the
// erroneous directory back to fs.name.dir. This command should
@ -290,9 +292,7 @@ public class TestSaveNamespace {
LOG.info("Reloaded image is good.");
} finally {
if (rootDir.exists()) {
rootDir.setExecutable(true);
rootDir.setWritable(true);
rootDir.setReadable(true);
fs.setPermission(rootPath, permissionAll);
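Editor's note: switching from the java.io.File permission setters to FileSystem.setPermission with explicit FsPermission values keeps the simulated storage failure working on Windows, where setReadable/setWritable/setExecutable are unreliable. A small hedged sketch of the two permission values the test flips between, wrapped in a hypothetical helper:

    import java.io.IOException;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.permission.FsAction;
    import org.apache.hadoop.fs.permission.FsPermission;

    // Hypothetical helper mirroring the test's before/after permission flip.
    class PermissionToggle {
        private static final FsPermission NONE = new FsPermission((short) 0);   // ---------
        private static final FsPermission RESTORED =
            new FsPermission(FsAction.ALL, FsAction.READ_EXECUTE, FsAction.READ_EXECUTE); // rwxr-xr-x

        static void simulateFailure(FileSystem fs, Path dir) throws IOException {
            fs.setPermission(dir, NONE);         // make the storage directory unusable
        }

        static void restore(FileSystem fs, Path dir) throws IOException {
            fs.setPermission(dir, RESTORED);     // bring the directory back online
        }
    }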
}
if (fsn != null) {
@ -305,27 +305,27 @@ public class TestSaveNamespace {
}
}
@Test
@Test (timeout=30000)
public void testRTEWhileSavingSecondImage() throws Exception {
saveNamespaceWithInjectedFault(Fault.SAVE_SECOND_FSIMAGE_RTE);
}
@Test
@Test (timeout=30000)
public void testIOEWhileSavingSecondImage() throws Exception {
saveNamespaceWithInjectedFault(Fault.SAVE_SECOND_FSIMAGE_IOE);
}
@Test
@Test (timeout=30000)
public void testCrashInAllImageDirs() throws Exception {
saveNamespaceWithInjectedFault(Fault.SAVE_ALL_FSIMAGES);
}
@Test
@Test (timeout=30000)
public void testCrashWhenWritingVersionFiles() throws Exception {
saveNamespaceWithInjectedFault(Fault.WRITE_STORAGE_ALL);
}
@Test
@Test (timeout=30000)
public void testCrashWhenWritingVersionFileInOneDir() throws Exception {
saveNamespaceWithInjectedFault(Fault.WRITE_STORAGE_ONE);
}
@ -337,7 +337,7 @@ public class TestSaveNamespace {
* failed checkpoint since it only affected ".ckpt" files, not
* valid image files
*/
@Test
@Test (timeout=30000)
public void testFailedSaveNamespace() throws Exception {
doTestFailedSaveNamespace(false);
}
@ -347,7 +347,7 @@ public class TestSaveNamespace {
* the operator restores the directories and calls it again.
* This should leave the NN in a clean state for next start.
*/
@Test
@Test (timeout=30000)
public void testFailedSaveNamespaceWithRecovery() throws Exception {
doTestFailedSaveNamespace(true);
}
@ -421,7 +421,7 @@ public class TestSaveNamespace {
}
}
@Test
@Test (timeout=30000)
public void testSaveWhileEditsRolled() throws Exception {
Configuration conf = getConf();
NameNode.initMetrics(conf, NamenodeRole.NAMENODE);
@ -457,7 +457,7 @@ public class TestSaveNamespace {
}
}
@Test
@Test (timeout=30000)
public void testTxIdPersistence() throws Exception {
Configuration conf = getConf();
NameNode.initMetrics(conf, NamenodeRole.NAMENODE);
@ -580,7 +580,7 @@ public class TestSaveNamespace {
* open lease and destination directory exist.
* This test is a regression for HDFS-2827
*/
@Test
@Test (timeout=30000)
public void testSaveNamespaceWithRenamedLease() throws Exception {
MiniDFSCluster cluster = new MiniDFSCluster.Builder(new Configuration())
.numDataNodes(1).build();
@ -603,7 +603,7 @@ public class TestSaveNamespace {
}
}
@Test
@Test (timeout=30000)
public void testSaveNamespaceWithDanglingLease() throws Exception {
MiniDFSCluster cluster = new MiniDFSCluster.Builder(new Configuration())
.numDataNodes(1).build();

View File

@ -31,12 +31,10 @@ import java.net.URI;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
@ -46,17 +44,21 @@ import org.apache.hadoop.fs.permission.PermissionStatus;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.LogVerificationAppender;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
import org.apache.hadoop.hdfs.util.MD5FileUtils;
import org.apache.hadoop.io.MD5Hash;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.util.StringUtils;
import org.apache.log4j.Logger;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
@ -111,11 +113,12 @@ public class TestStartup {
}
}
/**
* start MiniDFScluster, create a file (to create edits) and do a checkpoint
/**
* Create a number of fsimage checkpoints
* @param count number of checkpoints to create
* @throws IOException
*/
public void createCheckPoint() throws IOException {
public void createCheckPoint(int count) throws IOException {
LOG.info("--starting mini cluster");
// manage dirs parameter set to false
MiniDFSCluster cluster = null;
@ -133,15 +136,18 @@ public class TestStartup {
sn = new SecondaryNameNode(config);
assertNotNull(sn);
// create a file
FileSystem fileSys = cluster.getFileSystem();
Path file1 = new Path("t1");
DFSTestUtil.createFile(fileSys, file1, fileSize, fileSize, blockSize,
(short) 1, seed);
LOG.info("--doing checkpoint");
sn.doCheckpoint(); // this shouldn't fail
LOG.info("--done checkpoint");
// Create count new files and checkpoints
for (int i=0; i<count; i++) {
// create a file
FileSystem fileSys = cluster.getFileSystem();
Path p = new Path("t" + i);
DFSTestUtil.createFile(fileSys, p, fileSize, fileSize,
blockSize, (short) 1, seed);
LOG.info("--file " + p.toString() + " created");
LOG.info("--doing checkpoint");
sn.doCheckpoint(); // this shouldn't fail
LOG.info("--done checkpoint");
}
} catch (IOException e) {
fail(StringUtils.stringifyException(e));
System.err.println("checkpoint failed");
@ -151,7 +157,36 @@ public class TestStartup {
sn.shutdown();
if(cluster!=null)
cluster.shutdown();
LOG.info("--file t1 created, cluster shutdown");
LOG.info("--cluster shutdown");
}
}
/**
* Corrupts the MD5 sum of the fsimage.
*
* @param corruptAll
* whether to corrupt one or all of the MD5 sums in the configured
* namedirs
* @throws IOException
*/
private void corruptFSImageMD5(boolean corruptAll) throws IOException {
List<URI> nameDirs = (List<URI>)FSNamesystem.getNamespaceDirs(config);
// Corrupt the md5 files in all the namedirs
for (URI uri: nameDirs) {
// Directory layout looks like:
// test/data/dfs/nameN/current/{fsimage,edits,...}
File nameDir = new File(uri.getPath());
File dfsDir = nameDir.getParentFile();
assertEquals(dfsDir.getName(), "dfs"); // make sure we got right dir
// Set the md5 file to all zeros
File imageFile = new File(nameDir,
Storage.STORAGE_DIR_CURRENT + "/"
+ NNStorage.getImageFileName(0));
MD5FileUtils.saveMD5File(imageFile, new MD5Hash(new byte[16]));
// Only need to corrupt one if !corruptAll
if (!corruptAll) {
break;
}
}
}
@ -165,7 +200,7 @@ public class TestStartup {
// get name dir and its length, then delete and recreate the directory
File dir = new File(nameDirs.get(0).getPath()); // has only one
this.fsimageLength = new File(new File(dir, "current"),
this.fsimageLength = new File(new File(dir, Storage.STORAGE_DIR_CURRENT),
NameNodeFile.IMAGE.getName()).length();
if(dir.exists() && !(FileUtil.fullyDelete(dir)))
@ -178,7 +213,7 @@ public class TestStartup {
dir = new File( nameEditsDirs.get(0).getPath()); //has only one
this.editsLength = new File(new File(dir, "current"),
this.editsLength = new File(new File(dir, Storage.STORAGE_DIR_CURRENT),
NameNodeFile.EDITS.getName()).length();
if(dir.exists() && !(FileUtil.fullyDelete(dir)))
@ -262,7 +297,7 @@ public class TestStartup {
config.set(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_DIR_KEY,
fileAsURI(new File(hdfsDir, "chkpt")).toString());
createCheckPoint();
createCheckPoint(1);
corruptNameNodeFiles();
checkNameNodeFiles();
@ -289,7 +324,7 @@ public class TestStartup {
config.set(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_DIR_KEY,
fileAsURI(new File(hdfsDir, "chkpt")).toString());
createCheckPoint();
createCheckPoint(1);
corruptNameNodeFiles();
checkNameNodeFiles();
}
@ -447,20 +482,18 @@ public class TestStartup {
FileSystem fs = cluster.getFileSystem();
fs.mkdirs(new Path("/test"));
// Directory layout looks like:
// test/data/dfs/nameN/current/{fsimage,edits,...}
File nameDir = new File(cluster.getNameDirs(0).iterator().next().getPath());
File dfsDir = nameDir.getParentFile();
assertEquals(dfsDir.getName(), "dfs"); // make sure we got right dir
LOG.info("Shutting down cluster #1");
cluster.shutdown();
cluster = null;
// Corrupt the md5 file to all 0s
File imageFile = new File(nameDir, "current/" + NNStorage.getImageFileName(0));
MD5FileUtils.saveMD5File(imageFile, new MD5Hash(new byte[16]));
// Corrupt the md5 files in all the namedirs
corruptFSImageMD5(true);
// Attach our own log appender so we can verify output
final LogVerificationAppender appender = new LogVerificationAppender();
final Logger logger = Logger.getRootLogger();
logger.addAppender(appender);
// Try to start a new cluster
LOG.info("\n===========================================\n" +
"Starting same cluster after simulated crash");
@ -471,9 +504,12 @@ public class TestStartup {
.build();
fail("Should not have successfully started with corrupt image");
} catch (IOException ioe) {
if (!ioe.getCause().getMessage().contains("is corrupt with MD5")) {
throw ioe;
}
GenericTestUtils.assertExceptionContains(
"Failed to load an FSImage file!", ioe);
int md5failures = appender.countExceptionsWithMessage(
" is corrupt with MD5 checksum of ");
// Two namedirs, so should have seen two failures
assertEquals(2, md5failures);
}
} finally {
if (cluster != null) {
@ -482,6 +518,21 @@ public class TestStartup {
}
}
@Test(timeout=30000)
public void testCorruptImageFallback() throws IOException {
// Create two checkpoints
createCheckPoint(2);
// Delete a single md5sum
corruptFSImageMD5(false);
// Should still be able to start
MiniDFSCluster cluster = new MiniDFSCluster.Builder(config)
.format(false)
.manageDataDfsDirs(false)
.manageNameDfsDirs(false)
.build();
cluster.waitActive();
}
/**
* This test tests hosts include list contains host names. After namenode
* restarts, the still alive datanodes should not have any trouble in getting

View File

@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.server.namenode.ha;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.File;
import java.io.IOException;
@ -26,6 +27,8 @@ import java.io.OutputStream;
import java.net.URI;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@ -43,7 +46,10 @@ import org.apache.hadoop.hdfs.util.Canceler;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.ipc.StandbyException;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.test.GenericTestUtils.DelayAnswer;
import org.apache.hadoop.util.ThreadUtil;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
@ -59,6 +65,8 @@ public class TestStandbyCheckpoints {
protected MiniDFSCluster cluster;
protected NameNode nn0, nn1;
protected FileSystem fs;
private static final Log LOG = LogFactory.getLog(TestStandbyCheckpoints.class);
@SuppressWarnings("rawtypes")
@Before
@ -231,6 +239,49 @@ public class TestStandbyCheckpoints {
assertTrue(canceledOne);
}
/**
* Make sure that clients will receive StandbyExceptions even when a
* checkpoint is in progress on the SBN, and therefore the StandbyCheckpointer
* thread will have FSNS lock. Regression test for HDFS-4591.
*/
@Test(timeout=120000)
public void testStandbyExceptionThrownDuringCheckpoint() throws Exception {
// Set it up so that we know when the SBN checkpoint starts and ends.
FSImage spyImage1 = NameNodeAdapter.spyOnFsImage(nn1);
DelayAnswer answerer = new DelayAnswer(LOG);
Mockito.doAnswer(answerer).when(spyImage1)
.saveNamespace(Mockito.any(FSNamesystem.class),
Mockito.any(Canceler.class));
// Perform some edits and wait for a checkpoint to start on the SBN.
doEdits(0, 2000);
nn0.getRpcServer().rollEditLog();
answerer.waitForCall();
answerer.proceed();
assertTrue("SBN is not performing checkpoint but it should be.",
answerer.getFireCount() == 1 && answerer.getResultCount() == 0);
// Make sure that the lock has actually been taken by the checkpointing
// thread.
ThreadUtil.sleepAtLeastIgnoreInterrupts(1000);
try {
// Perform an RPC to the SBN and make sure it throws a StandbyException.
nn1.getRpcServer().getFileInfo("/");
fail("Should have thrown StandbyException, but instead succeeded.");
} catch (StandbyException se) {
GenericTestUtils.assertExceptionContains("is not supported", se);
}
// Make sure that the checkpoint is still going on, implying that the client
// RPC to the SBN happened during the checkpoint.
assertTrue("SBN should have still been checkpointing.",
answerer.getFireCount() == 1 && answerer.getResultCount() == 0);
answerer.waitForResult();
assertTrue("SBN should have finished checkpointing.",
answerer.getFireCount() == 1 && answerer.getResultCount() == 1);
}
private void doEdits(int start, int stop) throws IOException {
for (int i = start; i < stop; i++) {

View File

@ -143,6 +143,7 @@ public class TestStandbyIsHot {
conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024);
// We read from the standby to watch block locations
HAUtil.setAllowStandbyReads(conf, true);
conf.setLong(DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY, 0);
conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
.nnTopology(MiniDFSNNTopology.simpleHATopology())

View File

@ -230,6 +230,11 @@ Release 2.0.5-beta - UNRELEASED
appropriately used and that on-disk segments are correctly sorted on
file-size. (Anty Rao and Ravi Prakash via acmurthy)
MAPREDUCE-4571. TestHsWebServicesJobs fails on jdk7. (tgraves via tucu)
MAPREDUCE-4716. TestHsWebServicesJobsQuery.testJobsQueryStateInvalid
fails with jdk7. (tgraves via tucu)
Release 2.0.4-alpha - UNRELEASED
INCOMPATIBLE CHANGES
@ -807,6 +812,12 @@ Release 0.23.7 - UNRELEASED
MAPREDUCE-5023. History Server Web Services missing Job Counters (Ravi
Prakash via tgraves)
MAPREDUCE-5060. Fetch failures that time out only count against the first
map task (Robert Joseph Evans via jlowe)
MAPREDUCE-5042. Reducer unable to fetch for a map task that was recovered
(Jason Lowe via bobby)
Release 0.23.6 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -269,9 +269,17 @@ class YarnChild {
job.setBoolean("ipc.client.tcpnodelay", true);
job.setClass(MRConfig.TASK_LOCAL_OUTPUT_CLASS,
YarnOutputFiles.class, MapOutputFile.class);
// set the jobTokenFile into task
// set the jobToken and shuffle secrets into task
task.setJobTokenSecret(
JobTokenSecretManager.createSecretKey(jt.getPassword()));
byte[] shuffleSecret = TokenCache.getShuffleSecretKey(credentials);
if (shuffleSecret == null) {
LOG.warn("Shuffle secret missing from task credentials."
+ " Using job token secret as shuffle secret.");
shuffleSecret = jt.getPassword();
}
task.setShuffleSecret(
JobTokenSecretManager.createSecretKey(shuffleSecret));
// setup the child's MRConfig.LOCAL_DIR.
configureLocalDirs(task, job);

View File

@ -55,6 +55,7 @@ import org.apache.hadoop.mapreduce.jobhistory.JobHistoryCopyService;
import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent;
import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEventHandler;
import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo;
import org.apache.hadoop.mapreduce.security.TokenCache;
import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
import org.apache.hadoop.mapreduce.v2.api.records.AMInfo;
@ -339,8 +340,15 @@ public class MRAppMaster extends CompositeService {
boolean recoveryEnabled = conf.getBoolean(
MRJobConfig.MR_AM_JOB_RECOVERY_ENABLE, true);
boolean recoverySupportedByCommitter = committer.isRecoverySupported();
// If a shuffle secret was not provided by the job client then this app
// attempt will generate one. However that disables recovery if there
// are reducers as the shuffle secret would be app attempt specific.
boolean shuffleKeyValidForRecovery = (numReduceTasks > 0 &&
TokenCache.getShuffleSecretKey(fsTokens) != null);
if (recoveryEnabled && recoverySupportedByCommitter
&& appAttemptID.getAttemptId() > 1) {
&& shuffleKeyValidForRecovery && appAttemptID.getAttemptId() > 1) {
LOG.info("Recovery is enabled. "
+ "Will try to recover from previous life on best effort basis.");
recoveryServ = createRecoveryService(context);
@ -351,7 +359,8 @@ public class MRAppMaster extends CompositeService {
} else {
LOG.info("Not starting RecoveryService: recoveryEnabled: "
+ recoveryEnabled + " recoverySupportedByCommitter: "
+ recoverySupportedByCommitter + " ApplicationAttemptID: "
+ recoverySupportedByCommitter + " shuffleKeyValidForRecovery: "
+ shuffleKeyValidForRecovery + " ApplicationAttemptID: "
+ appAttemptID.getAttemptId());
dispatcher = createDispatcher();
addIfService(dispatcher);
@ -471,7 +480,11 @@ public class MRAppMaster extends CompositeService {
protected FileSystem getFileSystem(Configuration conf) throws IOException {
return FileSystem.get(conf);
}
protected Credentials getCredentials() {
return fsTokens;
}
/**
* clean up staging directories for the job.
* @throws IOException

View File

@ -1350,13 +1350,13 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job,
LOG.info("Adding job token for " + oldJobIDString
+ " to jobTokenSecretManager");
// Upload the jobTokens onto the remote FS so that ContainerManager can
// localize it to be used by the Containers(tasks)
Credentials tokenStorage = new Credentials();
TokenCache.setJobToken(job.jobToken, tokenStorage);
if (UserGroupInformation.isSecurityEnabled()) {
tokenStorage.addAll(job.fsTokens);
// If the job client did not set up the shuffle secret then reuse
// the job token secret for the shuffle.
if (TokenCache.getShuffleSecretKey(job.fsTokens) == null) {
LOG.warn("Shuffle secret key missing from job credentials."
+ " Using job token secret as shuffle secret.");
TokenCache.setShuffleSecretKey(job.jobToken.getPassword(),
job.fsTokens);
}
}

View File

@ -702,10 +702,21 @@ public abstract class TaskAttemptImpl implements
ByteBuffer.wrap(containerTokens_dob.getData(), 0,
containerTokens_dob.getLength());
// Add shuffle token
// Add shuffle secret key
// The secret key is converted to a JobToken to preserve backwards
// compatibility with an older ShuffleHandler running on an NM.
LOG.info("Putting shuffle token in serviceData");
byte[] shuffleSecret = TokenCache.getShuffleSecretKey(credentials);
if (shuffleSecret == null) {
LOG.warn("Cannot locate shuffle secret in credentials."
+ " Using job token as shuffle secret.");
shuffleSecret = jobToken.getPassword();
}
Token<JobTokenIdentifier> shuffleToken = new Token<JobTokenIdentifier>(
jobToken.getIdentifier(), shuffleSecret, jobToken.getKind(),
jobToken.getService());
serviceData.put(ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID,
ShuffleHandler.serializeServiceData(jobToken));
ShuffleHandler.serializeServiceData(shuffleToken));
Apps.addToEnvironment(
environment,

View File

@ -111,7 +111,7 @@ public class CountersBlock extends HtmlBlock {
th().$title(g.getName()).$class("ui-state-default").
_(fixGroupDisplayName(g.getDisplayName()))._().
td().$class(C_TABLE).
table(".dt-counters").
table(".dt-counters").$id(job.getID()+"."+g.getName()).
thead().
tr().th(".name", "Name");

View File

@ -42,6 +42,7 @@ import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TypeConverter;
import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent;
import org.apache.hadoop.mapreduce.jobhistory.NormalizedResourceEvent;
import org.apache.hadoop.mapreduce.security.TokenCache;
import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager;
import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo;
import org.apache.hadoop.mapreduce.v2.api.records.JobId;
@ -144,6 +145,9 @@ public class MRApp extends MRAppMaster {
@Override
protected void downloadTokensAndSetupUGI(Configuration conf) {
// Fake a shuffle secret that normally is provided by the job client.
String shuffleSecret = "fake-shuffle-secret";
TokenCache.setShuffleSecretKey(shuffleSecret.getBytes(), getCredentials());
}
private static ApplicationAttemptId getApplicationAttemptId(

View File

@ -900,6 +900,117 @@ public class TestRecovery {
}
@Test(timeout=30000)
public void testRecoveryWithoutShuffleSecret() throws Exception {
int runCount = 0;
MRApp app = new MRAppNoShuffleSecret(2, 1, false,
this.getClass().getName(), true, ++runCount);
Configuration conf = new Configuration();
conf.setBoolean("mapred.mapper.new-api", true);
conf.setBoolean("mapred.reducer.new-api", true);
conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
conf.set(FileOutputFormat.OUTDIR, outputDir.toString());
Job job = app.submit(conf);
app.waitForState(job, JobState.RUNNING);
//all maps would be running
Assert.assertEquals("No of tasks not correct",
3, job.getTasks().size());
Iterator<Task> it = job.getTasks().values().iterator();
Task mapTask1 = it.next();
Task mapTask2 = it.next();
Task reduceTask = it.next();
// all maps must be running
app.waitForState(mapTask1, TaskState.RUNNING);
app.waitForState(mapTask2, TaskState.RUNNING);
TaskAttempt task1Attempt = mapTask1.getAttempts().values().iterator().next();
TaskAttempt task2Attempt = mapTask2.getAttempts().values().iterator().next();
//before sending the TA_DONE event, make sure the attempts have come to
//RUNNING state
app.waitForState(task1Attempt, TaskAttemptState.RUNNING);
app.waitForState(task2Attempt, TaskAttemptState.RUNNING);
// reduce must be in RUNNING state
Assert.assertEquals("Reduce Task state not correct",
TaskState.RUNNING, reduceTask.getReport().getTaskState());
//send the done signal to the 1st map attempt
app.getContext().getEventHandler().handle(
new TaskAttemptEvent(
task1Attempt.getID(),
TaskAttemptEventType.TA_DONE));
//wait for first map task to complete
app.waitForState(mapTask1, TaskState.SUCCEEDED);
//stop the app
app.stop();
//in recovery the 1st map should NOT be recovered from previous run
//since the shuffle secret was not provided with the job credentials
//and had to be rolled per app attempt
app = new MRAppNoShuffleSecret(2, 1, false,
this.getClass().getName(), false, ++runCount);
conf = new Configuration();
conf.setBoolean(MRJobConfig.MR_AM_JOB_RECOVERY_ENABLE, true);
conf.setBoolean("mapred.mapper.new-api", true);
conf.setBoolean("mapred.reducer.new-api", true);
conf.set(FileOutputFormat.OUTDIR, outputDir.toString());
conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
job = app.submit(conf);
app.waitForState(job, JobState.RUNNING);
//all maps would be running
Assert.assertEquals("No of tasks not correct",
3, job.getTasks().size());
it = job.getTasks().values().iterator();
mapTask1 = it.next();
mapTask2 = it.next();
reduceTask = it.next();
app.waitForState(mapTask1, TaskState.RUNNING);
app.waitForState(mapTask2, TaskState.RUNNING);
task2Attempt = mapTask2.getAttempts().values().iterator().next();
//before sending the TA_DONE event, make sure the attempt has come to
//RUNNING state
app.waitForState(task2Attempt, TaskAttemptState.RUNNING);
//send the done signal to the 2nd map task
app.getContext().getEventHandler().handle(
new TaskAttemptEvent(
mapTask2.getAttempts().values().iterator().next().getID(),
TaskAttemptEventType.TA_DONE));
//wait to get it completed
app.waitForState(mapTask2, TaskState.SUCCEEDED);
//verify first map task is still running
app.waitForState(mapTask1, TaskState.RUNNING);
//send the done signal to the 1st map task
app.getContext().getEventHandler().handle(
new TaskAttemptEvent(
mapTask1.getAttempts().values().iterator().next().getID(),
TaskAttemptEventType.TA_DONE));
//wait to get it completed
app.waitForState(mapTask1, TaskState.SUCCEEDED);
//wait for reduce to be running before sending done
app.waitForState(reduceTask, TaskState.RUNNING);
//send the done signal to the reduce
app.getContext().getEventHandler().handle(
new TaskAttemptEvent(
reduceTask.getAttempts().values().iterator().next().getID(),
TaskAttemptEventType.TA_DONE));
app.waitForState(job, JobState.SUCCEEDED);
app.verifyCompleted();
}
private void writeBadOutput(TaskAttempt attempt, Configuration conf)
throws Exception {
TaskAttemptContext tContext = new TaskAttemptContextImpl(conf,
@ -1019,6 +1130,18 @@ public class TestRecovery {
}
}
static class MRAppNoShuffleSecret extends MRAppWithHistory {
public MRAppNoShuffleSecret(int maps, int reduces, boolean autoComplete,
String testName, boolean cleanOnStart, int startCount) {
super(maps, reduces, autoComplete, testName, cleanOnStart, startCount);
}
@Override
protected void downloadTokensAndSetupUGI(Configuration conf) {
// do NOT put a shuffle secret in the job credentials
}
}
public static void main(String[] arg) throws Exception {
TestRecovery test = new TestRecovery();
test.testCrashed();

View File

@ -491,7 +491,7 @@ public class TestJobImpl {
MRAppMetrics mrAppMetrics = MRAppMetrics.create();
JobImpl job = new JobImpl(jobId, Records
.newRecord(ApplicationAttemptId.class), conf, mock(EventHandler.class),
null, mock(JobTokenSecretManager.class), null, null, null,
null, new JobTokenSecretManager(), new Credentials(), null, null,
mrAppMetrics, true, null, 0, null, null, null, null);
InitTransition initTransition = getInitTransition(2);
JobEvent mockJobEvent = mock(JobEvent.class);

View File

@ -185,6 +185,7 @@ abstract public class Task implements Writable, Configurable {
private int numSlotsRequired;
protected TaskUmbilicalProtocol umbilical;
protected SecretKey tokenSecret;
protected SecretKey shuffleSecret;
protected GcTimeUpdater gcUpdater;
////////////////////////////////////////////
@ -261,7 +262,22 @@ abstract public class Task implements Writable, Configurable {
return this.tokenSecret;
}
/**
* Set the secret key used to authenticate the shuffle
* @param shuffleSecret the secret
*/
public void setShuffleSecret(SecretKey shuffleSecret) {
this.shuffleSecret = shuffleSecret;
}
/**
* Get the secret key used to authenticate the shuffle
* @return the shuffle secret
*/
public SecretKey getShuffleSecret() {
return this.shuffleSecret;
}
/**
* Get the index of this task within the job.
* @return the integer part of the task id

View File

@ -23,11 +23,15 @@ import java.net.InetAddress;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.UnknownHostException;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import javax.crypto.KeyGenerator;
import javax.crypto.SecretKey;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
@ -62,6 +66,8 @@ import com.google.common.base.Charsets;
@InterfaceStability.Unstable
class JobSubmitter {
protected static final Log LOG = LogFactory.getLog(JobSubmitter.class);
private static final String SHUFFLE_KEYGEN_ALGORITHM = "HmacSHA1";
private static final int SHUFFLE_KEY_LENGTH = 64;
private FileSystem jtFs;
private ClientProtocol submitClient;
private String submitHostName;
@ -359,6 +365,20 @@ class JobSubmitter {
populateTokenCache(conf, job.getCredentials());
// generate a secret to authenticate shuffle transfers
if (TokenCache.getShuffleSecretKey(job.getCredentials()) == null) {
KeyGenerator keyGen;
try {
keyGen = KeyGenerator.getInstance(SHUFFLE_KEYGEN_ALGORITHM);
keyGen.init(SHUFFLE_KEY_LENGTH);
} catch (NoSuchAlgorithmException e) {
throw new IOException("Error generating shuffle secret key", e);
}
SecretKey shuffleKey = keyGen.generateKey();
TokenCache.setShuffleSecretKey(shuffleKey.getEncoded(),
job.getCredentials());
}
copyAndConfigureFiles(job, submitJobDir);
Path submitJobFile = JobSubmissionFiles.getJobConfPath(submitJobDir);

View File

@ -154,7 +154,8 @@ public class TokenCache {
*/
@InterfaceAudience.Private
public static final String JOB_TOKENS_FILENAME = "mapreduce.job.jobTokenFile";
private static final Text JOB_TOKEN = new Text("ShuffleAndJobToken");
private static final Text JOB_TOKEN = new Text("JobToken");
private static final Text SHUFFLE_TOKEN = new Text("MapReduceShuffleToken");
/**
* load job token from a file
@ -194,4 +195,14 @@ public class TokenCache {
public static Token<JobTokenIdentifier> getJobToken(Credentials credentials) {
return (Token<JobTokenIdentifier>) credentials.getToken(JOB_TOKEN);
}
@InterfaceAudience.Private
public static void setShuffleSecretKey(byte[] key, Credentials credentials) {
credentials.addSecretKey(SHUFFLE_TOKEN, key);
}
@InterfaceAudience.Private
public static byte[] getShuffleSecretKey(Credentials credentials) {
return getSecretKey(credentials, SHUFFLE_TOKEN);
}
}
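The JobSubmitter and TokenCache changes above give the shuffle its own HmacSHA1 secret, stored in the job Credentials under a key separate from the job token. A minimal sketch of generating, storing, and reading back such a secret, using only the Credentials and TokenCache calls shown in this patch (the class name and standalone main are illustrative assumptions):
import javax.crypto.KeyGenerator;
import javax.crypto.SecretKey;
import org.apache.hadoop.mapreduce.security.TokenCache;
import org.apache.hadoop.security.Credentials;
public class ShuffleSecretSketch {
  public static void main(String[] args) throws Exception {
    Credentials creds = new Credentials();
    // Same algorithm and key length that JobSubmitter uses above.
    KeyGenerator keyGen = KeyGenerator.getInstance("HmacSHA1");
    keyGen.init(64);
    SecretKey shuffleKey = keyGen.generateKey();
    TokenCache.setShuffleSecretKey(shuffleKey.getEncoded(), creds);
    // Later (e.g. in the AM or a task) the secret comes back out of the same
    // credentials; a null here is what triggers the fallback warnings in
    // YarnChild and TaskAttemptImpl above.
    byte[] recovered = TokenCache.getShuffleSecretKey(creds);
    System.out.println("shuffle secret length: " + recovered.length);
  }
}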

View File

@ -82,7 +82,7 @@ class Fetcher<K,V> extends Thread {
private final int connectionTimeout;
private final int readTimeout;
private final SecretKey jobTokenSecret;
private final SecretKey shuffleSecretKey;
private volatile boolean stopped = false;
@ -92,7 +92,7 @@ class Fetcher<K,V> extends Thread {
public Fetcher(JobConf job, TaskAttemptID reduceId,
ShuffleScheduler<K,V> scheduler, MergeManager<K,V> merger,
Reporter reporter, ShuffleClientMetrics metrics,
ExceptionReporter exceptionReporter, SecretKey jobTokenSecret) {
ExceptionReporter exceptionReporter, SecretKey shuffleKey) {
this.reporter = reporter;
this.scheduler = scheduler;
this.merger = merger;
@ -100,7 +100,7 @@ class Fetcher<K,V> extends Thread {
this.exceptionReporter = exceptionReporter;
this.id = ++nextId;
this.reduce = reduceId.getTaskID().getId();
this.jobTokenSecret = jobTokenSecret;
this.shuffleSecretKey = shuffleKey;
ioErrs = reporter.getCounter(SHUFFLE_ERR_GRP_NAME,
ShuffleErrors.IO_ERROR.toString());
wrongLengthErrs = reporter.getCounter(SHUFFLE_ERR_GRP_NAME,
@ -221,7 +221,6 @@ class Fetcher<K,V> extends Thread {
// Construct the url and connect
DataInputStream input;
boolean connectSucceeded = false;
try {
URL url = getMapOutputURL(host, maps);
@ -229,7 +228,8 @@ class Fetcher<K,V> extends Thread {
// generate hash of the url
String msgToEncode = SecureShuffleUtils.buildMsgFrom(url);
String encHash = SecureShuffleUtils.hashFromString(msgToEncode, jobTokenSecret);
String encHash = SecureShuffleUtils.hashFromString(msgToEncode,
shuffleSecretKey);
// put url hash into http header
connection.addRequestProperty(
@ -237,7 +237,6 @@ class Fetcher<K,V> extends Thread {
// set the read timeout
connection.setReadTimeout(readTimeout);
connect(connection, connectionTimeout);
connectSucceeded = true;
input = new DataInputStream(connection.getInputStream());
// Validate response code
@ -255,7 +254,7 @@ class Fetcher<K,V> extends Thread {
}
LOG.debug("url="+msgToEncode+";encHash="+encHash+";replyHash="+replyHash);
// verify that replyHash is HMac of encHash
SecureShuffleUtils.verifyReply(replyHash, encHash, jobTokenSecret);
SecureShuffleUtils.verifyReply(replyHash, encHash, shuffleSecretKey);
LOG.info("for url="+msgToEncode+" sent hash and received reply");
} catch (IOException ie) {
boolean connectExcpt = ie instanceof ConnectException;
@ -265,18 +264,10 @@ class Fetcher<K,V> extends Thread {
// If connect did not succeed, just mark all the maps as failed,
// indirectly penalizing the host
if (!connectSucceeded) {
for(TaskAttemptID left: remaining) {
scheduler.copyFailed(left, host, connectSucceeded, connectExcpt);
}
} else {
// If we got a read error at this stage, it implies there was a problem
// with the first map, typically lost map. So, penalize only that map
// and add the rest
TaskAttemptID firstMap = maps.get(0);
scheduler.copyFailed(firstMap, host, connectSucceeded, connectExcpt);
for(TaskAttemptID left: remaining) {
scheduler.copyFailed(left, host, false, connectExcpt);
}
// Add back all the remaining maps, WITHOUT marking them as failed
for(TaskAttemptID left: remaining) {
scheduler.putBackKnownMapOutput(host, left);

View File

@ -108,7 +108,7 @@ public class Shuffle<K, V> implements ShuffleConsumerPlugin<K, V>, ExceptionRepo
for (int i=0; i < numFetchers; ++i) {
fetchers[i] = new Fetcher<K,V>(jobConf, reduceId, scheduler, merger,
reporter, metrics, this,
reduceTask.getJobTokenSecret());
reduceTask.getShuffleSecret());
fetchers[i].start();
}
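The Fetcher and Shuffle changes above make the reduce-side fetch derive its HTTP header hash from the shuffle secret instead of the job token secret. A minimal sketch of that handshake using only SecureShuffleUtils and JobTokenSecretManager calls that appear in this patch; the key bytes and URL are placeholders, and in a real fetch the reply hash comes from the NodeManager's ShuffleHandler rather than being computed locally:
import java.net.URL;
import javax.crypto.SecretKey;
import org.apache.hadoop.mapreduce.security.SecureShuffleUtils;
import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager;
public class ShuffleHandshakeSketch {
  public static void main(String[] args) throws Exception {
    SecretKey shuffleKey =
        JobTokenSecretManager.createSecretKey(new byte[] {0, 0, 0, 0});
    URL url = new URL("http://localhost:8080/mapOutput?job=x&reduce=0&map=m");
    // Client side: hash the request URL and send the result in a header.
    String msgToEncode = SecureShuffleUtils.buildMsgFrom(url);
    String encHash = SecureShuffleUtils.hashFromString(msgToEncode, shuffleKey);
    // The server answers with an HMAC of encHash under the same key; here we
    // compute it locally so verifyReply succeeds. verifyReply throws an
    // IOException when the two sides do not share the same secret.
    String replyHash = SecureShuffleUtils.hashFromString(encHash, shuffleKey);
    SecureShuffleUtils.verifyReply(replyHash, encHash, shuffleKey);
    System.out.println("reply hash verified for " + msgToEncode);
  }
}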

View File

@ -26,6 +26,7 @@ import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.util.ArrayList;
@ -70,6 +71,54 @@ public class TestFetcher {
}
}
@SuppressWarnings("unchecked")
@Test(timeout=30000)
public void testCopyFromHostConnectionTimeout() throws Exception {
LOG.info("testCopyFromHostConnectionTimeout");
JobConf job = new JobConf();
TaskAttemptID id = TaskAttemptID.forName("attempt_0_1_r_1_1");
ShuffleScheduler<Text, Text> ss = mock(ShuffleScheduler.class);
MergeManagerImpl<Text, Text> mm = mock(MergeManagerImpl.class);
Reporter r = mock(Reporter.class);
ShuffleClientMetrics metrics = mock(ShuffleClientMetrics.class);
ExceptionReporter except = mock(ExceptionReporter.class);
SecretKey key = JobTokenSecretManager.createSecretKey(new byte[]{0,0,0,0});
HttpURLConnection connection = mock(HttpURLConnection.class);
when(connection.getInputStream()).thenThrow(
new SocketTimeoutException("This is a fake timeout :)"));
Counters.Counter allErrs = mock(Counters.Counter.class);
when(r.getCounter(anyString(), anyString()))
.thenReturn(allErrs);
Fetcher<Text,Text> underTest = new FakeFetcher<Text,Text>(job, id, ss, mm,
r, metrics, except, key, connection);
MapHost host = new MapHost("localhost", "http://localhost:8080/");
ArrayList<TaskAttemptID> maps = new ArrayList<TaskAttemptID>(1);
TaskAttemptID map1ID = TaskAttemptID.forName("attempt_0_1_m_1_1");
maps.add(map1ID);
TaskAttemptID map2ID = TaskAttemptID.forName("attempt_0_1_m_2_1");
maps.add(map2ID);
when(ss.getMapsForHost(host)).thenReturn(maps);
String encHash = "vFE234EIFCiBgYs2tCXY/SjT8Kg=";
underTest.copyFromHost(host);
verify(connection)
.addRequestProperty(SecureShuffleUtils.HTTP_HEADER_URL_HASH,
encHash);
verify(allErrs).increment(1);
verify(ss).copyFailed(map1ID, host, false, false);
verify(ss).copyFailed(map2ID, host, false, false);
verify(ss).putBackKnownMapOutput(any(MapHost.class), eq(map1ID));
verify(ss).putBackKnownMapOutput(any(MapHost.class), eq(map2ID));
}
@SuppressWarnings("unchecked")
@Test
public void testCopyFromHostBogusHeader() throws Exception {

View File

@ -65,8 +65,12 @@ public class HsTasksBlock extends HtmlBlock {
if (!symbol.isEmpty()) {
type = MRApps.taskType(symbol);
}
THEAD<TABLE<Hamlet>> thead = html.table("#tasks").thead();
THEAD<TABLE<Hamlet>> thead;
if(type != null)
thead = html.table("#"+app.getJob().getID()
+ type).$class("dt-tasks").thead();
else
thead = html.table("#tasks").thead();
//Create the spanning row
int attemptColSpan = type == TaskType.REDUCE ? 8 : 3;
thead.tr().

View File

@ -22,7 +22,9 @@ import static org.apache.hadoop.mapreduce.v2.app.webapp.AMParams.TASK_TYPE;
import static org.apache.hadoop.yarn.webapp.view.JQueryUI.ACCORDION;
import static org.apache.hadoop.yarn.webapp.view.JQueryUI.DATATABLES;
import static org.apache.hadoop.yarn.webapp.view.JQueryUI.DATATABLES_ID;
import static org.apache.hadoop.yarn.webapp.view.JQueryUI.DATATABLES_SELECTOR;
import static org.apache.hadoop.yarn.webapp.view.JQueryUI.initID;
import static org.apache.hadoop.yarn.webapp.view.JQueryUI.initSelector;
import static org.apache.hadoop.yarn.webapp.view.JQueryUI.postInitID;
import static org.apache.hadoop.yarn.webapp.view.JQueryUI.tableInit;
@ -42,6 +44,8 @@ public class HsTasksPage extends HsView {
@Override protected void preHead(Page.HTML<_> html) {
commonPreHead(html);
set(DATATABLES_ID, "tasks");
set(DATATABLES_SELECTOR, ".dt-tasks" );
set(initSelector(DATATABLES), tasksTableInit());
set(initID(ACCORDION, "nav"), "{autoHeight:false, active:1}");
set(initID(DATATABLES, "tasks"), tasksTableInit());
set(postInitID(DATATABLES, "tasks"), jobsPostTableInit());

View File

@ -77,13 +77,18 @@ public class MockHistoryJobs extends MockJobs {
for(Map.Entry<JobId, Job> entry: mocked.entrySet()) {
JobId id = entry.getKey();
Job j = entry.getValue();
ret.full.put(id, new MockCompletedJob(j));
JobReport report = j.getReport();
MockCompletedJob mockJob = new MockCompletedJob(j);
// use MockCompletedJob to set everything below, to make sure it is
// consistent with what the history server would do
ret.full.put(id, mockJob);
JobReport report = mockJob.getReport();
JobIndexInfo info = new JobIndexInfo(report.getStartTime(),
report.getFinishTime(), j.getUserName(), j.getName(), id,
j.getCompletedMaps(), j.getCompletedReduces(), String.valueOf(j.getState()));
info.setQueueName(j.getQueueName());
report.getFinishTime(), mockJob.getUserName(), mockJob.getName(), id,
mockJob.getCompletedMaps(), mockJob.getCompletedReduces(),
String.valueOf(mockJob.getState()));
info.setQueueName(mockJob.getQueueName());
ret.partial.put(id, new PartialJob(info, id));
}
return ret;
}
@ -99,12 +104,16 @@ public class MockHistoryJobs extends MockJobs {
@Override
public int getCompletedMaps() {
return job.getCompletedMaps();
// we always return total since this is history server
// and PartialJob also assumes completed == total
return job.getTotalMaps();
}
@Override
public int getCompletedReduces() {
return job.getCompletedReduces();
// we always return total since this is history server
// and PartialJob also assumes completed == total
return job.getTotalReduces();
}
@Override

View File

@ -117,6 +117,7 @@ public class TestHsWebServicesJobs extends JerseyTest {
fullJobs = jobs.full;
}
TestAppContext(int appid, int numJobs, int numTasks, int numAttempts) {
this(appid, numJobs, numTasks, numAttempts, false);
}
@ -411,7 +412,8 @@ public class TestHsWebServicesJobs extends JerseyTest {
JSONObject json = response.getEntity(JSONObject.class);
assertEquals("incorrect number of elements", 1, json.length());
JSONObject info = json.getJSONObject("job");
VerifyJobsUtils.verifyHsJob(info, jobsMap.get(id));
VerifyJobsUtils.verifyHsJob(info, appContext.getJob(id));
}
}
@ -613,7 +615,7 @@ public class TestHsWebServicesJobs extends JerseyTest {
JSONObject json = response.getEntity(JSONObject.class);
assertEquals("incorrect number of elements", 1, json.length());
JSONObject info = json.getJSONObject("jobCounters");
verifyHsJobCounters(info, jobsMap.get(id));
verifyHsJobCounters(info, appContext.getJob(id));
}
}
@ -631,7 +633,7 @@ public class TestHsWebServicesJobs extends JerseyTest {
JSONObject json = response.getEntity(JSONObject.class);
assertEquals("incorrect number of elements", 1, json.length());
JSONObject info = json.getJSONObject("jobCounters");
verifyHsJobCounters(info, jobsMap.get(id));
verifyHsJobCounters(info, appContext.getJob(id));
}
}
@ -689,7 +691,7 @@ public class TestHsWebServicesJobs extends JerseyTest {
JSONObject json = response.getEntity(JSONObject.class);
assertEquals("incorrect number of elements", 1, json.length());
JSONObject info = json.getJSONObject("jobCounters");
verifyHsJobCounters(info, jobsMap.get(id));
verifyHsJobCounters(info, appContext.getJob(id));
}
}
@ -711,7 +713,7 @@ public class TestHsWebServicesJobs extends JerseyTest {
is.setCharacterStream(new StringReader(xml));
Document dom = db.parse(is);
NodeList info = dom.getElementsByTagName("jobCounters");
verifyHsJobCountersXML(info, jobsMap.get(id));
verifyHsJobCountersXML(info, appContext.getJob(id));
}
}

View File

@ -284,9 +284,9 @@ public class TestHsWebServicesJobsQuery extends JerseyTest {
String type = exception.getString("exception");
String classname = exception.getString("javaClassName");
WebServicesTestUtils
.checkStringMatch(
.checkStringContains(
"exception message",
"No enum const class org.apache.hadoop.mapreduce.v2.api.records.JobState.InvalidState",
"org.apache.hadoop.mapreduce.v2.api.records.JobState.InvalidState",
message);
WebServicesTestUtils.checkStringMatch("exception type",
"IllegalArgumentException", type);

View File

@ -47,6 +47,7 @@ import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.IFile.Writer;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.security.TokenCache;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.Counters.Counter;
import org.apache.hadoop.mapred.Counters.Group;
@ -106,7 +107,7 @@ public class TestPipeApplication {
Token<ApplicationTokenIdentifier> token = new Token<ApplicationTokenIdentifier>(
"user".getBytes(), "password".getBytes(), new Text("kind"), new Text(
"service"));
conf.getCredentials().addToken(new Text("ShuffleAndJobToken"), token);
TokenCache.setJobToken(token, conf.getCredentials());
conf.setBoolean(MRJobConfig.SKIP_RECORDS, true);
TestTaskReporter reporter = new TestTaskReporter();
PipesMapRunner<FloatWritable, NullWritable, IntWritable, Text> runner = new PipesMapRunner<FloatWritable, NullWritable, IntWritable, Text>();
@ -171,7 +172,7 @@ public class TestPipeApplication {
"user".getBytes(), "password".getBytes(), new Text("kind"), new Text(
"service"));
conf.getCredentials().addToken(new Text("ShuffleAndJobToken"), token);
TokenCache.setJobToken(token, conf.getCredentials());
FakeCollector output = new FakeCollector(new Counters.Counter(),
new Progress());
FileSystem fs = new RawLocalFileSystem();
@ -391,7 +392,7 @@ public class TestPipeApplication {
Token<ApplicationTokenIdentifier> token = new Token<ApplicationTokenIdentifier>(
"user".getBytes(), "password".getBytes(), new Text("kind"), new Text(
"service"));
conf.getCredentials().addToken(new Text("ShuffleAndJobToken"), token);
TokenCache.setJobToken(token, conf.getCredentials());
File fCommand = getFileCommand("org.apache.hadoop.mapred.pipes.PipeReducerStub");
conf.set(MRJobConfig.CACHE_LOCALFILES, fCommand.getAbsolutePath());

View File

@ -37,7 +37,7 @@ public class ProtocMojo extends AbstractMojo {
private MavenProject project;
@Parameter
private List<File> imports;
private File[] imports;
@Parameter(defaultValue="${project.build.directory}/generated-sources/java")
private File output;
@ -83,4 +83,4 @@ public class ProtocMojo extends AbstractMojo {
project.addCompileSourceRoot(output.getAbsolutePath());
}
}
}

View File

@ -46,7 +46,7 @@
<hadoop.assemblies.version>${project.version}</hadoop.assemblies.version>
<commons-daemon.version>1.0.3</commons-daemon.version>
<commons-daemon.version>1.0.13</commons-daemon.version>
<test.build.dir>${project.build.directory}/test-dir</test.build.dir>
<test.build.data>${test.build.dir}</test.build.data>
@ -864,15 +864,6 @@
<properties>
<build.platform>${os.name}-${os.arch}-${sun.arch.data.model}</build.platform>
</properties>
<dependencies>
<dependency>
<groupId>jdk.tools</groupId>
<artifactId>jdk.tools</artifactId>
<version>1.6</version>
<scope>system</scope>
<systemPath>${java.home}/../lib/tools.jar</systemPath>
</dependency>
</dependencies>
</profile>
<profile>
<id>os.mac</id>

View File

@ -257,7 +257,7 @@ public class TestGridmixSummary {
qPath.toString(), es.getInputTraceLocation());
// test expected data size
assertEquals("Mismatch in expected data size",
"1.0k", es.getExpectedDataSize());
"1 K", es.getExpectedDataSize());
// test input data statistics
assertEquals("Mismatch in input data statistics",
ExecutionSummarizer.stringifyDataStatistics(dataStats),
@ -272,7 +272,7 @@ public class TestGridmixSummary {
es.finalize(factory, testTraceFile.toString(), 1024*1024*1024*10L, resolver,
dataStats, conf);
assertEquals("Mismatch in expected data size",
"10.0g", es.getExpectedDataSize());
"10 G", es.getExpectedDataSize());
// test trace signature uniqueness
// touch the trace file
@ -389,4 +389,4 @@ public class TestGridmixSummary {
assertEquals("Cluster summary test failed!", 0,
cs.getNumBlacklistedTrackers());
}
}
}

View File

@ -66,6 +66,12 @@ Release 2.0.5-beta - UNRELEASED
the per-application page are translated to html line breaks. (Omkar Vinit
Joshi via vinodkv)
YARN-198. Added a link to RM pages from the NodeManager web app. (Jian He
via vinodkv)
YARN-237. Refreshing the RM page forgets how many rows I had in my
Datatables (jian he via bobby)
OPTIMIZATIONS
BUG FIXES
@ -91,6 +97,9 @@ Release 2.0.5-beta - UNRELEASED
YARN-376. Fixes a bug which would prevent the NM knowing about completed
containers and applications. (Jason Lowe via sseth)
YARN-196. Nodemanager should be more robust in handling connection failure
to ResourceManager when a cluster is started (Xuan Gong via hitesh)
Release 2.0.4-alpha - UNRELEASED
INCOMPATIBLE CHANGES
@ -396,6 +405,8 @@ Release 0.23.7 - UNRELEASED
YARN-443. allow OS scheduling priority of NM to be different than the
containers it launches (tgraves)
YARN-468. coverage fix for org.apache.hadoop.yarn.server.webproxy.amfilter
(Aleksey Gorshkov via bobby)
OPTIMIZATIONS

View File

@ -621,6 +621,20 @@ public class YarnConfiguration extends Configuration {
public static final long DEFAULT_NM_PROCESS_KILL_WAIT_MS =
2000;
/** Max time to wait to establish a connection to RM when NM starts
*/
public static final String RESOURCEMANAGER_CONNECT_WAIT_SECS =
NM_PREFIX + "resourcemanager.connect.wait.secs";
public static final int DEFAULT_RESOURCEMANAGER_CONNECT_WAIT_SECS =
15*60;
/** Time interval between each NM attempt to connect to RM
*/
public static final String RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS =
NM_PREFIX + "resourcemanager.connect.retry_interval.secs";
public static final long DEFAULT_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS
= 30;
/**
* CLASSPATH for YARN applications. A comma-separated list of CLASSPATH
* entries

View File

@ -107,12 +107,21 @@ public class JQueryUI extends HtmlBlock {
protected void initDataTables(List<String> list) {
String defaultInit = "{bJQueryUI: true, sPaginationType: 'full_numbers'}";
String stateSaveInit = "bStateSave : true, " +
"\"fnStateSave\": function (oSettings, oData) { " +
"sessionStorage.setItem( oSettings.sTableId, JSON.stringify(oData) ); }, " +
"\"fnStateLoad\": function (oSettings) { " +
"return JSON.parse( sessionStorage.getItem(oSettings.sTableId) );}, ";
for (String id : split($(DATATABLES_ID))) {
if (Html.isValidId(id)) {
String init = $(initID(DATATABLES, id));
if (init.isEmpty()) {
init = defaultInit;
}
// for inserting stateSaveInit
int pos = init.indexOf('{') + 1;
init = new StringBuffer(init).insert(pos, stateSaveInit).toString();
list.add(join(id,"DataTable = $('#", id, "').dataTable(", init,
").fnSetFilteringDelay(188);"));
String postInit = $(postInitID(DATATABLES, id));
@ -126,9 +135,12 @@ public class JQueryUI extends HtmlBlock {
String init = $(initSelector(DATATABLES));
if (init.isEmpty()) {
init = defaultInit;
}
}
int pos = init.indexOf('{') + 1;
init = new StringBuffer(init).insert(pos, stateSaveInit).toString();
list.add(join(" $('", escapeJavaScript(selector), "').dataTable(", init,
").fnSetFilteringDelay(288);"));
").fnSetFilteringDelay(288);"));
}
}

View File

@ -597,6 +597,20 @@
<value>2000</value>
</property>
<property>
<description>Max time, in seconds, to wait to establish a connection to RM when NM starts.
The NM will shut down if it cannot connect to the RM within the specified max time period.
If the value is set to -1, then the NM will retry forever.</description>
<name>yarn.nodemanager.resourcemanager.connect.wait.secs</name>
<value>900</value>
</property>
<property>
<description>Time interval, in seconds, between each NM attempt to connect to RM.</description>
<name>yarn.nodemanager.resourcemanager.connect.retry_interval.secs</name>
<value>30</value>
</property>
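The two properties above bound how long a starting NodeManager keeps retrying its registration with the ResourceManager and how long it sleeps between attempts, matching the YarnConfiguration constants and the NodeStatusUpdaterImpl retry loop elsewhere in this patch. A minimal Java sketch of overriding them programmatically (the values and the class name are illustrative assumptions):
import org.apache.hadoop.yarn.conf.YarnConfiguration;
public class NmRmConnectConfigSketch {
  public static void main(String[] args) {
    YarnConfiguration conf = new YarnConfiguration();
    // -1 makes the NM retry forever instead of shutting down.
    conf.setLong(YarnConfiguration.RESOURCEMANAGER_CONNECT_WAIT_SECS, -1);
    // Sleep 10 seconds between registration attempts (the default is 30).
    conf.setLong(
        YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS, 10);
    System.out.println(
        conf.get(YarnConfiguration.RESOURCEMANAGER_CONNECT_WAIT_SECS));
  }
}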
<!--Map Reduce configuration-->
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>

View File

@ -350,7 +350,7 @@ public class NodeManager extends CompositeService
ContainerManagerImpl getContainerManager() {
return containerManager;
}
public static void main(String[] args) {
Thread.setDefaultUncaughtExceptionHandler(new YarnUncaughtExceptionHandler());
StringUtils.startupShutdownMessage(NodeManager.class, args, LOG);

View File

@ -151,7 +151,6 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
YarnConfiguration.DEFAULT_NM_WEBAPP_ADDRESS,
YarnConfiguration.DEFAULT_NM_WEBAPP_PORT);
try {
// this.hostName = InetAddress.getLocalHost().getCanonicalHostName();
this.httpPort = httpBindAddress.getPort();
// Registration has to be in start so that ContainerManager can get the
// perNM tokens needed to authenticate ContainerTokens.
@ -189,15 +188,84 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
}
private void registerWithRM() throws YarnRemoteException {
this.resourceTracker = getRMClient();
LOG.info("Connecting to ResourceManager at " + this.rmAddress);
RegisterNodeManagerRequest request = recordFactory.newRecordInstance(RegisterNodeManagerRequest.class);
Configuration conf = getConfig();
long rmConnectWaitMS =
conf.getInt(
YarnConfiguration.RESOURCEMANAGER_CONNECT_WAIT_SECS,
YarnConfiguration.DEFAULT_RESOURCEMANAGER_CONNECT_WAIT_SECS)
* 1000;
long rmConnectionRetryIntervalMS =
conf.getLong(
YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS,
YarnConfiguration
.DEFAULT_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS)
* 1000;
if(rmConnectionRetryIntervalMS < 0) {
throw new YarnException("Invalid Configuration. " +
YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS +
" should not be negative.");
}
boolean waitForEver = (rmConnectWaitMS == -1000);
if(! waitForEver) {
if(rmConnectWaitMS < 0) {
throw new YarnException("Invalid Configuration. " +
YarnConfiguration.RESOURCEMANAGER_CONNECT_WAIT_SECS +
" can be -1, but can not be other negative numbers");
}
//try connect once
if(rmConnectWaitMS < rmConnectionRetryIntervalMS) {
LOG.warn(YarnConfiguration.RESOURCEMANAGER_CONNECT_WAIT_SECS
+ " is smaller than "
+ YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS
+ ". Only try connect once.");
rmConnectWaitMS = 0;
}
}
int rmRetryCount = 0;
long waitStartTime = System.currentTimeMillis();
RegisterNodeManagerRequest request =
recordFactory.newRecordInstance(RegisterNodeManagerRequest.class);
request.setHttpPort(this.httpPort);
request.setResource(this.totalResource);
request.setNodeId(this.nodeId);
RegistrationResponse regResponse =
this.resourceTracker.registerNodeManager(request).getRegistrationResponse();
RegistrationResponse regResponse;
while(true) {
try {
rmRetryCount++;
LOG.info("Connecting to ResourceManager at " + this.rmAddress
+ ". current no. of attempts is " + rmRetryCount);
this.resourceTracker = getRMClient();
regResponse =
this.resourceTracker.registerNodeManager(request)
.getRegistrationResponse();
break;
} catch(Throwable e) {
LOG.warn("Trying to connect to ResourceManager, " +
"current no. of failed attempts is "+rmRetryCount);
if(System.currentTimeMillis() - waitStartTime < rmConnectWaitMS
|| waitForEver) {
try {
LOG.info("Sleeping for " + rmConnectionRetryIntervalMS/1000
+ " seconds before next connection retry to RM");
Thread.sleep(rmConnectionRetryIntervalMS);
} catch(InterruptedException ex) {
//do nothing
}
} else {
String errorMessage = "Failed to Connect to RM, " +
"no. of failed attempts is "+rmRetryCount;
LOG.error(errorMessage,e);
throw new YarnException(errorMessage,e);
}
}
}
// if the Resourcemanager instructs NM to shutdown.
if (NodeAction.SHUTDOWN.equals(regResponse.getNodeAction())) {
throw new YarnException(

View File

@ -18,16 +18,32 @@
package org.apache.hadoop.yarn.server.nodemanager.webapp;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.webapp.YarnWebParams;
import org.apache.hadoop.yarn.webapp.view.HtmlBlock;
import com.google.inject.Inject;
public class NavBlock extends HtmlBlock implements YarnWebParams {
private Configuration conf;
@Inject
public NavBlock(Configuration conf) {
this.conf = conf;
}
@Override
protected void render(Block html) {
html
String RMWebAppURL = YarnConfiguration.getRMWebAppURL(this.conf);
html
.div("#nav")
.h3()._("NodeManager")._() // TODO: Problem if no header like this
.h3()._("ResourceManager")._()
.ul()
.li().a(RMWebAppURL, "RM Home")._()._()
.h3()._("NodeManager")._() // TODO: Problem if no header like this
.ul()
.li()
.a(url("node"), "Node Information")._()
@ -37,7 +53,7 @@ public class NavBlock extends HtmlBlock implements YarnWebParams {
.li()
.a(url("allContainers"), "List of Containers")._()
._()
.h3("Tools")
.h3("Tools")
.ul()
.li().a("/conf", "Configuration")._()
.li().a("/logs", "Local logs")._()

View File

@ -42,7 +42,7 @@ public class NodePage extends NMView {
protected void commonPreHead(HTML<_> html) {
super.commonPreHead(html);
set(initID(ACCORDION, "nav"), "{autoHeight:false, active:0}");
set(initID(ACCORDION, "nav"), "{autoHeight:false, active:1}");
}
@Override

View File

@ -267,6 +267,36 @@ public class TestNodeStatusUpdater {
}
}
private class MyNodeStatusUpdater4 extends NodeStatusUpdaterImpl {
public ResourceTracker resourceTracker =
new MyResourceTracker(this.context);
private Context context;
private final long waitStartTime;
private final long rmStartIntervalMS;
private final boolean rmNeverStart;
public MyNodeStatusUpdater4(Context context, Dispatcher dispatcher,
NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics,
long rmStartIntervalMS, boolean rmNeverStart) {
super(context, dispatcher, healthChecker, metrics);
this.context = context;
this.waitStartTime = System.currentTimeMillis();
this.rmStartIntervalMS = rmStartIntervalMS;
this.rmNeverStart = rmNeverStart;
}
@Override
protected ResourceTracker getRMClient() {
if(System.currentTimeMillis() - waitStartTime <= rmStartIntervalMS
|| rmNeverStart) {
throw new YarnException("Faking RM start failure as start " +
"delay timer has not expired.");
} else {
return resourceTracker;
}
}
}
private class MyNodeManager extends NodeManager {
private MyNodeStatusUpdater3 nodeStatusUpdater;
@ -580,6 +610,73 @@ public class TestNodeStatusUpdater {
+ "Recieved SHUTDOWN signal from Resourcemanager ,Registration of NodeManager failed");
}
@Test (timeout = 15000)
public void testNMConnectionToRM() {
final long delta = 1500;
final long connectionWaitSecs = 5;
final long connectionRetryIntervalSecs = 1;
//After waiting rmStartIntervalMS, the RM will be started
final long rmStartIntervalMS = 2*1000;
YarnConfiguration conf = createNMConfig();
conf.setLong(YarnConfiguration.RESOURCEMANAGER_CONNECT_WAIT_SECS,
connectionWaitSecs);
conf.setLong(YarnConfiguration
.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS,
connectionRetryIntervalSecs);
//Test that the NM tries to connect to the RM several times, but finally fails
nm = new NodeManager() {
@Override
protected NodeStatusUpdater createNodeStatusUpdater(Context context,
Dispatcher dispatcher, NodeHealthCheckerService healthChecker) {
NodeStatusUpdater nodeStatusUpdater = new MyNodeStatusUpdater4(
context, dispatcher, healthChecker, metrics,
rmStartIntervalMS, true);
return nodeStatusUpdater;
}
};
nm.init(conf);
long waitStartTime = System.currentTimeMillis();
try {
nm.start();
Assert.fail("NM should have failed to start due to RM connect failure");
} catch(Exception e) {
Assert.assertTrue("NM should have tried re-connecting to RM during " +
"period of at least " + connectionWaitSecs + " seconds, but " +
"stopped retrying within " + (connectionWaitSecs + delta/1000) +
" seconds", (System.currentTimeMillis() - waitStartTime
>= connectionWaitSecs*1000) && (System.currentTimeMillis()
- waitStartTime < (connectionWaitSecs*1000+delta)));
}
//Test that the NM connects to the RM, failing the first several attempts,
//but finally succeeding.
nm = new NodeManager() {
@Override
protected NodeStatusUpdater createNodeStatusUpdater(Context context,
Dispatcher dispatcher, NodeHealthCheckerService healthChecker) {
NodeStatusUpdater nodeStatusUpdater = new MyNodeStatusUpdater4(
context, dispatcher, healthChecker, metrics, rmStartIntervalMS,
false);
return nodeStatusUpdater;
}
};
nm.init(conf);
waitStartTime = System.currentTimeMillis();
try {
nm.start();
} catch (Exception ex){
Assert.fail("NM should have started successfully " +
"after connecting to RM.");
}
Assert.assertTrue("NM should have connected to RM within " + delta/1000
+" seconds of RM starting up.",
(System.currentTimeMillis() - waitStartTime >= rmStartIntervalMS)
&& (System.currentTimeMillis() - waitStartTime
< (rmStartIntervalMS+delta)));
}
/**
* Verifies that if for some reason NM fails to start ContainerManager RPC
* server, RM is oblivious to NM's presence. The behaviour is like this

View File

@ -19,41 +19,39 @@
package org.apache.hadoop.yarn.server.webproxy.amfilter;
import java.io.IOException;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.*;
import java.util.concurrent.atomic.AtomicBoolean;
import javax.servlet.Filter;
import javax.servlet.FilterChain;
import javax.servlet.FilterConfig;
import javax.servlet.ServletContext;
import javax.servlet.ServletException;
import javax.servlet.ServletRequest;
import javax.servlet.ServletResponse;
import javax.servlet.*;
import javax.servlet.http.Cookie;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import junit.framework.Assert;
import static junit.framework.Assert.*;
import org.apache.hadoop.yarn.server.webproxy.WebAppProxyServlet;
import org.glassfish.grizzly.servlet.HttpServletResponseImpl;
import org.junit.Test;
import org.mockito.Mockito;
/**
* Test AmIpFilter. Requests to hosts that are not declared should pass
* through the proxy. Other requests can be filtered with or without a
* user name.
*/
public class TestAmFilter {
public class TestAmFilter {
private String proxyHost = "bogushost.com";
private String proxyHost = "localhost";
private String proxyUri = "http://bogus";
private String doFilterRequest;
private AmIpServletRequestWrapper servletWrapper;
private class TestAmIpFilter extends AmIpFilter {
private Set<String> proxyAddresses = null;
protected Set<String> getProxyAddresses() {
if(proxyAddresses == null) {
if (proxyAddresses == null) {
proxyAddresses = new HashSet<String>();
}
proxyAddresses.add(proxyHost);
@ -61,12 +59,10 @@ public class TestAmFilter {
}
}
private static class DummyFilterConfig implements FilterConfig {
final Map<String, String> map;
DummyFilterConfig(Map<String,String> map) {
DummyFilterConfig(Map<String, String> map) {
this.map = map;
}
@ -74,22 +70,24 @@ public class TestAmFilter {
public String getFilterName() {
return "dummy";
}
@Override
public String getInitParameter(String arg0) {
return map.get(arg0);
}
@Override
public Enumeration<String> getInitParameterNames() {
return Collections.enumeration(map.keySet());
}
@Override
public ServletContext getServletContext() {
return null;
}
}
@Test
@Test(timeout = 5000)
public void filterNullCookies() throws Exception {
HttpServletRequest request = Mockito.mock(HttpServletRequest.class);
@ -97,13 +95,12 @@ public class TestAmFilter {
Mockito.when(request.getRemoteAddr()).thenReturn(proxyHost);
HttpServletResponse response = Mockito.mock(HttpServletResponse.class);
final AtomicBoolean invoked = new AtomicBoolean();
FilterChain chain = new FilterChain() {
@Override
public void doFilter(ServletRequest servletRequest, ServletResponse servletResponse)
throws IOException, ServletException {
public void doFilter(ServletRequest servletRequest,
ServletResponse servletResponse) throws IOException, ServletException {
invoked.set(true);
}
};
@ -115,7 +112,93 @@ public class TestAmFilter {
Filter filter = new TestAmIpFilter();
filter.init(conf);
filter.doFilter(request, response, chain);
Assert.assertTrue(invoked.get());
assertTrue(invoked.get());
filter.destroy();
}
/**
* Test AmIpFilter
*/
@Test(timeout = 1000)
public void testFilter() throws Exception {
Map<String, String> params = new HashMap<String, String>();
params.put(AmIpFilter.PROXY_HOST, proxyHost);
params.put(AmIpFilter.PROXY_URI_BASE, proxyUri);
FilterConfig config = new DummyFilterConfig(params);
// dummy filter
FilterChain chain = new FilterChain() {
@Override
public void doFilter(ServletRequest servletRequest,
ServletResponse servletResponse) throws IOException, ServletException {
doFilterRequest = servletRequest.getClass().getName();
if (servletRequest instanceof AmIpServletRequestWrapper) {
servletWrapper = (AmIpServletRequestWrapper) servletRequest;
}
}
};
AmIpFilter testFilter = new AmIpFilter();
testFilter.init(config);
HttpServletResponseForTest response = new HttpServletResponseForTest();
// A request that does not implement HttpServletRequest must be rejected
ServletRequest failRequest = Mockito.mock(ServletRequest.class);
try {
testFilter.doFilter(failRequest, response, chain);
fail();
} catch (ServletException e) {
assertEquals("This filter only works for HTTP/HTTPS", e.getMessage());
}
// request with HttpServletRequest
HttpServletRequest request = Mockito.mock(HttpServletRequest.class);
Mockito.when(request.getRemoteAddr()).thenReturn("redirect");
Mockito.when(request.getRequestURI()).thenReturn("/redirect");
testFilter.doFilter(request, response, chain);
// address "redirect" is not in host list
assertEquals("http://bogus/redirect", response.getRedirect());
// "127.0.0.1" contains in host list. Without cookie
Mockito.when(request.getRemoteAddr()).thenReturn("127.0.0.1");
testFilter.doFilter(request, response, chain);
assertTrue(doFilterRequest
.contains("javax.servlet.http.HttpServletRequest"));
// cookie added
Cookie[] cookies = new Cookie[1];
cookies[0] = new Cookie(WebAppProxyServlet.PROXY_USER_COOKIE_NAME, "user");
Mockito.when(request.getCookies()).thenReturn(cookies);
testFilter.doFilter(request, response, chain);
assertEquals(
"org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpServletRequestWrapper",
doFilterRequest);
// request contains principal from cookie
assertEquals("user", servletWrapper.getUserPrincipal().getName());
assertEquals("user", servletWrapper.getRemoteUser());
assertFalse(servletWrapper.isUserInRole(""));
}
private class HttpServletResponseForTest extends HttpServletResponseImpl {
String redirectLocation = "";
public String getRedirect() {
return redirectLocation;
}
@Override
public void sendRedirect(String location) throws IOException {
redirectLocation = location;
}
@Override
public String encodeRedirectURL(String url) {
return url;
}
}
}