From 25cdbdb71a65242b2bc08ca1d61f2c0f7d7ea891 Mon Sep 17 00:00:00 2001 From: Brandon Li Date: Mon, 30 Sep 2013 19:21:17 +0000 Subject: [PATCH 001/133] HDFS-5230. Introduce RpcInfo to decouple XDR classes from the RPC API. Contributed by Haohui Mai git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1527726 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/hadoop/nfs/nfs3/Nfs3Base.java | 19 +-- .../apache/hadoop/oncrpc/RpcCallCache.java | 10 +- .../org/apache/hadoop/oncrpc/RpcInfo.java | 60 ++++++++ .../org/apache/hadoop/oncrpc/RpcProgram.java | 135 ++++++------------ .../org/apache/hadoop/oncrpc/RpcResponse.java | 45 ++++++ .../org/apache/hadoop/oncrpc/RpcUtil.java | 91 +++++++++++- .../apache/hadoop/oncrpc/SimpleTcpServer.java | 26 ++-- .../hadoop/oncrpc/SimpleTcpServerHandler.java | 63 -------- .../apache/hadoop/oncrpc/SimpleUdpServer.java | 19 +-- .../hadoop/oncrpc/SimpleUdpServerHandler.java | 61 -------- .../java/org/apache/hadoop/oncrpc/XDR.java | 11 +- .../hadoop/oncrpc/security/Verifier.java | 9 +- .../hadoop/portmap/RpcProgramPortmap.java | 30 ++-- .../hadoop/oncrpc/TestFrameDecoder.java | 35 +++-- .../hadoop/oncrpc/TestRpcCallCache.java | 8 +- .../hdfs/nfs/mount/RpcProgramMountd.java | 24 +++- .../hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java | 133 +++++++++++------ hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + 18 files changed, 429 insertions(+), 353 deletions(-) create mode 100644 hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcInfo.java create mode 100644 hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcResponse.java delete mode 100644 hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/SimpleTcpServerHandler.java delete mode 100644 hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/SimpleUdpServerHandler.java diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/Nfs3Base.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/Nfs3Base.java index 66afbb0d76e..a519ddd8416 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/Nfs3Base.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/Nfs3Base.java @@ -22,13 +22,8 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mount.MountdBase; import org.apache.hadoop.oncrpc.RpcProgram; -import org.apache.hadoop.oncrpc.RpcUtil; import org.apache.hadoop.oncrpc.SimpleTcpServer; -import org.apache.hadoop.oncrpc.SimpleTcpServerHandler; import org.apache.hadoop.portmap.PortmapMapping; -import org.jboss.netty.channel.ChannelPipeline; -import org.jboss.netty.channel.ChannelPipelineFactory; -import org.jboss.netty.channel.Channels; /** * Nfs server. Supports NFS v3 using {@link RpcProgram}. 
@@ -72,19 +67,7 @@ public abstract class Nfs3Base { private void startTCPServer() { SimpleTcpServer tcpServer = new SimpleTcpServer(nfsPort, - rpcProgram, 0) { - @Override - public ChannelPipelineFactory getPipelineFactory() { - return new ChannelPipelineFactory() { - @Override - public ChannelPipeline getPipeline() { - return Channels.pipeline( - RpcUtil.constructRpcFrameDecoder(), - new SimpleTcpServerHandler(rpcProgram)); - } - }; - } - }; + rpcProgram, 0); tcpServer.run(); } } diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcCallCache.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcCallCache.java index 0862d4fb4ea..0c857be9bb0 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcCallCache.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcCallCache.java @@ -44,7 +44,7 @@ import com.google.common.annotations.VisibleForTesting; public class RpcCallCache { public static class CacheEntry { - private XDR response; // null if no response has been sent + private RpcResponse response; // null if no response has been sent public CacheEntry() { response = null; @@ -58,11 +58,11 @@ public class RpcCallCache { return response != null; } - public XDR getResponse() { + public RpcResponse getResponse() { return response; } - public void setResponse(XDR response) { + public void setResponse(RpcResponse response) { this.response = response; } } @@ -128,13 +128,13 @@ public class RpcCallCache { } /** Mark a request as completed and add corresponding response to the cache */ - public void callCompleted(InetAddress clientId, int xid, XDR response) { + public void callCompleted(InetAddress clientId, int xid, RpcResponse response) { ClientRequest req = new ClientRequest(clientId, xid); CacheEntry e; synchronized(map) { e = map.get(req); } - e.setResponse(response); + e.response = response; } /** diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcInfo.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcInfo.java new file mode 100644 index 00000000000..b434d79285c --- /dev/null +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcInfo.java @@ -0,0 +1,60 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.oncrpc; + +import java.net.SocketAddress; + +import org.jboss.netty.buffer.ChannelBuffer; +import org.jboss.netty.channel.Channel; +import org.jboss.netty.channel.ChannelHandlerContext; + +/** + * RpcInfo records all contextual information of an RPC message. It contains + * the RPC header, the parameters, and the information of the remote peer. 
+ */ +public final class RpcInfo { + private final RpcMessage header; + private final ChannelBuffer data; + private final Channel channel; + private final SocketAddress remoteAddress; + + public RpcInfo(RpcMessage header, ChannelBuffer data, + ChannelHandlerContext channelContext, Channel channel, + SocketAddress remoteAddress) { + this.header = header; + this.data = data; + this.channel = channel; + this.remoteAddress = remoteAddress; + } + + public RpcMessage header() { + return header; + } + + public ChannelBuffer data() { + return data; + } + + public Channel channel() { + return channel; + } + + public SocketAddress remoteAddress() { + return remoteAddress; + } +} diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcProgram.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcProgram.java index d457b3aaa91..36348980056 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcProgram.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcProgram.java @@ -18,22 +18,24 @@ package org.apache.hadoop.oncrpc; import java.io.IOException; -import java.net.InetAddress; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.oncrpc.RpcAcceptedReply.AcceptState; -import org.apache.hadoop.oncrpc.RpcCallCache.CacheEntry; -import org.apache.hadoop.oncrpc.security.VerifierNone; +import org.apache.hadoop.oncrpc.security.Verifier; import org.apache.hadoop.portmap.PortmapMapping; import org.apache.hadoop.portmap.PortmapRequest; -import org.jboss.netty.channel.Channel; +import org.jboss.netty.buffer.ChannelBuffer; +import org.jboss.netty.buffer.ChannelBuffers; +import org.jboss.netty.channel.ChannelHandlerContext; +import org.jboss.netty.channel.MessageEvent; +import org.jboss.netty.channel.SimpleChannelUpstreamHandler; /** * Class for writing RPC server programs based on RFC 1050. Extend this class * and implement {@link #handleInternal} to handle the requests received. */ -public abstract class RpcProgram { +public abstract class RpcProgram extends SimpleChannelUpstreamHandler { private static final Log LOG = LogFactory.getLog(RpcProgram.class); public static final int RPCB_PORT = 111; private final String program; @@ -42,7 +44,6 @@ public abstract class RpcProgram { private final int progNumber; private final int lowProgVersion; private final int highProgVersion; - private final RpcCallCache rpcCallCache; /** * Constructor @@ -53,19 +54,15 @@ public abstract class RpcProgram { * @param progNumber program number as defined in RFC 1050 * @param lowProgVersion lowest version of the specification supported * @param highProgVersion highest version of the specification supported - * @param cacheSize size of cache to handle duplciate requests. Size <= 0 - * indicates no cache. */ protected RpcProgram(String program, String host, int port, int progNumber, - int lowProgVersion, int highProgVersion, int cacheSize) { + int lowProgVersion, int highProgVersion) { this.program = program; this.host = host; this.port = port; this.progNumber = progNumber; this.lowProgVersion = lowProgVersion; this.highProgVersion = highProgVersion; - this.rpcCallCache = cacheSize > 0 ? new RpcCallCache(program, cacheSize) - : null; } /** @@ -103,92 +100,50 @@ public abstract class RpcProgram { } } - /** - * Handle an RPC request. 
- * @param rpcCall RPC call that is received - * @param in xdr with cursor at reading the remaining bytes of a method call - * @param out xdr output corresponding to Rpc reply - * @param client making the Rpc request - * @param channel connection over which Rpc request is received - * @return response xdr response - */ - protected abstract XDR handleInternal(RpcCall rpcCall, XDR in, XDR out, - InetAddress client, Channel channel); - - public XDR handle(XDR xdr, InetAddress client, Channel channel) { - XDR out = new XDR(); - RpcCall rpcCall = RpcCall.read(xdr); - if (LOG.isDebugEnabled()) { - LOG.debug(program + " procedure #" + rpcCall.getProcedure()); + @Override + public void messageReceived(ChannelHandlerContext ctx, MessageEvent e) + throws Exception { + RpcInfo info = (RpcInfo) e.getMessage(); + RpcCall call = (RpcCall) info.header(); + if (LOG.isTraceEnabled()) { + LOG.trace(program + " procedure #" + call.getProcedure()); } - if (!checkProgram(rpcCall.getProgram())) { - return programMismatch(out, rpcCall); + if (this.progNumber != call.getProgram()) { + LOG.warn("Invalid RPC call program " + call.getProgram()); + RpcAcceptedReply reply = RpcAcceptedReply.getInstance(call.getXid(), + AcceptState.PROG_UNAVAIL, Verifier.VERIFIER_NONE); + + XDR out = new XDR(); + reply.write(out); + ChannelBuffer b = ChannelBuffers.wrappedBuffer(out.asReadOnlyWrap() + .buffer()); + RpcResponse rsp = new RpcResponse(b, info.remoteAddress()); + RpcUtil.sendRpcResponse(ctx, rsp); + return; } - if (!checkProgramVersion(rpcCall.getVersion())) { - return programVersionMismatch(out, rpcCall); + int ver = call.getVersion(); + if (ver < lowProgVersion || ver > highProgVersion) { + LOG.warn("Invalid RPC call version " + ver); + RpcAcceptedReply reply = RpcAcceptedReply.getInstance(call.getXid(), + AcceptState.PROG_MISMATCH, Verifier.VERIFIER_NONE); + + XDR out = new XDR(); + reply.write(out); + out.writeInt(lowProgVersion); + out.writeInt(highProgVersion); + ChannelBuffer b = ChannelBuffers.wrappedBuffer(out.asReadOnlyWrap() + .buffer()); + RpcResponse rsp = new RpcResponse(b, info.remoteAddress()); + RpcUtil.sendRpcResponse(ctx, rsp); + return; } - // Check for duplicate requests in the cache for non-idempotent requests - boolean idempotent = rpcCallCache != null && !isIdempotent(rpcCall); - if (idempotent) { - CacheEntry entry = rpcCallCache.checkOrAddToCache(client, rpcCall.getXid()); - if (entry != null) { // in ache - if (entry.isCompleted()) { - LOG.info("Sending the cached reply to retransmitted request " - + rpcCall.getXid()); - return entry.getResponse(); - } else { // else request is in progress - LOG.info("Retransmitted request, transaction still in progress " - + rpcCall.getXid()); - // TODO: ignore the request? 
- } - } - } - - XDR response = handleInternal(rpcCall, xdr, out, client, channel); - if (response.size() == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug("No sync response, expect an async response for request XID=" - + rpcCall.getXid()); - } - } - - // Add the request to the cache - if (idempotent) { - rpcCallCache.callCompleted(client, rpcCall.getXid(), response); - } - return response; - } - - private XDR programMismatch(XDR out, RpcCall call) { - LOG.warn("Invalid RPC call program " + call.getProgram()); - RpcAcceptedReply reply = RpcAcceptedReply.getInstance(call.getXid(), - AcceptState.PROG_UNAVAIL, new VerifierNone()); - reply.write(out); - return out; - } - - private XDR programVersionMismatch(XDR out, RpcCall call) { - LOG.warn("Invalid RPC call version " + call.getVersion()); - RpcAcceptedReply reply = RpcAcceptedReply.getInstance(call.getXid(), - AcceptState.PROG_MISMATCH, new VerifierNone()); - reply.write(out); - out.writeInt(lowProgVersion); - out.writeInt(highProgVersion); - return out; - } - - private boolean checkProgram(int progNumber) { - return this.progNumber == progNumber; - } - - /** Return true if a the program version in rpcCall is supported */ - private boolean checkProgramVersion(int programVersion) { - return programVersion >= lowProgVersion - && programVersion <= highProgVersion; + handleInternal(ctx, info); } + + protected abstract void handleInternal(ChannelHandlerContext ctx, RpcInfo info); @Override public String toString() { diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcResponse.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcResponse.java new file mode 100644 index 00000000000..2e45e6100b1 --- /dev/null +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcResponse.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.oncrpc; + +import java.net.SocketAddress; + +import org.jboss.netty.buffer.ChannelBuffer; + +/** + * RpcResponse encapsulates a response to a RPC request. It contains the data + * that is going to cross the wire, as well as the information of the remote + * peer. 
+ */ +public class RpcResponse { + private final ChannelBuffer data; + private final SocketAddress remoteAddress; + + public RpcResponse(ChannelBuffer data, SocketAddress remoteAddress) { + this.data = data; + this.remoteAddress = remoteAddress; + } + + public ChannelBuffer data() { + return data; + } + + public SocketAddress remoteAddress() { + return remoteAddress; + } +} diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcUtil.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcUtil.java index 04ebbbc39bc..e9878b7959c 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcUtil.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcUtil.java @@ -17,17 +17,23 @@ */ package org.apache.hadoop.oncrpc; +import java.nio.ByteBuffer; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.jboss.netty.buffer.ChannelBuffer; import org.jboss.netty.buffer.ChannelBuffers; import org.jboss.netty.channel.Channel; import org.jboss.netty.channel.ChannelHandlerContext; +import org.jboss.netty.channel.Channels; +import org.jboss.netty.channel.MessageEvent; +import org.jboss.netty.channel.SimpleChannelUpstreamHandler; import org.jboss.netty.handler.codec.frame.FrameDecoder; -public class RpcUtil { +public final class RpcUtil { /** - * The XID in RPC call. It is used for starting with new seed after each reboot. + * The XID in RPC call. It is used for starting with new seed after each + * reboot. */ private static int xid = (int) (System.currentTimeMillis() / 1000) << 12; @@ -35,10 +41,27 @@ public class RpcUtil { return xid = ++xid + caller.hashCode(); } + public static void sendRpcResponse(ChannelHandlerContext ctx, + RpcResponse response) { + Channels.fireMessageReceived(ctx, response); + } + public static FrameDecoder constructRpcFrameDecoder() { return new RpcFrameDecoder(); } + public static final SimpleChannelUpstreamHandler STAGE_RPC_MESSAGE_PARSER = new RpcMessageParserStage(); + public static final SimpleChannelUpstreamHandler STAGE_RPC_TCP_RESPONSE = new RpcTcpResponseStage(); + public static final SimpleChannelUpstreamHandler STAGE_RPC_UDP_RESPONSE = new RpcUdpResponseStage(); + + /** + * An RPC client can separate a RPC message into several frames (i.e., + * fragments) when transferring it across the wire. RpcFrameDecoder + * reconstructs a full RPC message from these fragments. + * + * RpcFrameDecoder is a stateful pipeline stage. It has to be constructed for + * each RPC client. + */ static class RpcFrameDecoder extends FrameDecoder { public static final Log LOG = LogFactory.getLog(RpcFrameDecoder.class); private ChannelBuffer currentFrame; @@ -78,4 +101,68 @@ public class RpcUtil { } } } + + /** + * RpcMessageParserStage parses the network bytes and encapsulates the RPC + * request into a RpcInfo instance. 
+ */ + static final class RpcMessageParserStage extends SimpleChannelUpstreamHandler { + private static final Log LOG = LogFactory + .getLog(RpcMessageParserStage.class); + + @Override + public void messageReceived(ChannelHandlerContext ctx, MessageEvent e) + throws Exception { + ChannelBuffer buf = (ChannelBuffer) e.getMessage(); + ByteBuffer b = buf.toByteBuffer().asReadOnlyBuffer(); + XDR in = new XDR(b, XDR.State.READING); + + RpcInfo info = null; + try { + RpcCall callHeader = RpcCall.read(in); + ChannelBuffer dataBuffer = ChannelBuffers.wrappedBuffer(in.buffer() + .slice()); + info = new RpcInfo(callHeader, dataBuffer, ctx, e.getChannel(), + e.getRemoteAddress()); + } catch (Exception exc) { + LOG.info("Malfromed RPC request from " + e.getRemoteAddress()); + } + + if (info != null) { + Channels.fireMessageReceived(ctx, info); + } + } + } + + /** + * RpcTcpResponseStage sends an RpcResponse across the wire with the + * appropriate fragment header. + */ + private static class RpcTcpResponseStage extends SimpleChannelUpstreamHandler { + + @Override + public void messageReceived(ChannelHandlerContext ctx, MessageEvent e) + throws Exception { + RpcResponse r = (RpcResponse) e.getMessage(); + byte[] fragmentHeader = XDR.recordMark(r.data().readableBytes(), true); + ChannelBuffer header = ChannelBuffers.wrappedBuffer(fragmentHeader); + ChannelBuffer d = ChannelBuffers.wrappedBuffer(header, r.data()); + e.getChannel().write(d); + } + } + + /** + * RpcUdpResponseStage sends an RpcResponse as a UDP packet, which does not + * require a fragment header. + */ + private static final class RpcUdpResponseStage extends + SimpleChannelUpstreamHandler { + + @Override + public void messageReceived(ChannelHandlerContext ctx, MessageEvent e) + throws Exception { + RpcResponse r = (RpcResponse) e.getMessage(); + e.getChannel().write(r.data(), r.remoteAddress()); + } + } } diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/SimpleTcpServer.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/SimpleTcpServer.java index 6f668a21065..57ef77a95fe 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/SimpleTcpServer.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/SimpleTcpServer.java @@ -27,6 +27,7 @@ import org.jboss.netty.channel.ChannelFactory; import org.jboss.netty.channel.ChannelPipeline; import org.jboss.netty.channel.ChannelPipelineFactory; import org.jboss.netty.channel.Channels; +import org.jboss.netty.channel.SimpleChannelUpstreamHandler; import org.jboss.netty.channel.socket.nio.NioServerSocketChannelFactory; /** @@ -35,8 +36,7 @@ import org.jboss.netty.channel.socket.nio.NioServerSocketChannelFactory; public class SimpleTcpServer { public static final Log LOG = LogFactory.getLog(SimpleTcpServer.class); protected final int port; - protected final ChannelPipelineFactory pipelineFactory; - protected final RpcProgram rpcProgram; + protected final SimpleChannelUpstreamHandler rpcProgram; /** The maximum number of I/O worker threads */ protected final int workerCount; @@ -50,18 +50,6 @@ public class SimpleTcpServer { this.port = port; this.rpcProgram = program; this.workerCount = workercount; - this.pipelineFactory = getPipelineFactory(); - } - - public ChannelPipelineFactory getPipelineFactory() { - return new ChannelPipelineFactory() { - @Override - public ChannelPipeline getPipeline() { - return Channels.pipeline( - RpcUtil.constructRpcFrameDecoder(), - new 
SimpleTcpServerHandler(rpcProgram)); - } - }; } public void run() { @@ -78,7 +66,15 @@ public class SimpleTcpServer { } ServerBootstrap bootstrap = new ServerBootstrap(factory); - bootstrap.setPipelineFactory(pipelineFactory); + bootstrap.setPipelineFactory(new ChannelPipelineFactory() { + + @Override + public ChannelPipeline getPipeline() throws Exception { + return Channels.pipeline(RpcUtil.constructRpcFrameDecoder(), + RpcUtil.STAGE_RPC_MESSAGE_PARSER, rpcProgram, + RpcUtil.STAGE_RPC_TCP_RESPONSE); + } + }); bootstrap.setOption("child.tcpNoDelay", true); bootstrap.setOption("child.keepAlive", true); diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/SimpleTcpServerHandler.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/SimpleTcpServerHandler.java deleted file mode 100644 index 04e2930f60b..00000000000 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/SimpleTcpServerHandler.java +++ /dev/null @@ -1,63 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.oncrpc; - -import java.net.InetAddress; -import java.net.InetSocketAddress; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.jboss.netty.buffer.ChannelBuffer; -import org.jboss.netty.channel.Channel; -import org.jboss.netty.channel.ChannelHandlerContext; -import org.jboss.netty.channel.ExceptionEvent; -import org.jboss.netty.channel.MessageEvent; -import org.jboss.netty.channel.SimpleChannelHandler; - -/** - * Handler used by {@link SimpleTcpServer}. 
- */ -public class SimpleTcpServerHandler extends SimpleChannelHandler { - public static final Log LOG = LogFactory.getLog(SimpleTcpServerHandler.class); - - protected final RpcProgram rpcProgram; - - public SimpleTcpServerHandler(RpcProgram rpcProgram) { - this.rpcProgram = rpcProgram; - } - - @Override - public void messageReceived(ChannelHandlerContext ctx, MessageEvent e) { - ChannelBuffer buf = (ChannelBuffer) e.getMessage(); - XDR request = new XDR(buf.toByteBuffer().asReadOnlyBuffer(), XDR.State.READING); - - InetAddress remoteInetAddr = ((InetSocketAddress) ctx.getChannel() - .getRemoteAddress()).getAddress(); - Channel outChannel = e.getChannel(); - XDR response = rpcProgram.handle(request, remoteInetAddr, outChannel); - if (response.size() > 0) { - outChannel.write(XDR.writeMessageTcp(response, true)); - } - } - - @Override - public void exceptionCaught(ChannelHandlerContext ctx, ExceptionEvent e) { - LOG.warn("Encountered ", e.getCause()); - e.getChannel().close(); - } -} \ No newline at end of file diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/SimpleUdpServer.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/SimpleUdpServer.java index 70bffba66d6..438eebc5378 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/SimpleUdpServer.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/SimpleUdpServer.java @@ -23,9 +23,8 @@ import java.util.concurrent.Executors; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.jboss.netty.bootstrap.ConnectionlessBootstrap; -import org.jboss.netty.channel.ChannelPipeline; -import org.jboss.netty.channel.ChannelPipelineFactory; import org.jboss.netty.channel.Channels; +import org.jboss.netty.channel.SimpleChannelUpstreamHandler; import org.jboss.netty.channel.socket.DatagramChannelFactory; import org.jboss.netty.channel.socket.nio.NioDatagramChannelFactory; @@ -38,20 +37,13 @@ public class SimpleUdpServer { private final int RECEIVE_BUFFER_SIZE = 65536; protected final int port; - protected final ChannelPipelineFactory pipelineFactory; - protected final RpcProgram rpcProgram; + protected final SimpleChannelUpstreamHandler rpcProgram; protected final int workerCount; - public SimpleUdpServer(int port, RpcProgram program, int workerCount) { + public SimpleUdpServer(int port, SimpleChannelUpstreamHandler program, int workerCount) { this.port = port; this.rpcProgram = program; this.workerCount = workerCount; - this.pipelineFactory = new ChannelPipelineFactory() { - @Override - public ChannelPipeline getPipeline() { - return Channels.pipeline(new SimpleUdpServerHandler(rpcProgram)); - } - }; } public void run() { @@ -60,8 +52,9 @@ public class SimpleUdpServer { Executors.newCachedThreadPool(), workerCount); ConnectionlessBootstrap b = new ConnectionlessBootstrap(f); - ChannelPipeline p = b.getPipeline(); - p.addLast("handler", new SimpleUdpServerHandler(rpcProgram)); + b.setPipeline(Channels.pipeline( + RpcUtil.STAGE_RPC_MESSAGE_PARSER, rpcProgram, + RpcUtil.STAGE_RPC_UDP_RESPONSE)); b.setOption("broadcast", "false"); b.setOption("sendBufferSize", SEND_BUFFER_SIZE); diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/SimpleUdpServerHandler.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/SimpleUdpServerHandler.java deleted file mode 100644 index 79a255b2616..00000000000 --- 
a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/SimpleUdpServerHandler.java +++ /dev/null @@ -1,61 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.oncrpc; - -import java.net.InetAddress; -import java.net.InetSocketAddress; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.jboss.netty.buffer.ChannelBuffer; -import org.jboss.netty.channel.ChannelHandlerContext; -import org.jboss.netty.channel.ExceptionEvent; -import org.jboss.netty.channel.MessageEvent; -import org.jboss.netty.channel.SimpleChannelHandler; - -/** - * Handler used by {@link SimpleUdpServer}. - */ -public class SimpleUdpServerHandler extends SimpleChannelHandler { - public static final Log LOG = LogFactory.getLog(SimpleUdpServerHandler.class); - private final RpcProgram rpcProgram; - - public SimpleUdpServerHandler(RpcProgram rpcProgram) { - this.rpcProgram = rpcProgram; - } - - @Override - public void messageReceived(ChannelHandlerContext ctx, MessageEvent e) { - ChannelBuffer buf = (ChannelBuffer) e.getMessage(); - - XDR request = new XDR(buf.toByteBuffer().asReadOnlyBuffer(), XDR.State.READING); - - InetAddress remoteInetAddr = ((InetSocketAddress) e.getRemoteAddress()) - .getAddress(); - XDR response = rpcProgram.handle(request, remoteInetAddr, null); - - e.getChannel().write(XDR.writeMessageUdp(response.asReadOnlyWrap()), - e.getRemoteAddress()); - } - - @Override - public void exceptionCaught(ChannelHandlerContext ctx, ExceptionEvent e) { - LOG.warn("Encountered ", e.getCause()); - e.getChannel().close(); - } -} \ No newline at end of file diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/XDR.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/XDR.java index df2b91f05f4..2fdabe2fda7 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/XDR.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/XDR.java @@ -93,6 +93,10 @@ public final class XDR { return n; } + public ByteBuffer buffer() { + return buf.duplicate(); + } + public int size() { // TODO: This overloading intends to be compatible with the semantics of // the previous version of the class. This function should be separated into @@ -219,7 +223,7 @@ public final class XDR { return xdr.buf.remaining() >= len; } - private static byte[] recordMark(int size, boolean last) { + static byte[] recordMark(int size, boolean last) { byte[] b = new byte[SIZEOF_INT]; ByteBuffer buf = ByteBuffer.wrap(b); buf.putInt(!last ? 
size : size | 0x80000000); @@ -259,9 +263,8 @@ public final class XDR { @VisibleForTesting public byte[] getBytes() { - ByteBuffer d = buf.duplicate(); - byte[] b = new byte[d.position()]; - d.flip(); + ByteBuffer d = asReadOnlyWrap().buffer(); + byte[] b = new byte[d.remaining()]; d.get(b); return b; diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/security/Verifier.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/security/Verifier.java index 5184e94f29a..e60db97fd50 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/security/Verifier.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/security/Verifier.java @@ -18,16 +18,17 @@ package org.apache.hadoop.oncrpc.security; import org.apache.hadoop.oncrpc.XDR; -import org.apache.hadoop.oncrpc.security.RpcAuthInfo.AuthFlavor; /** * Base class for verifier. Currently our authentication only supports 3 types - * of auth flavors: {@link AuthFlavor#AUTH_NONE}, {@link AuthFlavor#AUTH_SYS}, - * and {@link AuthFlavor#RPCSEC_GSS}. Thus for verifier we only need to handle + * of auth flavors: {@link RpcAuthInfo.AuthFlavor#AUTH_NONE}, {@link RpcAuthInfo.AuthFlavor#AUTH_SYS}, + * and {@link RpcAuthInfo.AuthFlavor#RPCSEC_GSS}. Thus for verifier we only need to handle * AUTH_NONE and RPCSEC_GSS */ public abstract class Verifier extends RpcAuthInfo { + public static final Verifier VERIFIER_NONE = new VerifierNone(); + protected Verifier(AuthFlavor flavor) { super(flavor); } @@ -61,6 +62,4 @@ public abstract class Verifier extends RpcAuthInfo { } verifier.write(xdr); } - - } diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/portmap/RpcProgramPortmap.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/portmap/RpcProgramPortmap.java index 46e602c8626..bd9f48cb524 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/portmap/RpcProgramPortmap.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/portmap/RpcProgramPortmap.java @@ -17,7 +17,6 @@ */ package org.apache.hadoop.portmap; -import java.net.InetAddress; import java.util.HashMap; import java.util.Map.Entry; import java.util.Set; @@ -26,10 +25,15 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.oncrpc.RpcAcceptedReply; import org.apache.hadoop.oncrpc.RpcCall; +import org.apache.hadoop.oncrpc.RpcInfo; import org.apache.hadoop.oncrpc.RpcProgram; +import org.apache.hadoop.oncrpc.RpcResponse; +import org.apache.hadoop.oncrpc.RpcUtil; import org.apache.hadoop.oncrpc.XDR; import org.apache.hadoop.oncrpc.security.VerifierNone; -import org.jboss.netty.channel.Channel; +import org.jboss.netty.buffer.ChannelBuffer; +import org.jboss.netty.buffer.ChannelBuffers; +import org.jboss.netty.channel.ChannelHandlerContext; /** * An rpcbind request handler. 
@@ -44,7 +48,7 @@ public class RpcProgramPortmap extends RpcProgram implements PortmapInterface { private final HashMap map; public RpcProgramPortmap() { - super("portmap", "localhost", RPCB_PORT, PROGRAM, VERSION, VERSION, 0); + super("portmap", "localhost", RPCB_PORT, PROGRAM, VERSION, VERSION); map = new HashMap(256); } @@ -130,10 +134,15 @@ public class RpcProgramPortmap extends RpcProgram implements PortmapInterface { } @Override - public XDR handleInternal(RpcCall rpcCall, XDR in, XDR out, - InetAddress client, Channel channel) { + public void handleInternal(ChannelHandlerContext ctx, RpcInfo info) { + RpcCall rpcCall = (RpcCall) info.header(); final Procedure portmapProc = Procedure.fromValue(rpcCall.getProcedure()); int xid = rpcCall.getXid(); + byte[] data = new byte[info.data().readableBytes()]; + info.data().readBytes(data); + XDR in = new XDR(data); + XDR out = new XDR(); + if (portmapProc == Procedure.PMAPPROC_NULL) { out = nullOp(xid, in, out); } else if (portmapProc == Procedure.PMAPPROC_SET) { @@ -148,11 +157,14 @@ public class RpcProgramPortmap extends RpcProgram implements PortmapInterface { out = getport(xid, in, out); } else { LOG.info("PortmapHandler unknown rpc procedure=" + portmapProc); - RpcAcceptedReply.getInstance(xid, - RpcAcceptedReply.AcceptState.PROC_UNAVAIL, new VerifierNone()).write( - out); + RpcAcceptedReply reply = RpcAcceptedReply.getInstance(xid, + RpcAcceptedReply.AcceptState.PROC_UNAVAIL, new VerifierNone()); + reply.write(out); } - return out; + + ChannelBuffer buf = ChannelBuffers.wrappedBuffer(out.asReadOnlyWrap().buffer()); + RpcResponse rsp = new RpcResponse(buf, info.remoteAddress()); + RpcUtil.sendRpcResponse(ctx, rsp); } @Override diff --git a/hadoop-common-project/hadoop-nfs/src/test/java/org/apache/hadoop/oncrpc/TestFrameDecoder.java b/hadoop-common-project/hadoop-nfs/src/test/java/org/apache/hadoop/oncrpc/TestFrameDecoder.java index 0c306861b50..cdeaa3f2bed 100644 --- a/hadoop-common-project/hadoop-nfs/src/test/java/org/apache/hadoop/oncrpc/TestFrameDecoder.java +++ b/hadoop-common-project/hadoop-nfs/src/test/java/org/apache/hadoop/oncrpc/TestFrameDecoder.java @@ -22,7 +22,6 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; -import java.net.InetAddress; import java.nio.ByteBuffer; import org.apache.hadoop.oncrpc.RpcUtil.RpcFrameDecoder; @@ -30,6 +29,7 @@ import org.apache.hadoop.oncrpc.security.CredentialsNone; import org.apache.hadoop.oncrpc.security.VerifierNone; import org.jboss.netty.buffer.ByteBufferBackedChannelBuffer; import org.jboss.netty.buffer.ChannelBuffer; +import org.jboss.netty.buffer.ChannelBuffers; import org.jboss.netty.channel.Channel; import org.jboss.netty.channel.ChannelHandlerContext; import org.junit.Test; @@ -38,7 +38,7 @@ import org.mockito.Mockito; public class TestFrameDecoder { private static int port = 12345; // some random server port - private static XDR result = null; + private static int resultSize; static void testRequest(XDR request) { SimpleTcpClient tcpClient = new SimpleTcpClient("localhost", port, request, @@ -49,18 +49,20 @@ public class TestFrameDecoder { static class TestRpcProgram extends RpcProgram { protected TestRpcProgram(String program, String host, int port, - int progNumber, int lowProgVersion, int highProgVersion, int cacheSize) { - super(program, host, port, progNumber, lowProgVersion, highProgVersion, - cacheSize); + int progNumber, int lowProgVersion, int highProgVersion) { + super(program, host, port, 
progNumber, lowProgVersion, highProgVersion); } @Override - public XDR handleInternal(RpcCall rpcCall, XDR in, XDR out, - InetAddress client, Channel channel) { - // Get the final complete request and return a void response. - result = in; - RpcAcceptedReply.getAcceptInstance(1234, new VerifierNone()).write(out); - return out; + protected void handleInternal(ChannelHandlerContext ctx, RpcInfo info) { + resultSize = info.data().readableBytes(); + RpcAcceptedReply reply = RpcAcceptedReply.getAcceptInstance(1234, + new VerifierNone()); + XDR out = new XDR(); + reply.write(out); + ChannelBuffer b = ChannelBuffers.wrappedBuffer(out.asReadOnlyWrap().buffer()); + RpcResponse rsp = new RpcResponse(b, info.remoteAddress()); + RpcUtil.sendRpcResponse(ctx, rsp); } @Override @@ -147,21 +149,22 @@ public class TestFrameDecoder { public void testFrames() { RpcProgram program = new TestFrameDecoder.TestRpcProgram("TestRpcProgram", - "localhost", port, 100000, 1, 2, 100); + "localhost", port, 100000, 1, 2); SimpleTcpServer tcpServer = new SimpleTcpServer(port, program, 1); tcpServer.run(); XDR xdrOut = createGetportMount(); + int headerSize = xdrOut.size(); int bufsize = 2 * 1024 * 1024; byte[] buffer = new byte[bufsize]; xdrOut.writeFixedOpaque(buffer); - int requestSize = xdrOut.size(); + int requestSize = xdrOut.size() - headerSize; // Send the request to the server testRequest(xdrOut); // Verify the server got the request with right size - assertTrue(requestSize == result.size()); + assertEquals(requestSize, resultSize); } static void createPortmapXDRheader(XDR xdr_out, int procedure) { @@ -173,10 +176,6 @@ public class TestFrameDecoder { static XDR createGetportMount() { XDR xdr_out = new XDR(); createPortmapXDRheader(xdr_out, 3); - xdr_out.writeInt(0); // AUTH_NULL - xdr_out.writeInt(0); // cred len - xdr_out.writeInt(0); // verifier AUTH_NULL - xdr_out.writeInt(0); // verf len return xdr_out; } /* diff --git a/hadoop-common-project/hadoop-nfs/src/test/java/org/apache/hadoop/oncrpc/TestRpcCallCache.java b/hadoop-common-project/hadoop-nfs/src/test/java/org/apache/hadoop/oncrpc/TestRpcCallCache.java index f605fc20540..40015e2fbf1 100644 --- a/hadoop-common-project/hadoop-nfs/src/test/java/org/apache/hadoop/oncrpc/TestRpcCallCache.java +++ b/hadoop-common-project/hadoop-nfs/src/test/java/org/apache/hadoop/oncrpc/TestRpcCallCache.java @@ -32,6 +32,8 @@ import org.apache.hadoop.oncrpc.RpcCallCache.CacheEntry; import org.apache.hadoop.oncrpc.RpcCallCache.ClientRequest; import org.junit.Test; +import static org.mockito.Mockito.*; + /** * Unit tests for {@link RpcCallCache} */ @@ -67,7 +69,7 @@ public class TestRpcCallCache { validateInprogressCacheEntry(e); // Set call as completed - XDR response = new XDR(); + RpcResponse response = mock(RpcResponse.class); cache.callCompleted(clientIp, xid, response); e = cache.checkOrAddToCache(clientIp, xid); validateCompletedCacheEntry(e, response); @@ -79,7 +81,7 @@ public class TestRpcCallCache { assertNull(c.getResponse()); } - private void validateCompletedCacheEntry(CacheEntry c, XDR response) { + private void validateCompletedCacheEntry(CacheEntry c, RpcResponse response) { assertFalse(c.isInProgress()); assertTrue(c.isCompleted()); assertEquals(response, c.getResponse()); @@ -93,7 +95,7 @@ public class TestRpcCallCache { assertFalse(c.isCompleted()); assertNull(c.getResponse()); - XDR response = new XDR(); + RpcResponse response = mock(RpcResponse.class); c.setResponse(response); validateCompletedCacheEntry(c, response); } diff --git 
a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/mount/RpcProgramMountd.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/mount/RpcProgramMountd.java index 0c1ada61321..f8ac1dc1e4e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/mount/RpcProgramMountd.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/mount/RpcProgramMountd.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.nfs.mount; import java.io.IOException; import java.net.InetAddress; +import java.net.InetSocketAddress; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -38,10 +39,15 @@ import org.apache.hadoop.nfs.nfs3.FileHandle; import org.apache.hadoop.nfs.nfs3.Nfs3Status; import org.apache.hadoop.oncrpc.RpcAcceptedReply; import org.apache.hadoop.oncrpc.RpcCall; +import org.apache.hadoop.oncrpc.RpcInfo; import org.apache.hadoop.oncrpc.RpcProgram; +import org.apache.hadoop.oncrpc.RpcResponse; +import org.apache.hadoop.oncrpc.RpcUtil; import org.apache.hadoop.oncrpc.XDR; import org.apache.hadoop.oncrpc.security.VerifierNone; -import org.jboss.netty.channel.Channel; +import org.jboss.netty.buffer.ChannelBuffer; +import org.jboss.netty.buffer.ChannelBuffers; +import org.jboss.netty.channel.ChannelHandlerContext; /** * RPC program corresponding to mountd daemon. See {@link Mountd}. @@ -77,7 +83,7 @@ public class RpcProgramMountd extends RpcProgram implements MountInterface { throws IOException { // Note that RPC cache is not enabled super("mountd", "localhost", config.getInt("nfs3.mountd.port", PORT), - PROGRAM, VERSION_1, VERSION_3, 0); + PROGRAM, VERSION_1, VERSION_3); this.hostsMatcher = NfsExports.getInstance(config); this.mounts = Collections.synchronizedList(new ArrayList()); @@ -173,10 +179,16 @@ public class RpcProgramMountd extends RpcProgram implements MountInterface { } @Override - public XDR handleInternal(RpcCall rpcCall, XDR xdr, XDR out, - InetAddress client, Channel channel) { + public void handleInternal(ChannelHandlerContext ctx, RpcInfo info) { + RpcCall rpcCall = (RpcCall) info.header(); final MNTPROC mntproc = MNTPROC.fromValue(rpcCall.getProcedure()); int xid = rpcCall.getXid(); + byte[] data = new byte[info.data().readableBytes()]; + info.data().readBytes(data); + XDR xdr = new XDR(data); + XDR out = new XDR(); + InetAddress client = ((InetSocketAddress) info.remoteAddress()).getAddress(); + if (mntproc == MNTPROC.NULL) { out = nullOp(out, xid, client); } else if (mntproc == MNTPROC.MNT) { @@ -198,7 +210,9 @@ public class RpcProgramMountd extends RpcProgram implements MountInterface { RpcAcceptedReply.AcceptState.PROC_UNAVAIL, new VerifierNone()).write( out); } - return out; + ChannelBuffer buf = ChannelBuffers.wrappedBuffer(out.asReadOnlyWrap().buffer()); + RpcResponse rsp = new RpcResponse(buf, info.remoteAddress()); + RpcUtil.sendRpcResponse(ctx, rsp); } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java index 16153c6faf5..a1f5c10406a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java @@ -21,6 +21,7 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; 
import java.net.InetAddress; +import java.net.InetSocketAddress; import java.nio.ByteBuffer; import java.util.EnumSet; @@ -103,9 +104,13 @@ import org.apache.hadoop.nfs.nfs3.response.WccAttr; import org.apache.hadoop.nfs.nfs3.response.WccData; import org.apache.hadoop.oncrpc.RpcAcceptedReply; import org.apache.hadoop.oncrpc.RpcCall; +import org.apache.hadoop.oncrpc.RpcCallCache; import org.apache.hadoop.oncrpc.RpcDeniedReply; +import org.apache.hadoop.oncrpc.RpcInfo; import org.apache.hadoop.oncrpc.RpcProgram; import org.apache.hadoop.oncrpc.RpcReply; +import org.apache.hadoop.oncrpc.RpcResponse; +import org.apache.hadoop.oncrpc.RpcUtil; import org.apache.hadoop.oncrpc.XDR; import org.apache.hadoop.oncrpc.security.Credentials; import org.apache.hadoop.oncrpc.security.CredentialsSys; @@ -115,7 +120,10 @@ import org.apache.hadoop.oncrpc.security.SysSecurityHandler; import org.apache.hadoop.oncrpc.security.Verifier; import org.apache.hadoop.oncrpc.security.VerifierNone; import org.apache.hadoop.security.AccessControlException; +import org.jboss.netty.buffer.ChannelBuffer; +import org.jboss.netty.buffer.ChannelBuffers; import org.jboss.netty.channel.Channel; +import org.jboss.netty.channel.ChannelHandlerContext; /** * RPC program corresponding to nfs daemon. See {@link Nfs3}. @@ -150,14 +158,15 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { private Statistics statistics; private String writeDumpDir; // The dir save dump files + private final RpcCallCache rpcCallCache; + public RpcProgramNfs3() throws IOException { this(new Configuration()); } - public RpcProgramNfs3(Configuration config) - throws IOException { + public RpcProgramNfs3(Configuration config) throws IOException { super("NFS3", "localhost", Nfs3Constant.PORT, Nfs3Constant.PROGRAM, - Nfs3Constant.VERSION, Nfs3Constant.VERSION, 100); + Nfs3Constant.VERSION, Nfs3Constant.VERSION); config.set(FsPermission.UMASK_LABEL, "000"); iug = new IdUserGroup(); @@ -183,6 +192,8 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { } else { clearDirectory(writeDumpDir); } + + rpcCallCache = new RpcCallCache("NFS3", 256); } private void clearDirectory(String writeDumpDir) throws IOException { @@ -213,8 +224,8 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { } @Override - public GETATTR3Response getattr(XDR xdr, - SecurityHandler securityHandler, InetAddress client) { + public GETATTR3Response getattr(XDR xdr, SecurityHandler securityHandler, + InetAddress client) { GETATTR3Response response = new GETATTR3Response(Nfs3Status.NFS3_OK); if (!checkAccessPrivilege(client, AccessPrivilege.READ_ONLY)) { @@ -294,8 +305,8 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { } @Override - public SETATTR3Response setattr(XDR xdr, - SecurityHandler securityHandler, InetAddress client) { + public SETATTR3Response setattr(XDR xdr, SecurityHandler securityHandler, + InetAddress client) { SETATTR3Response response = new SETATTR3Response(Nfs3Status.NFS3_OK); DFSClient dfsClient = clientCache.get(securityHandler.getUser()); if (dfsClient == null) { @@ -370,8 +381,8 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { } @Override - public LOOKUP3Response lookup(XDR xdr, - SecurityHandler securityHandler, InetAddress client) { + public LOOKUP3Response lookup(XDR xdr, SecurityHandler securityHandler, + InetAddress client) { LOOKUP3Response response = new LOOKUP3Response(Nfs3Status.NFS3_OK); if (!checkAccessPrivilege(client, 
AccessPrivilege.READ_ONLY)) { @@ -432,8 +443,8 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { } @Override - public ACCESS3Response access(XDR xdr, - SecurityHandler securityHandler, InetAddress client) { + public ACCESS3Response access(XDR xdr, SecurityHandler securityHandler, + InetAddress client) { ACCESS3Response response = new ACCESS3Response(Nfs3Status.NFS3_OK); if (!checkAccessPrivilege(client, AccessPrivilege.READ_ONLY)) { @@ -574,7 +585,6 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { long offset = request.getOffset(); int count = request.getCount(); - FileHandle handle = request.getHandle(); if (LOG.isDebugEnabled()) { LOG.debug("NFS READ fileId: " + handle.getFileId() + " offset: " + offset @@ -720,8 +730,8 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { } @Override - public CREATE3Response create(XDR xdr, - SecurityHandler securityHandler, InetAddress client) { + public CREATE3Response create(XDR xdr, SecurityHandler securityHandler, + InetAddress client) { CREATE3Response response = new CREATE3Response(Nfs3Status.NFS3_OK); DFSClient dfsClient = clientCache.get(securityHandler.getUser()); if (dfsClient == null) { @@ -973,8 +983,7 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { } String fileIdPath = dirFileIdPath + "/" + fileName; - HdfsFileStatus fstat = Nfs3Utils.getFileStatus(dfsClient, - fileIdPath); + HdfsFileStatus fstat = Nfs3Utils.getFileStatus(dfsClient, fileIdPath); if (fstat == null) { WccData dirWcc = new WccData(Nfs3Utils.getWccAttr(preOpDirAttr), preOpDirAttr); @@ -1056,8 +1065,7 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { } String fileIdPath = dirFileIdPath + "/" + fileName; - HdfsFileStatus fstat = Nfs3Utils.getFileStatus(dfsClient, - fileIdPath); + HdfsFileStatus fstat = Nfs3Utils.getFileStatus(dfsClient, fileIdPath); if (fstat == null) { return new RMDIR3Response(Nfs3Status.NFS3ERR_NOENT, errWcc); } @@ -1098,8 +1106,8 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { } @Override - public RENAME3Response rename(XDR xdr, - SecurityHandler securityHandler, InetAddress client) { + public RENAME3Response rename(XDR xdr, SecurityHandler securityHandler, + InetAddress client) { RENAME3Response response = new RENAME3Response(Nfs3Status.NFS3_OK); DFSClient dfsClient = clientCache.get(securityHandler.getUser()); if (dfsClient == null) { @@ -1245,13 +1253,14 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { } } - public READDIR3Response link(XDR xdr, SecurityHandler securityHandler, InetAddress client) { + public READDIR3Response link(XDR xdr, SecurityHandler securityHandler, + InetAddress client) { return new READDIR3Response(Nfs3Status.NFS3ERR_NOTSUPP); } @Override - public READDIR3Response readdir(XDR xdr, - SecurityHandler securityHandler, InetAddress client) { + public READDIR3Response readdir(XDR xdr, SecurityHandler securityHandler, + InetAddress client) { READDIR3Response response = new READDIR3Response(Nfs3Status.NFS3_OK); if (!checkAccessPrivilege(client, AccessPrivilege.READ_ONLY)) { @@ -1540,8 +1549,8 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { } @Override - public FSSTAT3Response fsstat(XDR xdr, - SecurityHandler securityHandler, InetAddress client) { + public FSSTAT3Response fsstat(XDR xdr, SecurityHandler securityHandler, + InetAddress client) { FSSTAT3Response response = new FSSTAT3Response(Nfs3Status.NFS3_OK); if 
(!checkAccessPrivilege(client, AccessPrivilege.READ_ONLY)) { @@ -1598,8 +1607,8 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { } @Override - public FSINFO3Response fsinfo(XDR xdr, - SecurityHandler securityHandler, InetAddress client) { + public FSINFO3Response fsinfo(XDR xdr, SecurityHandler securityHandler, + InetAddress client) { FSINFO3Response response = new FSINFO3Response(Nfs3Status.NFS3_OK); if (!checkAccessPrivilege(client, AccessPrivilege.READ_ONLY)) { @@ -1650,8 +1659,8 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { } @Override - public PATHCONF3Response pathconf(XDR xdr, - SecurityHandler securityHandler, InetAddress client) { + public PATHCONF3Response pathconf(XDR xdr, SecurityHandler securityHandler, + InetAddress client) { PATHCONF3Response response = new PATHCONF3Response(Nfs3Status.NFS3_OK); if (!checkAccessPrivilege(client, AccessPrivilege.READ_ONLY)) { @@ -1697,8 +1706,8 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { } @Override - public COMMIT3Response commit(XDR xdr, - SecurityHandler securityHandler, InetAddress client) { + public COMMIT3Response commit(XDR xdr, SecurityHandler securityHandler, + InetAddress client) { COMMIT3Response response = new COMMIT3Response(Nfs3Status.NFS3_OK); DFSClient dfsClient = clientCache.get(securityHandler.getUser()); if (dfsClient == null) { @@ -1776,25 +1785,53 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { } @Override - public XDR handleInternal(RpcCall rpcCall, final XDR xdr, XDR out, - InetAddress client, Channel channel) { + public void handleInternal(ChannelHandlerContext ctx, RpcInfo info) { + RpcCall rpcCall = (RpcCall) info.header(); final NFSPROC3 nfsproc3 = NFSPROC3.fromValue(rpcCall.getProcedure()); int xid = rpcCall.getXid(); + byte[] data = new byte[info.data().readableBytes()]; + info.data().readBytes(data); + XDR xdr = new XDR(data); + XDR out = new XDR(); + InetAddress client = ((InetSocketAddress) info.remoteAddress()) + .getAddress(); + Channel channel = info.channel(); Credentials credentials = rpcCall.getCredential(); // Ignore auth only for NFSPROC3_NULL, especially for Linux clients. 
if (nfsproc3 != NFSPROC3.NULL) { - if (rpcCall.getCredential().getFlavor() != AuthFlavor.AUTH_SYS - && rpcCall.getCredential().getFlavor() != AuthFlavor.RPCSEC_GSS) { - LOG.info("Wrong RPC AUTH flavor, " - + rpcCall.getCredential().getFlavor() + if (credentials.getFlavor() != AuthFlavor.AUTH_SYS + && credentials.getFlavor() != AuthFlavor.RPCSEC_GSS) { + LOG.info("Wrong RPC AUTH flavor, " + credentials.getFlavor() + " is not AUTH_SYS or RPCSEC_GSS."); XDR reply = new XDR(); RpcDeniedReply rdr = new RpcDeniedReply(xid, RpcReply.ReplyState.MSG_ACCEPTED, RpcDeniedReply.RejectState.AUTH_ERROR, new VerifierNone()); rdr.write(reply); - return reply; + + ChannelBuffer buf = ChannelBuffers.wrappedBuffer(reply.asReadOnlyWrap() + .buffer()); + RpcResponse rsp = new RpcResponse(buf, info.remoteAddress()); + RpcUtil.sendRpcResponse(ctx, rsp); + return; + } + } + + if (!isIdempotent(rpcCall)) { + RpcCallCache.CacheEntry entry = rpcCallCache.checkOrAddToCache(client, + xid); + if (entry != null) { // in cache + if (entry.isCompleted()) { + LOG.info("Sending the cached reply to retransmitted request " + xid); + RpcUtil.sendRpcResponse(ctx, entry.getResponse()); + return; + } else { // else request is in progress + LOG.info("Retransmitted request, transaction still in progress " + + xid); + // Ignore the request and do nothing + return; + } } } @@ -1862,12 +1899,24 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { RpcAcceptedReply.AcceptState.PROC_UNAVAIL, new VerifierNone()).write( out); } - if (response != null) { - // TODO: currently we just return VerifierNone - out = response.writeHeaderAndResponse(out, xid, new VerifierNone()); + if (response == null) { + if (LOG.isDebugEnabled()) { + LOG.debug("No sync response, expect an async response for request XID=" + + rpcCall.getXid()); + } + return; + } + // TODO: currently we just return VerifierNone + out = response.writeHeaderAndResponse(out, xid, new VerifierNone()); + ChannelBuffer buf = ChannelBuffers.wrappedBuffer(out.asReadOnlyWrap() + .buffer()); + RpcResponse rsp = new RpcResponse(buf, info.remoteAddress()); + + if (!isIdempotent(rpcCall)) { + rpcCallCache.callCompleted(client, xid, rsp); } - return out; + RpcUtil.sendRpcResponse(ctx, rsp); } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index f0bf3a22e45..e1d0296ab99 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -345,6 +345,9 @@ Release 2.1.2 - UNRELEASED NEW FEATURES + HDFS-5230. Introduce RpcInfo to decouple XDR classes from the RPC API. + (Haohui Mai via brandonli) + IMPROVEMENTS HDFS-5246. Make Hadoop nfs server port and mount daemon port From 09bb12c3948903526350f8587545d6a07bc87c09 Mon Sep 17 00:00:00 2001 From: Sanford Ryza Date: Mon, 30 Sep 2013 21:59:21 +0000 Subject: [PATCH 002/133] YARN-1221. 
With Fair Scheduler, reserved MB reported in RM web UI increases indefinitely (Siqi Li via Sandy Ryza) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1527794 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 ++ .../scheduler/fair/AppSchedulable.java | 6 --- .../scheduler/fair/TestFairScheduler.java | 38 +++++++++++++++++++ 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index ab5c2a72cef..f54db75fd9f 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -98,6 +98,9 @@ Release 2.1.2 - UNRELEASED YARN-1157. Fixed ResourceManager UI to behave correctly when apps like distributed-shell do not set tracking urls. (Xuan Gong via vinodkv) + YARN-1221. With Fair Scheduler, reserved MB reported in RM web UI increases + indefinitely (Siqi Li via Sandy Ryza) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AppSchedulable.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AppSchedulable.java index 14ec99cada5..415f125e1b6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AppSchedulable.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AppSchedulable.java @@ -192,10 +192,6 @@ public class AppSchedulable extends Schedulable { RMContainer rmContainer = app.reserve(node, priority, null, container); node.reserveResource(app, priority, rmContainer); - getMetrics().reserveResource(app.getUser(), - container.getResource()); - scheduler.getRootQueueMetrics().reserveResource(app.getUser(), - container.getResource()); } else { @@ -216,8 +212,6 @@ public class AppSchedulable extends Schedulable { node.unreserveResource(app); getMetrics().unreserveResource( app.getUser(), rmContainer.getContainer().getResource()); - scheduler.getRootQueueMetrics().unreserveResource( - app.getUser(), rmContainer.getContainer().getResource()); } /** diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index 5bede953372..30f874ba152 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -449,6 +449,44 @@ public class TestFairScheduler { Assert.assertEquals(3, queueManager.getLeafQueues().size()); } + @Test + public void testSchedulerRootQueueMetrics() throws InterruptedException { + + // Add a node + RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(1024)); + 
NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); + scheduler.handle(nodeEvent1); + + // Queue 1 requests full capacity of node + createSchedulingRequest(1024, "queue1", "user1", 1); + scheduler.update(); + NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node1); + scheduler.handle(updateEvent); + + // Now queue 2 requests likewise + createSchedulingRequest(1024, "queue2", "user1", 1); + scheduler.update(); + scheduler.handle(updateEvent); + + // Make sure reserved memory gets updated correctly + assertEquals(1024, scheduler.rootMetrics.getReservedMB()); + + // Now another node checks in with capacity + RMNode node2 = MockNodes.newNodeInfo(1, Resources.createResource(1024)); + NodeAddedSchedulerEvent nodeEvent2 = new NodeAddedSchedulerEvent(node2); + NodeUpdateSchedulerEvent updateEvent2 = new NodeUpdateSchedulerEvent(node2); + scheduler.handle(nodeEvent2); + scheduler.handle(updateEvent2); + + + // The old reservation should still be there... + assertEquals(1024, scheduler.rootMetrics.getReservedMB()); + + // ... but it should disappear when we update the first node. + scheduler.handle(updateEvent); + assertEquals(0, scheduler.rootMetrics.getReservedMB()); + } + @Test (timeout = 5000) public void testSimpleContainerAllocation() { // Add a node From 6d5577fe7be14d3f5d1a5854c2ad06bfab599277 Mon Sep 17 00:00:00 2001 From: Kihwal Lee Date: Mon, 30 Sep 2013 22:31:00 +0000 Subject: [PATCH 003/133] HDFS-4517. Cover class RemoteBlockReader with unit tests. Contributed by Vadim Bondarev and Dennis Y. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1527807 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../hdfs/TestShortCircuitLocalRead.java | 60 +++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index e1d0296ab99..cfd19e5d9ec 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -300,6 +300,9 @@ Release 2.3.0 - UNRELEASED HDFS-5260. Merge zero-copy memory-mapped HDFS client reads to trunk and branch-2. (cnauroth) + HDFS-4517. Cover class RemoteBlockReader with unit tests. (Vadim Bondarev + and Dennis Y via kihwal) + OPTIMIZATIONS HDFS-5239. 
Allow FSNamesystem lock fairness to be configurable (daryn) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java index f15da5c30e4..ed6fd745a54 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java @@ -577,4 +577,64 @@ public class TestShortCircuitLocalRead { System.out.println("Iteration " + iteration + " took " + (end - start)); fs.delete(file1, false); } + + public void testReadWithRemoteBlockReader() throws IOException, InterruptedException { + doTestShortCircuitReadWithRemoteBlockReader(true, 3*blockSize+100, getCurrentUser(), 0, false); + } + + /** + * Test that file data can be read by reading the block + * through RemoteBlockReader + * @throws IOException + */ + public void doTestShortCircuitReadWithRemoteBlockReader(boolean ignoreChecksum, int size, String shortCircuitUser, + int readOffset, boolean shortCircuitFails) throws IOException, InterruptedException { + Configuration conf = new Configuration(); + conf.setBoolean(DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADER, true); + conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true); + + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1) + .format(true).build(); + FileSystem fs = cluster.getFileSystem(); + // check that / exists + Path path = new Path("/"); + URI uri = cluster.getURI(); + assertTrue("/ should be a directory", fs.getFileStatus(path) + .isDirectory() == true); + + byte[] fileData = AppendTestUtil.randomBytes(seed, size); + Path file1 = new Path("filelocal.dat"); + FSDataOutputStream stm = createFile(fs, file1, 1); + + stm.write(fileData); + stm.close(); + try { + checkFileContent(uri, file1, fileData, readOffset, shortCircuitUser, conf, shortCircuitFails); + //RemoteBlockReader have unsupported method read(ByteBuffer bf) + assertTrue("RemoteBlockReader unsupported method read(ByteBuffer bf) error", + checkUnsupportedMethod(fs, file1, fileData, readOffset)); + } catch(IOException e) { + throw new IOException("doTestShortCircuitReadWithRemoteBlockReader ex error ", e); + } catch(InterruptedException inEx) { + throw inEx; + } finally { + fs.close(); + cluster.shutdown(); + } + } + + private boolean checkUnsupportedMethod(FileSystem fs, Path file, + byte[] expected, int readOffset) throws IOException { + HdfsDataInputStream stm = (HdfsDataInputStream)fs.open(file); + ByteBuffer actual = ByteBuffer.allocateDirect(expected.length - readOffset); + IOUtils.skipFully(stm, readOffset); + try { + stm.read(actual); + } catch(UnsupportedOperationException unex) { + return true; + } + return false; + } + + } From 84e4e4b14373cb62966929994862ca58932ce6f4 Mon Sep 17 00:00:00 2001 From: Alejandro Abdelnur Date: Mon, 30 Sep 2013 22:55:20 +0000 Subject: [PATCH 004/133] YARN-1247. test-container-executor has gotten out of sync with the changes to container-executor. 
(rvs via tucu) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1527813 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 2 + .../test/test-container-executor.c | 106 ++++++++++-------- 2 files changed, 59 insertions(+), 49 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index f54db75fd9f..767b8dd7f45 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -101,6 +101,8 @@ Release 2.1.2 - UNRELEASED YARN-1221. With Fair Scheduler, reserved MB reported in RM web UI increases indefinitely (Siqi Li via Sandy Ryza) + YARN-1247. test-container-executor has gotten out of sync with the changes to container-executor. (rvs via tucu) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c index b2d7d6f4997..e995bf24132 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c @@ -32,14 +32,12 @@ #define DONT_TOUCH_FILE "dont-touch-me" #define NM_LOCAL_DIRS TEST_ROOT "/local-1," TEST_ROOT "/local-2," \ TEST_ROOT "/local-3," TEST_ROOT "/local-4," TEST_ROOT "/local-5" -#define NM_LOG_DIRS TEST_ROOT "/logdir_1," TEST_ROOT "/logdir_2," \ - TEST_ROOT "/logdir_3," TEST_ROOT "/logdir_4" +#define NM_LOG_DIRS TEST_ROOT "/logs/userlogs" #define ARRAY_SIZE 1000 static char* username = NULL; -static char* local_dirs = NULL; -static char* log_dirs = NULL; -static char* resources = NULL; +static char** local_dirs = NULL; +static char** log_dirs = NULL; /** * Run the command using the effective user id. 
@@ -122,6 +120,33 @@ void create_nm_roots(char ** nm_roots) { } } +void check_pid_file(const char* pid_file, pid_t mypid) { + if(access(pid_file, R_OK) != 0) { + printf("FAIL: failed to create pid file %s\n", pid_file); + exit(1); + } + int pidfd = open(pid_file, O_RDONLY); + if (pidfd == -1) { + printf("FAIL: failed to open pid file %s - %s\n", pid_file, strerror(errno)); + exit(1); + } + + char pidBuf[100]; + ssize_t bytes = read(pidfd, pidBuf, 100); + if (bytes == -1) { + printf("FAIL: failed to read from pid file %s - %s\n", pid_file, strerror(errno)); + exit(1); + } + + char myPidBuf[33]; + snprintf(myPidBuf, 33, "%d", mypid); + if (strncmp(pidBuf, myPidBuf, strlen(myPidBuf)) != 0) { + printf("FAIL: failed to find matching pid in pid file\n"); + printf("FAIL: Expected pid %d : Got %.*s", mypid, (int)bytes, pidBuf); + exit(1); + } +} + void test_get_user_directory() { char *user_dir = get_user_directory("/tmp", "user"); char *expected = "/tmp/usercache/user"; @@ -227,7 +252,7 @@ void test_check_configuration_permissions() { } void test_delete_container() { - if (initialize_user(username, extract_values(local_dirs))) { + if (initialize_user(username, local_dirs)) { printf("FAIL: failed to initialize user %s\n", username); exit(1); } @@ -458,6 +483,9 @@ void test_signal_container_group() { exit(0); } printf("Child container launched as %d\n", child); + // there's a race condition for child calling change_user and us + // calling signal_container_as_user, hence sleeping + sleep(3); if (signal_container_as_user(username, child, SIGKILL) != 0) { exit(1); } @@ -522,8 +550,8 @@ void test_init_app() { exit(1); } else if (child == 0) { char *final_pgm[] = {"touch", "my-touch-file", 0}; - if (initialize_app(username, "app_4", TEST_ROOT "/creds.txt", final_pgm, - extract_values(local_dirs), extract_values(log_dirs)) != 0) { + if (initialize_app(username, "app_4", TEST_ROOT "/creds.txt", + local_dirs, log_dirs, final_pgm) != 0) { printf("FAIL: failed in child\n"); exit(42); } @@ -546,7 +574,7 @@ void test_init_app() { exit(1); } char buffer[100000]; - sprintf(buffer, "%s/jobToken", app_dir); + sprintf(buffer, "%s/creds.txt", app_dir); if (access(buffer, R_OK) != 0) { printf("FAIL: failed to create credentials %s\n", buffer); exit(1); @@ -557,7 +585,7 @@ void test_init_app() { exit(1); } free(app_dir); - app_dir = get_app_log_directory("logs","app_4"); + app_dir = get_app_log_directory(TEST_ROOT "/logs/userlogs","app_4"); if (access(app_dir, R_OK) != 0) { printf("FAIL: failed to create app log directory %s\n", app_dir); exit(1); @@ -585,6 +613,10 @@ void test_run_container() { exit(1); } + char * cgroups_pids[] = { TEST_ROOT "/cgroups-pid1.txt", TEST_ROOT "/cgroups-pid2.txt", 0 }; + close(creat(cgroups_pids[0], O_RDWR)); + close(creat(cgroups_pids[1], O_RDWR)); + const char* script_name = TEST_ROOT "/container-script"; FILE* script = fopen(script_name, "w"); if (script == NULL) { @@ -610,23 +642,17 @@ void test_run_container() { char* container_dir = get_container_work_directory(TEST_ROOT "/local-1", username, "app_4", "container_1"); const char * pid_file = TEST_ROOT "/pid.txt"; + pid_t child = fork(); if (child == -1) { printf("FAIL: failed to fork process for init_app - %s\n", strerror(errno)); exit(1); } else if (child == 0) { - char *key = malloc(strlen(resources)); - char *value = malloc(strlen(resources)); - if (get_kv_key(resources, key, strlen(resources)) < 0 || - get_kv_value(resources, key, strlen(resources)) < 0) { - printf("FAIL: resources failed - %s\n"); - exit(1); - } if 
(launch_container_as_user(username, "app_4", "container_1", container_dir, script_name, TEST_ROOT "/creds.txt", pid_file, - extract_values(local_dirs), extract_values(log_dirs), - key, extract_values(value)) != 0) { + local_dirs, log_dirs, + "cgroups", cgroups_pids) != 0) { printf("FAIL: failed in child\n"); exit(42); } @@ -654,38 +680,21 @@ void test_run_container() { exit(1); } free(container_dir); - container_dir = get_app_log_directory("logs", "app_4/container_1"); + container_dir = get_app_log_directory(TEST_ROOT "/logs/userlogs", "app_4/container_1"); if (access(container_dir, R_OK) != 0) { printf("FAIL: failed to create app log directory %s\n", container_dir); exit(1); } free(container_dir); - if(access(pid_file, R_OK) != 0) { - printf("FAIL: failed to create pid file %s\n", pid_file); - exit(1); - } - int pidfd = open(pid_file, O_RDONLY); - if (pidfd == -1) { - printf("FAIL: failed to open pid file %s - %s\n", pid_file, strerror(errno)); + if (seteuid(0) != 0) { + printf("FAIL: seteuid to root failed - %s\n", strerror(errno)); exit(1); } - char pidBuf[100]; - ssize_t bytes = read(pidfd, pidBuf, 100); - if (bytes == -1) { - printf("FAIL: failed to read from pid file %s - %s\n", pid_file, strerror(errno)); - exit(1); - } - - pid_t mypid = child; - char myPidBuf[33]; - snprintf(myPidBuf, 33, "%d", mypid); - if (strncmp(pidBuf, myPidBuf, strlen(myPidBuf)) != 0) { - printf("FAIL: failed to find matching pid in pid file\n"); - printf("FAIL: Expected pid %d : Got %.*s", mypid, (int)bytes, pidBuf); - exit(1); - } + check_pid_file(pid_file, child); + check_pid_file(cgroups_pids[0], child); + check_pid_file(cgroups_pids[1], child); } int main(int argc, char **argv) { @@ -707,12 +716,10 @@ int main(int argc, char **argv) { } read_config(TEST_ROOT "/test.cfg"); - local_dirs = (char *) malloc (sizeof(char) * ARRAY_SIZE); - strcpy(local_dirs, NM_LOCAL_DIRS); - log_dirs = (char *) malloc (sizeof(char) * ARRAY_SIZE); - strcpy(log_dirs, NM_LOG_DIRS); + local_dirs = extract_values(strdup(NM_LOCAL_DIRS)); + log_dirs = extract_values(strdup(NM_LOG_DIRS)); - create_nm_roots(extract_values(local_dirs)); + create_nm_roots(local_dirs); if (getuid() == 0 && argc == 2) { username = argv[1]; @@ -754,8 +761,6 @@ int main(int argc, char **argv) { printf("\nTesting delete_app()\n"); test_delete_app(); - test_delete_user(); - test_check_user(); // the tests that change user need to be run in a subshell, so that @@ -772,6 +777,9 @@ int main(int argc, char **argv) { } seteuid(0); + // test_delete_user must run as root since that's how we use the delete_as_user + test_delete_user(); + run("rm -fr " TEST_ROOT); printf("\nFinished tests\n"); From 74d20250ffe16a85c6ef70b70e1254a77eaf03a3 Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Tue, 1 Oct 2013 00:18:09 +0000 Subject: [PATCH 005/133] YARN-1070. Fixed race conditions in NodeManager during container-kill. Contributed by Zhijie Shen. 
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1527827 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 6 ++- .../launcher/ContainerLaunch.java | 15 +++++- .../launcher/ContainersLauncher.java | 45 ++++-------------- .../container/TestContainer.java | 46 +++++++++++++++---- .../launcher/TestContainerLaunch.java | 5 +- 5 files changed, 67 insertions(+), 50 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 767b8dd7f45..191cf58be67 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -101,7 +101,11 @@ Release 2.1.2 - UNRELEASED YARN-1221. With Fair Scheduler, reserved MB reported in RM web UI increases indefinitely (Siqi Li via Sandy Ryza) - YARN-1247. test-container-executor has gotten out of sync with the changes to container-executor. (rvs via tucu) + YARN-1247. test-container-executor has gotten out of sync with the changes to + container-executor. (rvs via tucu) + + YARN-1070. Fixed race conditions in NodeManager during container-kill. + (Zhijie Shen via vinodkv) Release 2.1.1-beta - 2013-09-23 diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java index 1bff008541c..edc31466f67 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java @@ -68,6 +68,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Cont import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerExitEvent; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceLocalizationService; import org.apache.hadoop.yarn.server.nodemanager.util.ProcessIdFileReader; @@ -133,10 +134,22 @@ public class ContainerLaunch implements Callable { final List command = launchContext.getCommands(); int ret = -1; + // CONTAINER_KILLED_ON_REQUEST should not be missed if the container + // is already at KILLING + if (container.getContainerState() == ContainerState.KILLING) { + dispatcher.getEventHandler().handle( + new ContainerExitEvent(containerID, + ContainerEventType.CONTAINER_KILLED_ON_REQUEST, + Shell.WINDOWS ? 
ExitCode.FORCE_KILLED.getExitCode() : + ExitCode.TERMINATED.getExitCode(), + "Container terminated before launch.")); + return 0; + } + try { localResources = container.getLocalizedResources(); if (localResources == null) { - RPCUtil.getRemoteException( + throw RPCUtil.getRemoteException( "Unable to get local resources when Container " + containerID + " is at " + container.getContainerState()); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java index 33e3c1c06de..ce865e3f68f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java @@ -75,20 +75,9 @@ public class ContainersLauncher extends AbstractService new ThreadFactoryBuilder() .setNameFormat("ContainersLauncher #%d") .build()); - private final Map running = - Collections.synchronizedMap(new HashMap()); - - private static final class RunningContainer { - public RunningContainer(Future submit, - ContainerLaunch launcher) { - this.runningcontainer = submit; - this.launcher = launcher; - } - - Future runningcontainer; - ContainerLaunch launcher; - } - + @VisibleForTesting + public final Map running = + Collections.synchronizedMap(new HashMap()); public ContainersLauncher(Context context, Dispatcher dispatcher, ContainerExecutor exec, LocalDirsHandlerService dirsHandler, @@ -133,38 +122,20 @@ public class ContainersLauncher extends AbstractService ContainerLaunch launch = new ContainerLaunch(context, getConfig(), dispatcher, exec, app, event.getContainer(), dirsHandler, containerManager); - running.put(containerId, - new RunningContainer(containerLauncher.submit(launch), - launch)); + containerLauncher.submit(launch); + running.put(containerId, launch); break; case CLEANUP_CONTAINER: - RunningContainer rContainerDatum = running.remove(containerId); - if (rContainerDatum == null) { + ContainerLaunch launcher = running.remove(containerId); + if (launcher == null) { // Container not launched. So nothing needs to be done. return; } - Future rContainer = rContainerDatum.runningcontainer; - if (rContainer != null - && !rContainer.isDone()) { - // Cancel the future so that it won't be launched if it isn't already. - // If it is going to be canceled, make sure CONTAINER_KILLED_ON_REQUEST - // will not be missed if the container is already at KILLING - if (rContainer.cancel(false)) { - if (container.getContainerState() == ContainerState.KILLING) { - dispatcher.getEventHandler().handle( - new ContainerExitEvent(containerId, - ContainerEventType.CONTAINER_KILLED_ON_REQUEST, - Shell.WINDOWS ? ExitCode.FORCE_KILLED.getExitCode() : - ExitCode.TERMINATED.getExitCode(), - "Container terminated before launch.")); - } - } - } // Cleanup a container whether it is running/killed/completed, so that // no sub-processes are alive. 
try { - rContainerDatum.launcher.cleanupContainer(); + launcher.cleanupContainer(); } catch (IOException e) { LOG.warn("Got exception while cleaning container " + containerId + ". Ignoring."); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java index 14d445f5051..ebc400abb00 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java @@ -65,6 +65,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.DrainDispatcher; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; +import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode; import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServicesEvent; @@ -72,6 +73,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServicesEve import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEventType; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch; import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncher; import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncherEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncherEventType; @@ -296,8 +298,7 @@ public class TestContainer { wc.launchContainer(); reset(wc.localizerBus); wc.killContainer(); - assertEquals(ContainerState.CONTAINER_CLEANEDUP_AFTER_KILL, - wc.c.getContainerState()); + assertEquals(ContainerState.KILLING, wc.c.getContainerState()); assertNull(wc.c.getLocalizedResources()); wc.containerKilledOnRequest(); @@ -330,14 +331,18 @@ public class TestContainer { } @Test - public void testKillOnLocalized() throws Exception { + public void testKillOnLocalizedWhenContainerNotLaunched() throws Exception { WrappedContainer wc = null; try { wc = new WrappedContainer(17, 314159265358979L, 4344, "yak"); wc.initContainer(); wc.localizeResources(); assertEquals(ContainerState.LOCALIZED, wc.c.getContainerState()); + ContainerLaunch launcher = wc.launcher.running.get(wc.c.getContainerId()); wc.killContainer(); + assertEquals(ContainerState.KILLING, wc.c.getContainerState()); + launcher.call(); + wc.drainDispatcherEvents(); assertEquals(ContainerState.CONTAINER_CLEANEDUP_AFTER_KILL, wc.c.getContainerState()); assertNull(wc.c.getLocalizedResources()); @@ -348,6 +353,31 @@ public class TestContainer { } } } + + @Test + public void testKillOnLocalizedWhenContainerLaunched() throws Exception { + WrappedContainer wc = 
null; + try { + wc = new WrappedContainer(17, 314159265358979L, 4344, "yak"); + wc.initContainer(); + wc.localizeResources(); + assertEquals(ContainerState.LOCALIZED, wc.c.getContainerState()); + ContainerLaunch launcher = wc.launcher.running.get(wc.c.getContainerId()); + launcher.call(); + wc.drainDispatcherEvents(); + assertEquals(ContainerState.EXITED_WITH_FAILURE, + wc.c.getContainerState()); + wc.killContainer(); + assertEquals(ContainerState.EXITED_WITH_FAILURE, + wc.c.getContainerState()); + assertNull(wc.c.getLocalizedResources()); + verifyCleanupCall(wc); + } finally { + if (wc != null) { + wc.finished(); + } + } + } @Test public void testResourceLocalizedOnLocalizationFailed() throws Exception { @@ -472,12 +502,10 @@ public class TestContainer { wc.initContainer(); wc.localizeResources(); wc.killContainer(); - assertEquals(ContainerState.CONTAINER_CLEANEDUP_AFTER_KILL, - wc.c.getContainerState()); + assertEquals(ContainerState.KILLING, wc.c.getContainerState()); assertNull(wc.c.getLocalizedResources()); wc.launchContainer(); - assertEquals(ContainerState.CONTAINER_CLEANEDUP_AFTER_KILL, - wc.c.getContainerState()); + assertEquals(ContainerState.KILLING, wc.c.getContainerState()); assertNull(wc.c.getLocalizedResources()); wc.containerKilledOnRequest(); verifyCleanupCall(wc); @@ -650,7 +678,9 @@ public class TestContainer { Context context = mock(Context.class); when(context.getApplications()).thenReturn( new ConcurrentHashMap()); - launcher = new ContainersLauncher(context, dispatcher, null, null, null); + ContainerExecutor executor = mock(ContainerExecutor.class); + launcher = + new ContainersLauncher(context, dispatcher, executor, null, null); // create a mock ExecutorService, which will not really launch // ContainerLaunch at all. launcher.containerLauncher = mock(ExecutorService.class); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java index 0a0a459bbee..6612ea679f8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java @@ -682,9 +682,8 @@ public class TestContainerLaunch extends BaseContainerManagerTest { ContainerStatus containerStatus = containerManager.getContainerStatuses(gcsRequest) .getContainerStatuses().get(0); - int expectedExitCode = Shell.WINDOWS ? ExitCode.FORCE_KILLED.getExitCode() : - ExitCode.TERMINATED.getExitCode(); - Assert.assertEquals(expectedExitCode, containerStatus.getExitStatus()); + Assert.assertEquals(ExitCode.FORCE_KILLED.getExitCode(), + containerStatus.getExitStatus()); // Now verify the contents of the file. Script generates a message when it // receives a sigterm so we look for that. We cannot perform this check on From 9c24615ae6a8b8c5d7670269d7a4dfb5e9344b13 Mon Sep 17 00:00:00 2001 From: Arun Murthy Date: Tue, 1 Oct 2013 01:24:27 +0000 Subject: [PATCH 006/133] MAPREDUCE-5551. 
Fix compat with hadoop-1 in SequenceFileAsBinaryOutputFormat.WritableValueBytes by re-introducing missing constructors. Contributed by Zhijie Shen. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1527848 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 4 ++++ .../hadoop/mapred/SequenceFileAsBinaryOutputFormat.java | 7 +++++++ 2 files changed, 11 insertions(+) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 29c08a4aa11..c3ac3f10b35 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -249,6 +249,10 @@ Release 2.1.2 - UNRELEASED the job is really done - a bug caused by MAPREDUCE-5505. (Zhijie Shen via vinodkv) + MAPREDUCE-5551. Fix compat with hadoop-1 in + SequenceFileAsBinaryOutputFormat.WritableValueBytes by re-introducing + missing constructors. (Zhijie Shen via acmurthy) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/SequenceFileAsBinaryOutputFormat.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/SequenceFileAsBinaryOutputFormat.java index 60bb16ccb03..6b3a671c6d4 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/SequenceFileAsBinaryOutputFormat.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/SequenceFileAsBinaryOutputFormat.java @@ -47,6 +47,13 @@ public class SequenceFileAsBinaryOutputFormat */ static protected class WritableValueBytes extends org.apache.hadoop.mapreduce .lib.output.SequenceFileAsBinaryOutputFormat.WritableValueBytes { + public WritableValueBytes() { + super(); + } + + public WritableValueBytes(BytesWritable value) { + super(value); + } } /** From 4fe912df9cd5b41317a3f4037d13c247e7d1380c Mon Sep 17 00:00:00 2001 From: Kihwal Lee Date: Tue, 1 Oct 2013 14:02:19 +0000 Subject: [PATCH 007/133] HDFS-4512. Cover package org.apache.hadoop.hdfs.server.common with tests. Contributed by Vadim Bondarev. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1528097 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../hdfs/server/common/TestJspHelper.java | 144 ++++++++++++++++++ 2 files changed, 147 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index cfd19e5d9ec..f66f58620ac 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -303,6 +303,9 @@ Release 2.3.0 - UNRELEASED HDFS-4517. Cover class RemoteBlockReader with unit tests. (Vadim Bondarev and Dennis Y via kihwal) + HDFS-4512. Cover package org.apache.hadoop.hdfs.server.common with tests. + (Vadim Bondarev via kihwal) + OPTIMIZATIONS HDFS-5239. 
Allow FSNamesystem lock fairness to be configurable (daryn) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/common/TestJspHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/common/TestJspHelper.java index ab6ed12492e..bd523963409 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/common/TestJspHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/common/TestJspHelper.java @@ -17,6 +17,11 @@ */ package org.apache.hadoop.hdfs.server.common; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; import static org.mockito.Mockito.doAnswer; @@ -24,7 +29,10 @@ import static org.mockito.Mockito.doAnswer; import java.io.IOException; import java.io.StringReader; import java.net.InetSocketAddress; +import java.text.MessageFormat; import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; import javax.servlet.ServletContext; import javax.servlet.http.HttpServletRequest; @@ -43,6 +51,8 @@ import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; import org.apache.hadoop.hdfs.server.namenode.NameNodeHttpServer; import org.apache.hadoop.hdfs.web.resources.DoAsParam; import org.apache.hadoop.hdfs.web.resources.UserParam; +import org.apache.hadoop.io.DataInputBuffer; +import org.apache.hadoop.io.DataOutputBuffer; import org.apache.hadoop.io.Text; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; @@ -54,11 +64,14 @@ import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecret import org.junit.Assert; import org.junit.Test; import org.mockito.ArgumentCaptor; +import org.mockito.Mockito; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; import org.xml.sax.InputSource; import org.xml.sax.SAXException; +import com.google.common.base.Strings; + public class TestJspHelper { @@ -480,5 +493,136 @@ public class TestJspHelper { JspHelper.sortNodeList(live, "pcbpused", "DSC"); Assert.assertEquals(dnDesc1, live.get(0)); Assert.assertEquals(dnDesc2, live.get(1)); + + //unexisted field comparition is d1.getHostName().compareTo(d2.getHostName()); + JspHelper.sortNodeList(live, "unexists", "ASC"); + Assert.assertEquals(dnDesc1, live.get(0)); + Assert.assertEquals(dnDesc2, live.get(1)); + + JspHelper.sortNodeList(live, "unexists", "DSC"); + Assert.assertEquals(dnDesc2, live.get(0)); + Assert.assertEquals(dnDesc1, live.get(1)); + + // test sorting by capacity + JspHelper.sortNodeList(live, "capacity", "ASC"); + Assert.assertEquals(dnDesc1, live.get(0)); + Assert.assertEquals(dnDesc2, live.get(1)); + + JspHelper.sortNodeList(live, "capacity", "DSC"); + Assert.assertEquals(dnDesc2, live.get(0)); + Assert.assertEquals(dnDesc1, live.get(1)); + + // test sorting by used + JspHelper.sortNodeList(live, "used", "ASC"); + Assert.assertEquals(dnDesc1, live.get(0)); + Assert.assertEquals(dnDesc2, live.get(1)); + + JspHelper.sortNodeList(live, "used", "DSC"); + Assert.assertEquals(dnDesc2, live.get(0)); + Assert.assertEquals(dnDesc1, live.get(1)); + + // test sorting by nondfsused + JspHelper.sortNodeList(live, "nondfsused", "ASC"); + Assert.assertEquals(dnDesc1, 
live.get(0)); + Assert.assertEquals(dnDesc2, live.get(1)); + + JspHelper.sortNodeList(live, "nondfsused", "DSC"); + Assert.assertEquals(dnDesc2, live.get(0)); + Assert.assertEquals(dnDesc1, live.get(1)); + + // test sorting by remaining + JspHelper.sortNodeList(live, "remaining", "ASC"); + Assert.assertEquals(dnDesc1, live.get(0)); + Assert.assertEquals(dnDesc2, live.get(1)); + + JspHelper.sortNodeList(live, "remaining", "DSC"); + Assert.assertEquals(dnDesc2, live.get(0)); + Assert.assertEquals(dnDesc1, live.get(1)); } + + @Test + public void testPrintMethods() throws IOException { + JspWriter out = mock(JspWriter.class); + HttpServletRequest req = mock(HttpServletRequest.class); + + final StringBuffer buffer = new StringBuffer(); + + ArgumentCaptor arg = ArgumentCaptor.forClass(String.class); + doAnswer(new Answer() { + @Override + public Object answer(InvocationOnMock invok) { + Object[] args = invok.getArguments(); + buffer.append((String)args[0]); + return null; + } + }).when(out).print(arg.capture()); + + + JspHelper.createTitle(out, req, "testfile.txt"); + Mockito.verify(out, Mockito.times(1)).print(Mockito.anyString()); + + JspHelper.addTableHeader(out); + Mockito.verify(out, Mockito.times(1 + 2)).print(Mockito.anyString()); + + JspHelper.addTableRow(out, new String[] {" row11", "row12 "}); + Mockito.verify(out, Mockito.times(1 + 2 + 4)).print(Mockito.anyString()); + + JspHelper.addTableRow(out, new String[] {" row11", "row12 "}, 3); + Mockito.verify(out, Mockito.times(1 + 2 + 4 + 4)).print(Mockito.anyString()); + + JspHelper.addTableRow(out, new String[] {" row21", "row22"}); + Mockito.verify(out, Mockito.times(1 + 2 + 4 + 4 + 4)).print(Mockito.anyString()); + + JspHelper.addTableFooter(out); + Mockito.verify(out, Mockito.times(1 + 2 + 4 + 4 + 4 + 1)).print(Mockito.anyString()); + + assertFalse(Strings.isNullOrEmpty(buffer.toString())); + } + + @Test + public void testReadWriteReplicaState() { + try { + DataOutputBuffer out = new DataOutputBuffer(); + DataInputBuffer in = new DataInputBuffer(); + for (HdfsServerConstants.ReplicaState repState : HdfsServerConstants.ReplicaState + .values()) { + repState.write(out); + in.reset(out.getData(), out.getLength()); + HdfsServerConstants.ReplicaState result = HdfsServerConstants.ReplicaState + .read(in); + assertTrue("testReadWrite error !!!", repState == result); + out.reset(); + in.reset(); + } + } catch (Exception ex) { + fail("testReadWrite ex error ReplicaState"); + } + } + + @Test + public void testUpgradeStatusReport() { + short status = 6; + int version = 15; + String EXPECTED__NOTF_PATTERN = "Upgrade for version {0} has been completed.\nUpgrade is not finalized."; + String EXPECTED_PATTERN = "Upgrade for version {0} is in progress. 
Status = {1}%"; + + UpgradeStatusReport upgradeStatusReport = new UpgradeStatusReport(version, + status, true); + assertTrue(upgradeStatusReport.getVersion() == version); + assertTrue(upgradeStatusReport.getUpgradeStatus() == status); + assertTrue(upgradeStatusReport.isFinalized()); + + assertEquals(MessageFormat.format(EXPECTED_PATTERN, version, status), + upgradeStatusReport.getStatusText(true)); + + status += 100; + upgradeStatusReport = new UpgradeStatusReport(version, status, false); + assertFalse(upgradeStatusReport.isFinalized()); + assertTrue(upgradeStatusReport.toString().equals( + MessageFormat.format(EXPECTED__NOTF_PATTERN, version))); + assertTrue(upgradeStatusReport.getStatusText(false).equals( + MessageFormat.format(EXPECTED__NOTF_PATTERN, version))); + assertTrue(upgradeStatusReport.getStatusText(true).equals( + MessageFormat.format(EXPECTED__NOTF_PATTERN, version))); + } } From ae05623a75803d4e12a902ac4a24187540f56699 Mon Sep 17 00:00:00 2001 From: Sanford Ryza Date: Tue, 1 Oct 2013 19:54:50 +0000 Subject: [PATCH 008/133] YARN-1010. FairScheduler: decouple container scheduling from nodemanager heartbeats. (Wei Yan via Sandy Ryza) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1528192 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 + .../scheduler/fair/AppSchedulable.java | 17 +++- .../scheduler/fair/FSSchedulerApp.java | 56 ++++++++++++- .../scheduler/fair/FairScheduler.java | 78 ++++++++++++++++++- .../fair/FairSchedulerConfiguration.java | 32 ++++++++ .../scheduler/fair/TestFSSchedulerApp.java | 71 +++++++++++++++++ .../scheduler/fair/TestFairScheduler.java | 57 ++++++++++++++ 7 files changed, 305 insertions(+), 9 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 191cf58be67..f2a42d056b0 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -26,6 +26,9 @@ Release 2.3.0 - UNRELEASED YARN-1021. Yarn Scheduler Load Simulator. (ywskycn via tucu) + YARN-1010. FairScheduler: decouple container scheduling from nodemanager + heartbeats. (Wei Yan via Sandy Ryza) + IMPROVEMENTS YARN-905. 
Add state filters to nodes CLI (Wei Yan via Sandy Ryza) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AppSchedulable.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AppSchedulable.java index 415f125e1b6..baf5db21c81 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AppSchedulable.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AppSchedulable.java @@ -310,10 +310,19 @@ public class AppSchedulable extends Schedulable { + localRequest); } - NodeType allowedLocality = app.getAllowedLocalityLevel(priority, - scheduler.getNumClusterNodes(), scheduler.getNodeLocalityThreshold(), - scheduler.getRackLocalityThreshold()); - + NodeType allowedLocality; + if (scheduler.isContinuousSchedulingEnabled()) { + allowedLocality = app.getAllowedLocalityLevelByTime(priority, + scheduler.getNodeLocalityDelayMs(), + scheduler.getRackLocalityDelayMs(), + scheduler.getClock().getTime()); + } else { + allowedLocality = app.getAllowedLocalityLevel(priority, + scheduler.getNumClusterNodes(), + scheduler.getNodeLocalityThreshold(), + scheduler.getRackLocalityThreshold()); + } + if (rackLocalRequest != null && rackLocalRequest.getNumContainers() != 0 && localRequest != null && localRequest.getNumContainers() != 0) { return assignContainer(node, priority, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerApp.java index 670e9616a81..1fe400ee07d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerApp.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerApp.java @@ -464,7 +464,12 @@ public class FSSchedulerApp extends SchedulerApplication { * @param priority The priority of the container scheduled. */ synchronized public void resetSchedulingOpportunities(Priority priority) { - lastScheduledContainer.put(priority, System.currentTimeMillis()); + resetSchedulingOpportunities(priority, System.currentTimeMillis()); + } + // used for continuous scheduling + synchronized public void resetSchedulingOpportunities(Priority priority, + long currentTimeMs) { + lastScheduledContainer.put(priority, currentTimeMs); schedulingOpportunities.setCount(priority, 0); } @@ -513,6 +518,55 @@ public class FSSchedulerApp extends SchedulerApplication { return allowedLocalityLevel.get(priority); } + /** + * Return the level at which we are allowed to schedule containers. + * Given the thresholds indicating how much time passed before relaxing + * scheduling constraints. 
+ */ + public synchronized NodeType getAllowedLocalityLevelByTime(Priority priority, + long nodeLocalityDelayMs, long rackLocalityDelayMs, + long currentTimeMs) { + + // if not being used, can schedule anywhere + if (nodeLocalityDelayMs < 0 || rackLocalityDelayMs < 0) { + return NodeType.OFF_SWITCH; + } + + // default level is NODE_LOCAL + if (! allowedLocalityLevel.containsKey(priority)) { + allowedLocalityLevel.put(priority, NodeType.NODE_LOCAL); + return NodeType.NODE_LOCAL; + } + + NodeType allowed = allowedLocalityLevel.get(priority); + + // if level is already most liberal, we're done + if (allowed.equals(NodeType.OFF_SWITCH)) { + return NodeType.OFF_SWITCH; + } + + // check waiting time + long waitTime = currentTimeMs; + if (lastScheduledContainer.containsKey(priority)) { + waitTime -= lastScheduledContainer.get(priority); + } else { + waitTime -= appSchedulable.getStartTime(); + } + + long thresholdTime = allowed.equals(NodeType.NODE_LOCAL) ? + nodeLocalityDelayMs : rackLocalityDelayMs; + + if (waitTime > thresholdTime) { + if (allowed.equals(NodeType.NODE_LOCAL)) { + allowedLocalityLevel.put(priority, NodeType.RACK_LOCAL); + resetSchedulingOpportunities(priority, currentTimeMs); + } else if (allowed.equals(NodeType.RACK_LOCAL)) { + allowedLocalityLevel.put(priority, NodeType.OFF_SWITCH); + resetSchedulingOpportunities(priority, currentTimeMs); + } + } + return allowedLocalityLevel.get(priority); + } synchronized public RMContainer allocate(NodeType type, FSSchedulerNode node, Priority priority, ResourceRequest request, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index 16e7fd695af..fa4c21805be 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -179,8 +179,12 @@ public class FairScheduler implements ResourceScheduler { protected boolean preemptionEnabled; protected boolean sizeBasedWeight; // Give larger weights to larger jobs protected WeightAdjuster weightAdjuster; // Can be null for no weight adjuster + protected boolean continuousSchedulingEnabled; // Continuous Scheduling enabled or not + protected int continuousSchedulingSleepMs; // Sleep time for each pass in continuous scheduling protected double nodeLocalityThreshold; // Cluster threshold for node locality protected double rackLocalityThreshold; // Cluster threshold for rack locality + protected long nodeLocalityDelayMs; // Delay for node locality + protected long rackLocalityDelayMs; // Delay for rack locality private FairSchedulerEventLog eventLog; // Machine-readable event log protected boolean assignMultiple; // Allocate multiple containers per // heartbeat @@ -582,6 +586,22 @@ public class FairScheduler implements ResourceScheduler { return rackLocalityThreshold; } + public long getNodeLocalityDelayMs() { + return nodeLocalityDelayMs; + } + + public long getRackLocalityDelayMs() { + return rackLocalityDelayMs; + } + + public boolean isContinuousSchedulingEnabled() { + 
return continuousSchedulingEnabled; + } + + public synchronized int getContinuousSchedulingSleepMs() { + return continuousSchedulingSleepMs; + } + public Resource getClusterCapacity() { return clusterCapacity; } @@ -907,6 +927,37 @@ public class FairScheduler implements ResourceScheduler { completedContainer, RMContainerEventType.FINISHED); } + if (continuousSchedulingEnabled) { + if (!completedContainers.isEmpty()) { + attemptScheduling(node); + } + } else { + attemptScheduling(node); + } + } + + private void continuousScheduling() { + while (true) { + for (FSSchedulerNode node : nodes.values()) { + try { + if (Resources.fitsIn(minimumAllocation, node.getAvailableResource())) { + attemptScheduling(node); + } + } catch (Throwable ex) { + LOG.warn("Error while attempting scheduling for node " + node + ": " + + ex.toString(), ex); + } + } + try { + Thread.sleep(getContinuousSchedulingSleepMs()); + } catch (InterruptedException e) { + LOG.warn("Error while doing sleep in continuous scheduling: " + + e.toString(), e); + } + } + } + + private synchronized void attemptScheduling(FSSchedulerNode node) { // Assign new containers... // 1. Check for reserved applications // 2. Schedule if there are no reservations @@ -914,19 +965,18 @@ public class FairScheduler implements ResourceScheduler { AppSchedulable reservedAppSchedulable = node.getReservedAppSchedulable(); if (reservedAppSchedulable != null) { Priority reservedPriority = node.getReservedContainer().getReservedPriority(); - if (reservedAppSchedulable != null && - !reservedAppSchedulable.hasContainerForNode(reservedPriority, node)) { + if (!reservedAppSchedulable.hasContainerForNode(reservedPriority, node)) { // Don't hold the reservation if app can no longer use it LOG.info("Releasing reservation that cannot be satisfied for application " + reservedAppSchedulable.getApp().getApplicationAttemptId() - + " on node " + nm); + + " on node " + node); reservedAppSchedulable.unreserve(reservedPriority, node); reservedAppSchedulable = null; } else { // Reservation exists; try to fulfill the reservation LOG.info("Trying to fulfill reservation for application " + reservedAppSchedulable.getApp().getApplicationAttemptId() - + " on node: " + nm); + + " on node: " + node); node.getReservedAppSchedulable().assignReservedContainer(node); } @@ -1060,8 +1110,13 @@ public class FairScheduler implements ResourceScheduler { maximumAllocation = this.conf.getMaximumAllocation(); incrAllocation = this.conf.getIncrementAllocation(); userAsDefaultQueue = this.conf.getUserAsDefaultQueue(); + continuousSchedulingEnabled = this.conf.isContinuousSchedulingEnabled(); + continuousSchedulingSleepMs = + this.conf.getContinuousSchedulingSleepMs(); nodeLocalityThreshold = this.conf.getLocalityThresholdNode(); rackLocalityThreshold = this.conf.getLocalityThresholdRack(); + nodeLocalityDelayMs = this.conf.getLocalityDelayNodeMs(); + rackLocalityDelayMs = this.conf.getLocalityDelayRackMs(); preemptionEnabled = this.conf.getPreemptionEnabled(); assignMultiple = this.conf.getAssignMultiple(); maxAssign = this.conf.getMaxAssign(); @@ -1088,6 +1143,21 @@ public class FairScheduler implements ResourceScheduler { updateThread.setName("FairSchedulerUpdateThread"); updateThread.setDaemon(true); updateThread.start(); + + if (continuousSchedulingEnabled) { + // start continuous scheduling thread + Thread schedulingThread = new Thread( + new Runnable() { + @Override + public void run() { + continuousScheduling(); + } + } + ); + schedulingThread.setName("ContinuousScheduling"); + 
schedulingThread.setDaemon(true); + schedulingThread.start(); + } } else { try { queueMgr.reloadAllocs(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerConfiguration.java index acdd40e26ae..a3de8942ea4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerConfiguration.java @@ -66,6 +66,22 @@ public class FairSchedulerConfiguration extends Configuration { protected static final float DEFAULT_LOCALITY_THRESHOLD_RACK = DEFAULT_LOCALITY_THRESHOLD; + /** Delay for node locality. */ + protected static final String LOCALITY_DELAY_NODE_MS = CONF_PREFIX + "locality-delay-node-ms"; + protected static final long DEFAULT_LOCALITY_DELAY_NODE_MS = -1L; + + /** Delay for rack locality. */ + protected static final String LOCALITY_DELAY_RACK_MS = CONF_PREFIX + "locality-delay-rack-ms"; + protected static final long DEFAULT_LOCALITY_DELAY_RACK_MS = -1L; + + /** Enable continuous scheduling or not. */ + protected static final String CONTINUOUS_SCHEDULING_ENABLED = CONF_PREFIX + "continuous-scheduling-enabled"; + protected static final boolean DEFAULT_CONTINUOUS_SCHEDULING_ENABLED = false; + + /** Sleep time of each pass in continuous scheduling (5ms in default) */ + protected static final String CONTINUOUS_SCHEDULING_SLEEP_MS = CONF_PREFIX + "continuous-scheduling-sleep-ms"; + protected static final int DEFAULT_CONTINUOUS_SCHEDULING_SLEEP_MS = 5; + /** Whether preemption is enabled. 
*/ protected static final String PREEMPTION = CONF_PREFIX + "preemption"; protected static final boolean DEFAULT_PREEMPTION = false; @@ -134,6 +150,22 @@ public class FairSchedulerConfiguration extends Configuration { return getFloat(LOCALITY_THRESHOLD_RACK, DEFAULT_LOCALITY_THRESHOLD_RACK); } + public boolean isContinuousSchedulingEnabled() { + return getBoolean(CONTINUOUS_SCHEDULING_ENABLED, DEFAULT_CONTINUOUS_SCHEDULING_ENABLED); + } + + public int getContinuousSchedulingSleepMs() { + return getInt(CONTINUOUS_SCHEDULING_SLEEP_MS, DEFAULT_CONTINUOUS_SCHEDULING_SLEEP_MS); + } + + public long getLocalityDelayNodeMs() { + return getLong(LOCALITY_DELAY_NODE_MS, DEFAULT_LOCALITY_DELAY_NODE_MS); + } + + public long getLocalityDelayRackMs() { + return getLong(LOCALITY_DELAY_RACK_MS, DEFAULT_LOCALITY_DELAY_RACK_MS); + } + public boolean getPreemptionEnabled() { return getBoolean(PREEMPTION, DEFAULT_PREEMPTION); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSSchedulerApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSSchedulerApp.java index 8a53bd036be..491235e7605 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSSchedulerApp.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSSchedulerApp.java @@ -25,11 +25,25 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue; +import org.apache.hadoop.yarn.util.Clock; import org.junit.Test; import org.mockito.Mockito; public class TestFSSchedulerApp { + private class MockClock implements Clock { + private long time = 0; + @Override + public long getTime() { + return time; + } + + public void tick(int seconds) { + time = time + seconds * 1000; + } + + } + private ApplicationAttemptId createAppAttemptId(int appId, int attemptId) { ApplicationId appIdImpl = ApplicationId.newInstance(0, appId); ApplicationAttemptId attId = @@ -93,6 +107,63 @@ public class TestFSSchedulerApp { prio, 10, nodeLocalityThreshold, rackLocalityThreshold)); } + @Test + public void testDelaySchedulingForContinuousScheduling() + throws InterruptedException { + Queue queue = Mockito.mock(Queue.class); + Priority prio = Mockito.mock(Priority.class); + Mockito.when(prio.getPriority()).thenReturn(1); + + MockClock clock = new MockClock(); + + long nodeLocalityDelayMs = 5 * 1000L; // 5 seconds + long rackLocalityDelayMs = 6 * 1000L; // 6 seconds + + ApplicationAttemptId applicationAttemptId = createAppAttemptId(1, 1); + FSSchedulerApp schedulerApp = + new FSSchedulerApp(applicationAttemptId, "user1", queue, + null, null); + AppSchedulable appSchedulable = Mockito.mock(AppSchedulable.class); + long startTime = clock.getTime(); + Mockito.when(appSchedulable.getStartTime()).thenReturn(startTime); + schedulerApp.setAppSchedulable(appSchedulable); + + // Default level should be node-local + assertEquals(NodeType.NODE_LOCAL, + schedulerApp.getAllowedLocalityLevelByTime(prio, + 
nodeLocalityDelayMs, rackLocalityDelayMs, clock.getTime())); + + // after 4 seconds should remain node local + clock.tick(4); + assertEquals(NodeType.NODE_LOCAL, + schedulerApp.getAllowedLocalityLevelByTime(prio, + nodeLocalityDelayMs, rackLocalityDelayMs, clock.getTime())); + + // after 6 seconds should switch to rack local + clock.tick(2); + assertEquals(NodeType.RACK_LOCAL, + schedulerApp.getAllowedLocalityLevelByTime(prio, + nodeLocalityDelayMs, rackLocalityDelayMs, clock.getTime())); + + // manually set back to node local + schedulerApp.resetAllowedLocalityLevel(prio, NodeType.NODE_LOCAL); + schedulerApp.resetSchedulingOpportunities(prio, clock.getTime()); + assertEquals(NodeType.NODE_LOCAL, + schedulerApp.getAllowedLocalityLevelByTime(prio, + nodeLocalityDelayMs, rackLocalityDelayMs, clock.getTime())); + + // Now escalate again to rack-local, then to off-switch + clock.tick(6); + assertEquals(NodeType.RACK_LOCAL, + schedulerApp.getAllowedLocalityLevelByTime(prio, + nodeLocalityDelayMs, rackLocalityDelayMs, clock.getTime())); + + clock.tick(7); + assertEquals(NodeType.OFF_SWITCH, + schedulerApp.getAllowedLocalityLevelByTime(prio, + nodeLocalityDelayMs, rackLocalityDelayMs, clock.getTime())); + } + @Test /** * Ensure that when negative paramaters are given (signaling delay scheduling diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index 30f874ba152..84c9a37d5d4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -59,6 +59,7 @@ import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.QueueACL; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationSubmissionContextPBImpl; +import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.AsyncDispatcher; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; @@ -298,6 +299,14 @@ public class TestFairScheduler { conf.setBoolean(FairSchedulerConfiguration.SIZE_BASED_WEIGHT, true); conf.setDouble(FairSchedulerConfiguration.LOCALITY_THRESHOLD_NODE, .5); conf.setDouble(FairSchedulerConfiguration.LOCALITY_THRESHOLD_RACK, .7); + conf.setBoolean(FairSchedulerConfiguration.CONTINUOUS_SCHEDULING_ENABLED, + true); + conf.setInt(FairSchedulerConfiguration.CONTINUOUS_SCHEDULING_SLEEP_MS, + 10); + conf.setInt(FairSchedulerConfiguration.LOCALITY_DELAY_RACK_MS, + 5000); + conf.setInt(FairSchedulerConfiguration.LOCALITY_DELAY_NODE_MS, + 5000); conf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, 1024); conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 512); conf.setInt(FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_MB, @@ -308,6 +317,11 @@ public class TestFairScheduler { Assert.assertEquals(true, scheduler.sizeBasedWeight); Assert.assertEquals(.5, 
scheduler.nodeLocalityThreshold, .01); Assert.assertEquals(.7, scheduler.rackLocalityThreshold, .01); + Assert.assertTrue("The continuous scheduling should be enabled", + scheduler.continuousSchedulingEnabled); + Assert.assertEquals(10, scheduler.continuousSchedulingSleepMs); + Assert.assertEquals(5000, scheduler.nodeLocalityDelayMs); + Assert.assertEquals(5000, scheduler.rackLocalityDelayMs); Assert.assertEquals(1024, scheduler.getMaximumResourceCapability().getMemory()); Assert.assertEquals(512, scheduler.getMinimumResourceCapability().getMemory()); Assert.assertEquals(128, @@ -2255,4 +2269,47 @@ public class TestFairScheduler { fs.applications, FSSchedulerApp.class); } + @Test + public void testContinuousScheduling() throws Exception { + // set continuous scheduling enabled + FairScheduler fs = new FairScheduler(); + Configuration conf = createConfiguration(); + conf.setBoolean(FairSchedulerConfiguration.CONTINUOUS_SCHEDULING_ENABLED, + true); + fs.reinitialize(conf, resourceManager.getRMContext()); + Assert.assertTrue("Continuous scheduling should be enabled.", + fs.isContinuousSchedulingEnabled()); + + // Add one node + RMNode node1 = + MockNodes.newNodeInfo(1, Resources.createResource(8 * 1024, 8), 1, + "127.0.0.1"); + NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); + fs.handle(nodeEvent1); + + // available resource + Assert.assertEquals(fs.getClusterCapacity().getMemory(), 8 * 1024); + Assert.assertEquals(fs.getClusterCapacity().getVirtualCores(), 8); + + // send application request + ApplicationAttemptId appAttemptId = + createAppAttemptId(this.APP_ID++, this.ATTEMPT_ID++); + fs.addApplication(appAttemptId, "queue11", "user11"); + List ask = new ArrayList(); + ResourceRequest request = + createResourceRequest(1024, 1, ResourceRequest.ANY, 1, 1, true); + ask.add(request); + fs.allocate(appAttemptId, ask, new ArrayList(), null, null); + + // waiting for continuous_scheduler_sleep_time + // at least one pass + Thread.sleep(fs.getConf().getContinuousSchedulingSleepMs() + 500); + + // check consumption + Resource consumption = + fs.applications.get(appAttemptId).getCurrentConsumption(); + Assert.assertEquals(1024, consumption.getMemory()); + Assert.assertEquals(1, consumption.getVirtualCores()); + } + } From cc8a0cab59e9f7db462e5bebbe0046a62b463c3f Mon Sep 17 00:00:00 2001 From: Sanford Ryza Date: Tue, 1 Oct 2013 20:01:20 +0000 Subject: [PATCH 009/133] MAPREDUCE-5544. JobClient#getJob loads job conf twice. (Sandy Ryza) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1528196 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 2 ++ .../main/java/org/apache/hadoop/mapred/JobClient.java | 10 ++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index c3ac3f10b35..6421c6d543b 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -253,6 +253,8 @@ Release 2.1.2 - UNRELEASED SequenceFileAsBinaryOutputFormat.WritableValueBytes by re-introducing missing constructors. (Zhijie Shen via acmurthy) + MAPREDUCE-5544. JobClient#getJob loads job conf twice. 
(Sandy Ryza) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobClient.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobClient.java index 0393130ad53..89a966eb84c 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobClient.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobClient.java @@ -173,7 +173,12 @@ public class JobClient extends CLI { * job completes.) */ public NetworkedJob(JobStatus status, Cluster cluster) throws IOException { - job = Job.getInstance(cluster, status, new JobConf(status.getJobFile())); + this(status, cluster, new JobConf(status.getJobFile())); + } + + private NetworkedJob(JobStatus status, Cluster cluster, JobConf conf) + throws IOException { + this(Job.getInstance(cluster, status, conf)); } public NetworkedJob(Job job) throws IOException { @@ -592,7 +597,8 @@ public class JobClient extends CLI { if (job != null) { JobStatus status = JobStatus.downgrade(job.getStatus()); if (status != null) { - return new NetworkedJob(status, cluster); + return new NetworkedJob(status, cluster, + new JobConf(job.getConfiguration())); } } } catch (InterruptedException ie) { From aa2745abe596c610157f6e3be9c23ba8cc1e21d0 Mon Sep 17 00:00:00 2001 From: Sanford Ryza Date: Tue, 1 Oct 2013 20:14:18 +0000 Subject: [PATCH 010/133] YARN-1228. Clean up Fair Scheduler configuration loading. (Sandy Ryza) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1528201 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 2 ++ .../fair/FairSchedulerConfiguration.java | 34 ++++++++++++++++--- .../scheduler/fair/QueueManager.java | 34 +++---------------- .../fair/TestFairSchedulerConfiguration.java | 16 +++++++++ .../test/resources/test-fair-scheduler.xml | 22 ++++++++++++ .../src/site/apt/FairScheduler.apt.vm | 16 +++++---- 6 files changed, 84 insertions(+), 40 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/resources/test-fair-scheduler.xml diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index f2a42d056b0..30b56f67232 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -85,6 +85,8 @@ Release 2.1.2 - UNRELEASED YARN-899. Added back queue level administrator-acls so that there is no regression w.r.t 1.x. (Xuan Gong via vinodkv) + YARN-1228. Clean up Fair Scheduler configuration loading. 
(Sandy Ryza) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerConfiguration.java index a3de8942ea4..edfc8fa83e0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerConfiguration.java @@ -18,9 +18,12 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair; import java.io.File; +import java.net.URL; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Evolving; import org.apache.hadoop.conf.Configuration; @@ -33,6 +36,9 @@ import org.apache.hadoop.yarn.util.resource.Resources; @Evolving public class FairSchedulerConfiguration extends Configuration { + public static final Log LOG = LogFactory.getLog( + FairSchedulerConfiguration.class.getName()); + /** Increment request grant-able by the RM scheduler. * These properties are looked up in the yarn-site.xml */ public static final String RM_SCHEDULER_INCREMENT_ALLOCATION_MB = @@ -42,11 +48,10 @@ public class FairSchedulerConfiguration extends Configuration { YarnConfiguration.YARN_PREFIX + "scheduler.increment-allocation-vcores"; public static final int DEFAULT_RM_SCHEDULER_INCREMENT_ALLOCATION_VCORES = 1; - public static final String FS_CONFIGURATION_FILE = "fair-scheduler.xml"; - private static final String CONF_PREFIX = "yarn.scheduler.fair."; protected static final String ALLOCATION_FILE = CONF_PREFIX + "allocation.file"; + protected static final String DEFAULT_ALLOCATION_FILE = "fair-scheduler.xml"; protected static final String EVENT_LOG_DIR = "eventlog.dir"; /** Whether to use the user name as the queue name (instead of "default") if @@ -105,7 +110,6 @@ public class FairSchedulerConfiguration extends Configuration { public FairSchedulerConfiguration(Configuration conf) { super(conf); - addResource(FS_CONFIGURATION_FILE); } public Resource getMinimumAllocation() { @@ -182,8 +186,28 @@ public class FairSchedulerConfiguration extends Configuration { return getBoolean(SIZE_BASED_WEIGHT, DEFAULT_SIZE_BASED_WEIGHT); } - public String getAllocationFile() { - return get(ALLOCATION_FILE); + /** + * Path to XML file containing allocations. If the + * path is relative, it is searched for in the + * classpath, but loaded like a regular File. 
+ */ + public File getAllocationFile() { + String allocFilePath = get(ALLOCATION_FILE, DEFAULT_ALLOCATION_FILE); + File allocFile = new File(allocFilePath); + if (!allocFile.isAbsolute()) { + URL url = Thread.currentThread().getContextClassLoader() + .getResource(allocFilePath); + if (url == null) { + LOG.warn(allocFilePath + " not found on the classpath."); + allocFile = null; + } else if (!url.getProtocol().equalsIgnoreCase("file")) { + throw new RuntimeException("Allocation file " + url + + " found on the classpath is not on the local filesystem."); + } else { + allocFile = new File(url.getPath()); + } + } + return allocFile; } public String getEventlogDir() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueueManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueueManager.java index 839b190bb1b..7560309f5e7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueueManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueueManager.java @@ -75,11 +75,8 @@ public class QueueManager { private final FairScheduler scheduler; - private Object allocFile; // Path to XML file containing allocations. This - // is either a URL to specify a classpath resource - // (if the fair-scheduler.xml on the classpath is - // used) or a String to specify an absolute path (if - // mapred.fairscheduler.allocation.file is used). + // Path to XML file containing allocations. + private File allocFile; private final Collection leafQueues = new CopyOnWriteArrayList(); @@ -107,16 +104,7 @@ public class QueueManager { queues.put(rootQueue.getName(), rootQueue); this.allocFile = conf.getAllocationFile(); - if (allocFile == null) { - // No allocation file specified in jobconf. Use the default allocation - // file, fair-scheduler.xml, looking for it on the classpath. 
- allocFile = new Configuration().getResource("fair-scheduler.xml"); - if (allocFile == null) { - LOG.error("The fair scheduler allocation file fair-scheduler.xml was " - + "not found on the classpath, and no other config file is given " - + "through mapred.fairscheduler.allocation.file."); - } - } + reloadAllocs(); lastSuccessfulReload = scheduler.getClock().getTime(); lastReloadAttempt = scheduler.getClock().getTime(); @@ -255,14 +243,7 @@ public class QueueManager { try { // Get last modified time of alloc file depending whether it's a String // (for a path name) or an URL (for a classloader resource) - long lastModified; - if (allocFile instanceof String) { - File file = new File((String) allocFile); - lastModified = file.lastModified(); - } else { // allocFile is an URL - URLConnection conn = ((URL) allocFile).openConnection(); - lastModified = conn.getLastModified(); - } + long lastModified = allocFile.lastModified(); if (lastModified > lastSuccessfulReload && time > lastModified + ALLOC_RELOAD_WAIT) { reloadAllocs(); @@ -321,12 +302,7 @@ public class QueueManager { DocumentBuilderFactory.newInstance(); docBuilderFactory.setIgnoringComments(true); DocumentBuilder builder = docBuilderFactory.newDocumentBuilder(); - Document doc; - if (allocFile instanceof String) { - doc = builder.parse(new File((String) allocFile)); - } else { - doc = builder.parse(allocFile.toString()); - } + Document doc = builder.parse(allocFile); Element root = doc.getDocumentElement(); if (!"allocations".equals(root.getTagName())) throw new AllocationConfigurationException("Bad fair scheduler config " + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerConfiguration.java index 12ed7792676..da8a183c10a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerConfiguration.java @@ -20,6 +20,11 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair; import static org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairSchedulerConfiguration.parseResourceConfigValue; import static org.junit.Assert.assertEquals; +import java.io.File; + +import junit.framework.Assert; + +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.server.utils.BuilderUtils; import org.junit.Test; @@ -55,4 +60,15 @@ public class TestFairSchedulerConfiguration { public void testGibberish() throws Exception { parseResourceConfigValue("1o24vc0res"); } + + @Test + public void testGetAllocationFileFromClasspath() { + FairSchedulerConfiguration conf = new FairSchedulerConfiguration( + new Configuration()); + conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, + "test-fair-scheduler.xml"); + File allocationFile = conf.getAllocationFile(); + Assert.assertEquals("test-fair-scheduler.xml", allocationFile.getName()); + Assert.assertTrue(allocationFile.exists()); + } } diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/resources/test-fair-scheduler.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/resources/test-fair-scheduler.xml new file mode 100644 index 00000000000..db160c9063c --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/resources/test-fair-scheduler.xml @@ -0,0 +1,22 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + + + + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm index 84d6ca29119..7008c207685 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm @@ -81,7 +81,7 @@ Hadoop MapReduce Next Generation - Fair Scheduler distribute the resources assigned to them to their children in the same fashion. Applications may only be scheduled on leaf queues. Queues can be specified as children of other queues by placing them as sub-elements of - their parents in the fair scheduler configuration file. + their parents in the fair scheduler allocation file. A queue's name starts with the names of its parents, with periods as separators. So a queue named "queue1" under the root queue, would be referred @@ -118,16 +118,20 @@ Hadoop MapReduce Next Generation - Fair Scheduler Customizing the Fair Scheduler typically involves altering two files. First, scheduler-wide options can be set by adding configuration properties in the yarn-site.xml file in your existing configuration directory. Second, in - most cases users will want to create a manifest file listing which queues - exist and their respective weights and capacities. The location of this file - is flexible - but it must be declared in yarn-site.xml. + most cases users will want to create an allocation file listing which queues + exist and their respective weights and capacities. The allocation file + is reloaded every 10 seconds, allowing changes to be made on the fly. + +Properties that can be placed in yarn-site.xml * <<>> * Path to allocation file. An allocation file is an XML manifest describing queues and their properties, in addition to certain policy defaults. This file - must be in XML format as described in the next section. - Defaults to fair-scheduler.xml in configuration directory. + must be in the XML format described in the next section. If a relative path is + given, the file is searched for on the classpath (which typically includes + the Hadoop conf directory). + Defaults to fair-scheduler.xml. 
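  The classpath behaviour described above can also be exercised through the new
  FairSchedulerConfiguration.getAllocationFile() method added by this patch. The
  Java sketch below is only a minimal illustration under assumed conditions (a
  standalone driver class and a fair-scheduler.xml resource on the classpath);
  the class name is an assumption for illustration, not something introduced by
  the patch.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairSchedulerConfiguration;

    public class AllocationFileLookupSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // A relative name is searched for on the classpath (typically the
        // Hadoop conf directory); an absolute path is used as a plain local file.
        conf.set("yarn.scheduler.fair.allocation.file", "fair-scheduler.xml");

        java.io.File allocFile =
            new FairSchedulerConfiguration(conf).getAllocationFile();
        // getAllocationFile() logs a warning and returns null when a relative
        // name cannot be found on the classpath, and throws if the classpath
        // resource is not on the local filesystem.
        System.out.println(allocFile == null
            ? "allocation file not found on classpath"
            : "allocation file resolved to " + allocFile.getAbsolutePath());
      }
    }

  Resolving the classpath resource down to a plain local File is what lets the
  simplified QueueManager code earlier in this patch use File.lastModified() and
  parse the file directly when reloading allocations.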
* <<>> From b2a7811fe79e0d54d77c6e944a57b715976ff643 Mon Sep 17 00:00:00 2001 From: Chris Nauroth Date: Tue, 1 Oct 2013 22:28:02 +0000 Subject: [PATCH 011/133] YARN-1215. Yarn URL should include userinfo. Contributed by Chuan Liu. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1528233 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 2 ++ .../apache/hadoop/yarn/api/records/URL.java | 16 +++++++++++++++ .../src/main/proto/yarn_protos.proto | 1 + .../yarn/api/records/impl/pb/URLPBImpl.java | 20 +++++++++++++++++++ .../hadoop/yarn/util/ConverterUtils.java | 6 ++++++ .../hadoop/yarn/util/TestConverterUtils.java | 8 ++++++++ 6 files changed, 53 insertions(+) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 30b56f67232..3501ed423d2 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -54,6 +54,8 @@ Release 2.3.0 - UNRELEASED YARN-1188. The context of QueueMetrics becomes default when using FairScheduler (Tsuyoshi Ozawa via Sandy Ryza) + YARN-1215. Yarn URL should include userinfo. (Chuan Liu via cnauroth) + Release 2.2.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/URL.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/URL.java index 769494e2e9e..4261117b108 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/URL.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/URL.java @@ -56,6 +56,22 @@ public abstract class URL { @Stable public abstract void setScheme(String scheme); + /** + * Get the user info of the URL. + * @return user info of the URL + */ + @Public + @Stable + public abstract String getUserInfo(); + + /** + * Set the user info of the URL. + * @param userInfo user info of the URL + */ + @Public + @Stable + public abstract void setUserInfo(String userInfo); + /** * Get the host of the URL. * @return host of the URL diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto index f49b1653b9c..31923068cbd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto @@ -100,6 +100,7 @@ message URLProto { optional string host = 2; optional int32 port = 3; optional string file = 4; + optional string userInfo = 5; } enum LocalResourceVisibilityProto { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/URLPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/URLPBImpl.java index c5586c766d5..ab31d0c32f4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/URLPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/URLPBImpl.java @@ -113,6 +113,26 @@ public class URLPBImpl extends URL { } builder.setScheme((scheme)); } + + @Override + public String getUserInfo() { + URLProtoOrBuilder p = viaProto ? 
proto : builder; + if (!p.hasUserInfo()) { + return null; + } + return (p.getUserInfo()); + } + + @Override + public void setUserInfo(String userInfo) { + maybeInitBuilder(); + if (userInfo == null) { + builder.clearUserInfo(); + return; + } + builder.setUserInfo((userInfo)); + } + @Override public String getHost() { URLProtoOrBuilder p = viaProto ? proto : builder; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ConverterUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ConverterUtils.java index 596ae28d25f..5fff8f4e779 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ConverterUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ConverterUtils.java @@ -69,6 +69,9 @@ public class ConverterUtils { String authority = ""; if (url.getHost() != null) { authority = url.getHost(); + if (url.getUserInfo() != null) { + authority = url.getUserInfo() + "@" + authority; + } if (url.getPort() > 0) { authority += ":" + url.getPort(); } @@ -102,6 +105,9 @@ public class ConverterUtils { if (uri.getHost() != null) { url.setHost(uri.getHost()); } + if (uri.getUserInfo() != null) { + url.setUserInfo(uri.getUserInfo()); + } url.setPort(uri.getPort()); url.setScheme(uri.getScheme()); url.setFile(uri.getPath()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestConverterUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestConverterUtils.java index 2d6793b926b..21af4555e37 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestConverterUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestConverterUtils.java @@ -38,6 +38,14 @@ public class TestConverterUtils { assertEquals(expectedPath, actualPath); } + @Test + public void testConvertUrlWithUserinfo() throws URISyntaxException { + Path expectedPath = new Path("foo://username:password@example.com:8042"); + URL url = ConverterUtils.getYarnUrlFromPath(expectedPath); + Path actualPath = ConverterUtils.getPathFromYarnURL(url); + assertEquals(expectedPath, actualPath); + } + @Test public void testContainerId() throws URISyntaxException { ContainerId id = TestContainerId.newContainerId(0, 0, 0, 0); From db06f1bcb98270cd1c36e314f818886f1ef7fd77 Mon Sep 17 00:00:00 2001 From: Jason Darrell Lowe Date: Tue, 1 Oct 2013 22:34:31 +0000 Subject: [PATCH 012/133] MAPREDUCE-4421. Run MapReduce framework via the distributed cache. 
Contributed by Jason Lowe git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1528237 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 2 + .../hadoop/mapreduce/v2/util/MRApps.java | 52 +++++++- .../hadoop/mapreduce/v2/util/TestMRApps.java | 41 +++++- .../apache/hadoop/mapreduce/JobSubmitter.java | 48 ++++++- .../apache/hadoop/mapreduce/MRJobConfig.java | 6 + .../src/main/resources/mapred-default.xml | 22 +++- .../site/apt/DistributedCacheDeploy.apt.vm | 120 ++++++++++++++++++ hadoop-project/src/site/site.xml | 1 + 8 files changed, 279 insertions(+), 13 deletions(-) create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/apt/DistributedCacheDeploy.apt.vm diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 6421c6d543b..6c755ea1928 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -162,6 +162,8 @@ Release 2.3.0 - UNRELEASED MAPREDUCE-434. LocalJobRunner limited to single reducer (Sandy Ryza and Aaron Kimball via Sandy Ryza) + MAPREDUCE-4421. Run MapReduce framework via the distributed cache (jlowe) + OPTIMIZATIONS MAPREDUCE-5484. YarnChild unnecessarily loads job conf twice (Sandy Ryza) diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java index bf021b8eaa2..57c4eaa5669 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java @@ -21,6 +21,7 @@ package org.apache.hadoop.mapreduce.v2.util; import java.io.IOException; import java.net.MalformedURLException; import java.net.URI; +import java.net.URISyntaxException; import java.security.AccessController; import java.security.PrivilegedActionException; import java.security.PrivilegedExceptionAction; @@ -133,6 +134,30 @@ public class MRApps extends Apps { return TaskAttemptStateUI.valueOf(attemptStateStr); } + // gets the base name of the MapReduce framework or null if no + // framework was configured + private static String getMRFrameworkName(Configuration conf) { + String frameworkName = null; + String framework = + conf.get(MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, ""); + if (!framework.isEmpty()) { + URI uri; + try { + uri = new URI(framework); + } catch (URISyntaxException e) { + throw new IllegalArgumentException("Unable to parse '" + framework + + "' as a URI, check the setting for " + + MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, e); + } + + frameworkName = uri.getFragment(); + if (frameworkName == null) { + frameworkName = new Path(uri).getName(); + } + } + return frameworkName; + } + private static void setMRFrameworkClasspath( Map environment, Configuration conf) throws IOException { // Propagate the system classpath when using the mini cluster @@ -141,18 +166,33 @@ public class MRApps extends Apps { System.getProperty("java.class.path")); } - // Add standard Hadoop classes - for (String c : conf.getStrings( - YarnConfiguration.YARN_APPLICATION_CLASSPATH, - YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) { - Apps.addToEnvironment(environment, 
Environment.CLASSPATH.name(), c - .trim()); + // if the framework is specified then only use the MR classpath + String frameworkName = getMRFrameworkName(conf); + if (frameworkName == null) { + // Add standard Hadoop classes + for (String c : conf.getStrings( + YarnConfiguration.YARN_APPLICATION_CLASSPATH, + YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) { + Apps.addToEnvironment(environment, Environment.CLASSPATH.name(), c + .trim()); + } } + + boolean foundFrameworkInClasspath = (frameworkName == null); for (String c : conf.getStrings( MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH, MRJobConfig.DEFAULT_MAPREDUCE_APPLICATION_CLASSPATH)) { Apps.addToEnvironment(environment, Environment.CLASSPATH.name(), c .trim()); + if (!foundFrameworkInClasspath) { + foundFrameworkInClasspath = c.contains(frameworkName); + } + } + + if (!foundFrameworkInClasspath) { + throw new IllegalArgumentException( + "Could not locate MapReduce framework name '" + frameworkName + + "' in " + MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH); } // TODO: Remove duplicates. } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java index 4128f104502..e1d32f7aa9c 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java @@ -282,7 +282,46 @@ public class TestMRApps { assertEquals("MAPREDUCE_JOB_CLASSLOADER true, but job.jar is not in the app" + " classpath!", expectedAppClasspath, appCp); } - + + @Test (timeout = 3000000) + public void testSetClasspathWithFramework() throws IOException { + final String FRAMEWORK_NAME = "some-framework-name"; + final String FRAMEWORK_PATH = "some-framework-path#" + FRAMEWORK_NAME; + Configuration conf = new Configuration(); + conf.set(MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, FRAMEWORK_PATH); + Map env = new HashMap(); + try { + MRApps.setClasspath(env, conf); + fail("Failed to catch framework path set without classpath change"); + } catch (IllegalArgumentException e) { + assertTrue("Unexpected IllegalArgumentException", + e.getMessage().contains("Could not locate MapReduce framework name '" + + FRAMEWORK_NAME + "'")); + } + + env.clear(); + final String FRAMEWORK_CLASSPATH = FRAMEWORK_NAME + "/*.jar"; + conf.set(MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH, FRAMEWORK_CLASSPATH); + MRApps.setClasspath(env, conf); + final String stdClasspath = StringUtils.join(File.pathSeparator, + Arrays.asList("job.jar/job.jar", "job.jar/classes/", "job.jar/lib/*", + ApplicationConstants.Environment.PWD.$() + "/*")); + String expectedClasspath = StringUtils.join(File.pathSeparator, + Arrays.asList(ApplicationConstants.Environment.PWD.$(), + FRAMEWORK_CLASSPATH, stdClasspath)); + assertEquals("Incorrect classpath with framework and no user precedence", + expectedClasspath, env.get("CLASSPATH")); + + env.clear(); + conf.setBoolean(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, true); + MRApps.setClasspath(env, conf); + expectedClasspath = StringUtils.join(File.pathSeparator, + Arrays.asList(ApplicationConstants.Environment.PWD.$(), + stdClasspath, FRAMEWORK_CLASSPATH)); + assertEquals("Incorrect classpath 
with framework and user precedence", + expectedClasspath, env.get("CLASSPATH")); + } + @Test (timeout = 30000) public void testSetupDistributedCacheEmpty() throws IOException { Configuration conf = new Configuration(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobSubmitter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobSubmitter.java index d83a3dd7ab9..94e71257498 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobSubmitter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobSubmitter.java @@ -39,6 +39,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; @@ -340,11 +341,12 @@ class JobSubmitter { //validate the jobs output specs checkSpecs(job); - - Path jobStagingArea = JobSubmissionFiles.getStagingDir(cluster, - job.getConfiguration()); - //configure the command line options correctly on the submitting dfs + Configuration conf = job.getConfiguration(); + addMRFrameworkToDistributedCache(conf); + + Path jobStagingArea = JobSubmissionFiles.getStagingDir(cluster, conf); + //configure the command line options correctly on the submitting dfs InetAddress ip = InetAddress.getLocalHost(); if (ip != null) { submitHostAddress = ip.getHostAddress(); @@ -602,7 +604,6 @@ class JobSubmitter { } //get secret keys and tokens and store them into TokenCache - @SuppressWarnings("unchecked") private void populateTokenCache(Configuration conf, Credentials credentials) throws IOException{ readTokensFromFiles(conf, credentials); @@ -618,4 +619,41 @@ class JobSubmitter { TokenCache.obtainTokensForNamenodes(credentials, ps, conf); } } + + @SuppressWarnings("deprecation") + private static void addMRFrameworkToDistributedCache(Configuration conf) + throws IOException { + String framework = + conf.get(MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, ""); + if (!framework.isEmpty()) { + URI uri; + try { + uri = new URI(framework); + } catch (URISyntaxException e) { + throw new IllegalArgumentException("Unable to parse '" + framework + + "' as a URI, check the setting for " + + MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, e); + } + + String linkedName = uri.getFragment(); + + // resolve any symlinks in the URI path so using a "current" symlink + // to point to a specific version shows the specific version + // in the distributed cache configuration + FileSystem fs = FileSystem.get(conf); + Path frameworkPath = fs.makeQualified( + new Path(uri.getScheme(), uri.getAuthority(), uri.getPath())); + FileContext fc = FileContext.getFileContext(frameworkPath.toUri(), conf); + frameworkPath = fc.resolvePath(frameworkPath); + uri = frameworkPath.toUri(); + try { + uri = new URI(uri.getScheme(), uri.getAuthority(), uri.getPath(), + null, linkedName); + } catch (URISyntaxException e) { + throw new IllegalArgumentException(e); + } + + DistributedCache.addCacheArchive(uri, conf); + } + } } diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java index 24c33994493..b8a40d178eb 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java @@ -649,6 +649,12 @@ public interface MRJobConfig { public static final String MAPREDUCE_APPLICATION_CLASSPATH = "mapreduce.application.classpath"; + /** + * Path to MapReduce framework archive + */ + public static final String MAPREDUCE_APPLICATION_FRAMEWORK_PATH = + "mapreduce.application.framework.path"; + /** * Default CLASSPATH for all YARN MapReduce applications. */ diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml index dad0264832e..c08836d45ee 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml @@ -1024,11 +1024,31 @@ CLASSPATH for MR applications. A comma-separated list - of CLASSPATH entries + of CLASSPATH entries. If mapreduce.application.framework is set then this + must specify the appropriate classpath for that archive, and the name of + the archive must be present in the classpath. mapreduce.application.classpath $HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*,$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/* + + Path to the MapReduce framework archive. If set, the framework + archive will automatically be distributed along with the job, and this + path would normally reside in a public location in an HDFS filesystem. As + with distributed cache files, this can be a URL with a fragment specifying + the alias to use for the archive name. For example, + hdfs:/mapred/framework/hadoop-mapreduce-2.1.1.tar.gz#mrframework would + alias the localized archive as "mrframework". + + Note that mapreduce.application.classpath must include the appropriate + classpath for the specified framework. The base name of the archive, or + alias of the archive if an alias is used, must appear in the specified + classpath. + + mapreduce.application.framework.path + + + mapreduce.job.classloader false diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/apt/DistributedCacheDeploy.apt.vm b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/apt/DistributedCacheDeploy.apt.vm new file mode 100644 index 00000000000..302f8d110af --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/apt/DistributedCacheDeploy.apt.vm @@ -0,0 +1,120 @@ +~~ Licensed under the Apache License, Version 2.0 (the "License"); +~~ you may not use this file except in compliance with the License. 
+~~ You may obtain a copy of the License at +~~ +~~ http://www.apache.org/licenses/LICENSE-2.0 +~~ +~~ Unless required by applicable law or agreed to in writing, software +~~ distributed under the License is distributed on an "AS IS" BASIS, +~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +~~ See the License for the specific language governing permissions and +~~ limitations under the License. See accompanying LICENSE file. + + --- + Hadoop Map Reduce Next Generation-${project.version} - Distributed Cache Deploy + --- + --- + ${maven.build.timestamp} + +Hadoop MapReduce Next Generation - Distributed Cache Deploy + + \[ {{{./index.html}Go Back}} \] + +* Introduction + + The MapReduce application framework has rudimentary support for deploying a + new version of the MapReduce framework via the distributed cache. By setting + the appropriate configuration properties, users can run a different version + of MapReduce than the one initially deployed to the cluster. For example, + cluster administrators can place multiple versions of MapReduce in HDFS and + configure <<>> to specify which version jobs will use by + default. This allows the administrators to perform a rolling upgrade of the + MapReduce framework under certain conditions. + +* Preconditions and Limitations + + The support for deploying the MapReduce framework via the distributed cache + currently does not address the job client code used to submit and query + jobs. It also does not address the <<>> code that runs as an + auxilliary service within each NodeManager. As a result the following + limitations apply to MapReduce versions that can be successfully deployed via + the distributed cache in a rolling upgrade fashion: + + * The MapReduce version must be compatible with the job client code used to + submit and query jobs. If it is incompatible then the job client must be + upgraded separately on any node from which jobs using the new MapReduce + version will be submitted or queried. + + * The MapReduce version must be compatible with the configuration files used + by the job client submitting the jobs. If it is incompatible with that + configuration (e.g.: a new property must be set or an existing property + value changed) then the configuration must be updated first. + + * The MapReduce version must be compatible with the <<>> + version running on the nodes in the cluster. If it is incompatible then the + new <<>> code must be deployed to all the nodes in the + cluster, and the NodeManagers must be restarted to pick up the new + <<>> code. + +* Deploying a New MapReduce Version via the Distributed Cache + + Deploying a new MapReduce version consists of three steps: + + [[1]] Upload the MapReduce archive to a location that can be accessed by the + job submission client. Ideally the archive should be on the cluster's default + filesystem at a publicly-readable path. See the archive location discussion + below for more details. + + [[2]] Configure <<>> to point to the + location where the archive is located. As when specifying distributed cache + files for a job, this is a URL that also supports creating an alias for the + archive if a URL fragment is specified. For example, + <<>> will + be localized as <<>> rather than + <<>>. + + [[3]] Configure <<>> to set the proper + classpath to use with the MapReduce archive configured above. NOTE: An error + occurs if <<>> is configured but + <<>> does not reference the base name of the + archive path or the alias if an alias was specified. 
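  As a concrete illustration of steps [[2]] and [[3]] above, the Java sketch
  below sets the two properties on the job configuration before submission. It
  is a minimal example under assumed values: the archive location
  hdfs:///apps/mr-framework/hadoop-mapreduce-2.1.1.tar.gz, the mrframework
  alias, the archive's internal directory layout, and the driver class name are
  assumptions for illustration, not values mandated by the framework.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.Job;

    public class FrameworkFromDistCacheSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Step 2: point at the uploaded archive; the #mrframework fragment is
        // the alias under which the archive is localized on each node.
        conf.set("mapreduce.application.framework.path",
            "hdfs:///apps/mr-framework/hadoop-mapreduce-2.1.1.tar.gz#mrframework");

        // Step 3: the classpath must contain the alias (or the archive base
        // name), otherwise MRApps.setClasspath rejects the submission with an
        // IllegalArgumentException.
        conf.set("mapreduce.application.classpath",
            "$PWD/mrframework/hadoop-mapreduce-2.1.1/share/hadoop/mapreduce/*,"
            + "$PWD/mrframework/hadoop-mapreduce-2.1.1/share/hadoop/mapreduce/lib/*,"
            + "$HADOOP_CONF_DIR,$HADOOP_COMMON_HOME/share/hadoop/common/*");

        Job job = Job.getInstance(conf, "framework-from-distributed-cache");
        // ... configure mapper, reducer, input and output paths as for any
        // other job, then call job.waitForCompletion(true).
      }
    }

  With these settings the archive is added to the job's distributed cache
  automatically at submission time by the JobSubmitter change in this patch, so
  no separate distributed-cache configuration is needed for the framework itself.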
+ +** Location of the MapReduce Archive and How It Affects Job Performance + + Note that the location of the MapReduce archive can be critical to job + submission and job startup performance. If the archive is not located on the + cluster's default filesystem then it will be copied to the job staging + directory for each job and localized to each node where the job's tasks + run. This will slow down job submission and task startup performance. + + If the archive is located on the default filesystem then the job client will + not upload the archive to the job staging directory for each job + submission. However if the archive path is not readable by all cluster users + then the archive will be localized separately for each user on each node + where tasks execute. This can cause unnecessary duplication in the + distributed cache. + + When working with a large cluster it can be important to increase the + replication factor of the archive to increase its availability. This will + spread the load when the nodes in the cluster localize the archive for the + first time. + +* MapReduce Archives and Classpath Configuration + + Setting a proper classpath for the MapReduce archive depends upon the + composition of the archive and whether it has any additional dependencies. + For example, the archive can contain not only the MapReduce jars but also the + necessary YARN, HDFS, and Hadoop Common jars and all other dependencies. In + that case, <<>> would be configured to + something like the following example, where the archive basename is + hadoop-mapreduce-2.1.1.tar.gz and the archive is organized internally similar + to the standard Hadoop distribution archive: + + <<<$HADOOP_CONF_DIR,$PWD/hadoop-mapreduce-2.1.1.tar.gz/hadoop-mapreduce-2.1.1/share/hadoop/mapreduce/*,$PWD/hadoop-mapreduce-2.1.1.tar.gz/hadoop-mapreduce-2.1.1/share/hadoop/mapreduce/lib/*,$PWD/hadoop-mapreduce-2.1.1.tar.gz/hadoop-mapreduce-2.1.1/share/hadoop/common/*,$PWD/hadoop-mapreduce-2.1.1.tar.gz/hadoop-mapreduce-2.1.1/share/hadoop/common/lib/*,$PWD/hadoop-mapreduce-2.1.1.tar.gz/hadoop-mapreduce-2.1.1/share/hadoop/yarn/*,$PWD/hadoop-mapreduce-2.1.1.tar.gz/hadoop-mapreduce-2.1.1/share/hadoop/yarn/lib/*,$PWD/hadoop-mapreduce-2.1.1.tar.gz/hadoop-mapreduce-2.1.1/share/hadoop/hdfs/*,$PWD/hadoop-mapreduce-2.1.1.tar.gz/hadoop-mapreduce-2.1.1/share/hadoop/hdfs/lib/*>>> + + Another possible approach is to have the archive consist of just the + MapReduce jars and have the remaining dependencies picked up from the Hadoop + distribution installed on the nodes. In that case, the above example would + change to something like the following: + + <<<$HADOOP_CONF_DIR,$PWD/hadoop-mapreduce-2.1.1.tar.gz/hadoop-mapreduce-2.1.1/share/hadoop/mapreduce/*,$PWD/hadoop-mapreduce-2.1.1.tar.gz/hadoop-mapreduce-2.1.1/share/hadoop/mapreduce/lib/*,$HADOOP_COMMON_HOME/share/hadoop/common/*,$HADOOP_COMMON_HOME/share/hadoop/common/lib/*,$HADOOP_HDFS_HOME/share/hadoop/hdfs/*,$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*,$HADOOP_YARN_HOME/share/hadoop/yarn/*,$HADOOP_YARN_HOME/share/hadoop/yarn/lib/*>>> diff --git a/hadoop-project/src/site/site.xml b/hadoop-project/src/site/site.xml index 9b083dd5961..f6496b85f5e 100644 --- a/hadoop-project/src/site/site.xml +++ b/hadoop-project/src/site/site.xml @@ -86,6 +86,7 @@ + From 85a1bd107a441b6a0057256d5b9e3a7e0d8b9f91 Mon Sep 17 00:00:00 2001 From: Chris Nauroth Date: Tue, 1 Oct 2013 22:37:42 +0000 Subject: [PATCH 013/133] YARN-1215. Correct CHANGES.txt. 
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1528239 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 3501ed423d2..97764926d7f 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -54,8 +54,6 @@ Release 2.3.0 - UNRELEASED YARN-1188. The context of QueueMetrics becomes default when using FairScheduler (Tsuyoshi Ozawa via Sandy Ryza) - YARN-1215. Yarn URL should include userinfo. (Chuan Liu via cnauroth) - Release 2.2.0 - UNRELEASED INCOMPATIBLE CHANGES @@ -114,6 +112,8 @@ Release 2.1.2 - UNRELEASED YARN-1070. Fixed race conditions in NodeManager during container-kill. (Zhijie Shen via vinodkv) + YARN-1215. Yarn URL should include userinfo. (Chuan Liu via cnauroth) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES From 4b9fa6af9d962792eca171f6e67b96fa4cd27ca6 Mon Sep 17 00:00:00 2001 From: Sanford Ryza Date: Tue, 1 Oct 2013 22:42:50 +0000 Subject: [PATCH 014/133] YARN-1262. TestApplicationCleanup relies on all containers assigned in a single heartbeat (Karthik Kambatla via Sandy Ryza) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1528243 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 +++ .../yarn/server/resourcemanager/TestApplicationCleanup.java | 2 ++ 2 files changed, 5 insertions(+) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 97764926d7f..a4b289b7991 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -114,6 +114,9 @@ Release 2.1.2 - UNRELEASED YARN-1215. Yarn URL should include userinfo. (Chuan Liu via cnauroth) + YARN-1262. TestApplicationCleanup relies on all containers assigned in a + single heartbeat (Karthik Kambatla via Sandy Ryza) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationCleanup.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationCleanup.java index c6dfaa64c38..5eecae0987f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationCleanup.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationCleanup.java @@ -88,6 +88,7 @@ public class TestApplicationCleanup { conts = am.allocate(new ArrayList(), new ArrayList()).getAllocatedContainers(); contReceived += conts.size(); + nm1.nodeHeartbeat(true); } Assert.assertEquals(request, contReceived); @@ -178,6 +179,7 @@ public class TestApplicationCleanup { new ArrayList()).getAllocatedContainers(); dispatcher.await(); contReceived += conts.size(); + nm1.nodeHeartbeat(true); } Assert.assertEquals(request, contReceived); From 82f4348f27a7b6bb74edb583fc17fef9f4f3b082 Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Tue, 1 Oct 2013 22:52:25 +0000 Subject: [PATCH 015/133] MAPREDUCE-5536. Fixed MR AM and JHS to respect mapreduce.jobhistory.webapp.https.address. Contributed by Omkar Vinit Joshi. 
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1528251 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 + .../hadoop/mapreduce/v2/app/MRAppMaster.java | 11 +- .../v2/app/client/MRClientService.java | 3 - .../mapreduce/v2/app/rm/RMCommunicator.java | 19 +- .../v2/app/webapp/AppController.java | 3 +- .../mapreduce/v2/app/webapp/JobBlock.java | 3 +- .../mapreduce/v2/app/webapp/NavBlock.java | 14 +- .../mapreduce/v2/app/webapp/TaskPage.java | 8 +- .../mapreduce/v2/app/webapp/WebAppUtil.java | 60 ------ .../v2/app/webapp/dao/AMAttemptInfo.java | 5 +- .../v2/jobhistory/JHAdminConfig.java | 40 +--- .../v2/jobhistory/JobHistoryUtils.java | 42 ---- .../mapreduce/v2/util/MRWebAppUtil.java | 193 ++++++++++++++++++ .../org/apache/hadoop/mapreduce/MRConfig.java | 2 +- .../src/main/resources/mapred-default.xml | 10 +- .../hs/webapp/MapReduceTrackingUriPlugin.java | 7 +- .../hadoop/mapreduce/v2/hs/CompletedJob.java | 5 +- .../mapreduce/v2/hs/HistoryClientService.java | 12 +- .../mapreduce/v2/hs/JobHistoryServer.java | 8 +- .../mapreduce/v2/hs/webapp/HsJobBlock.java | 16 +- .../mapreduce/v2/hs/webapp/HsTaskPage.java | 9 +- .../mapreduce/v2/MiniMRYarnCluster.java | 49 ++++- 22 files changed, 311 insertions(+), 211 deletions(-) delete mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/WebAppUtil.java create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRWebAppUtil.java diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 6c755ea1928..303fec6fcb9 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -257,6 +257,9 @@ Release 2.1.2 - UNRELEASED MAPREDUCE-5544. JobClient#getJob loads job conf twice. (Sandy Ryza) + MAPREDUCE-5536. Fixed MR AM and JHS to respect + mapreduce.jobhistory.webapp.https.address. 
(Omkar Vinit Joshi via vinodkv) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java index 59bfdd3d20a..9e038121033 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java @@ -105,10 +105,11 @@ import org.apache.hadoop.mapreduce.v2.app.rm.RMHeartbeatHandler; import org.apache.hadoop.mapreduce.v2.app.speculate.DefaultSpeculator; import org.apache.hadoop.mapreduce.v2.app.speculate.Speculator; import org.apache.hadoop.mapreduce.v2.app.speculate.SpeculatorEvent; -import org.apache.hadoop.mapreduce.v2.app.webapp.WebAppUtil; +import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig; import org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils; import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils; +import org.apache.hadoop.mapreduce.v2.util.MRWebAppUtil; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.UserGroupInformation; @@ -1349,12 +1350,8 @@ public class MRAppMaster extends CompositeService { // to gain access to keystore file for opening SSL listener. We can trust // RM/NM to issue SSL certificates but definitely not MR-AM as it is // running in user-land. 
- HttpConfig.setSecure(conf.getBoolean(MRConfig.SSL_ENABLED_KEY, - MRConfig.SSL_ENABLED_KEY_DEFAULT)); - WebAppUtil.setSSLEnabledInYARN(conf.getBoolean( - CommonConfigurationKeysPublic.HADOOP_SSL_ENABLED_KEY, - CommonConfigurationKeysPublic.HADOOP_SSL_ENABLED_DEFAULT)); - + MRWebAppUtil.initialize(conf); + HttpConfig.setSecure(MRWebAppUtil.isSSLEnabledInMRAM()); // log the system properties String systemPropsToLog = MRApps.getSystemPropertiesToLog(conf); if (systemPropsToLog != null) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/client/MRClientService.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/client/MRClientService.java index 1661b8ada1c..181fd3740a9 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/client/MRClientService.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/client/MRClientService.java @@ -27,10 +27,8 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; -import org.apache.hadoop.http.HttpConfig; import org.apache.hadoop.ipc.Server; import org.apache.hadoop.mapreduce.JobACL; -import org.apache.hadoop.mapreduce.MRConfig; import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.TypeConverter; import org.apache.hadoop.mapreduce.v2.api.MRClientProtocol; @@ -80,7 +78,6 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEventType; import org.apache.hadoop.mapreduce.v2.app.security.authorize.MRAMPolicyProvider; import org.apache.hadoop.mapreduce.v2.app.webapp.AMWebApp; -import org.apache.hadoop.mapreduce.v2.app.webapp.WebAppUtil; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java index cc047619c2c..67c632a87c0 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java @@ -28,7 +28,6 @@ import java.util.concurrent.atomic.AtomicBoolean; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.http.HttpConfig; import org.apache.hadoop.mapreduce.JobID; import org.apache.hadoop.mapreduce.MRConfig; import org.apache.hadoop.mapreduce.MRJobConfig; @@ -39,8 +38,7 @@ import org.apache.hadoop.mapreduce.v2.app.client.ClientService; import org.apache.hadoop.mapreduce.v2.app.job.Job; import org.apache.hadoop.mapreduce.v2.app.job.JobStateInternal; import org.apache.hadoop.mapreduce.v2.app.job.impl.JobImpl; -import 
org.apache.hadoop.mapreduce.v2.app.webapp.WebAppUtil; -import org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils; +import org.apache.hadoop.mapreduce.v2.util.MRWebAppUtil; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.service.AbstractService; import org.apache.hadoop.yarn.api.ApplicationMasterProtocol; @@ -146,13 +144,9 @@ public abstract class RMCommunicator extends AbstractService if (serviceAddr != null) { request.setHost(serviceAddr.getHostName()); request.setRpcPort(serviceAddr.getPort()); - String scheme = "http://"; - if (getConfig().getBoolean(MRConfig.SSL_ENABLED_KEY, - MRConfig.SSL_ENABLED_KEY_DEFAULT)) { - scheme = "https://"; - } - request.setTrackingUrl(scheme + serviceAddr.getHostName() + ":" - + clientService.getHttpPort()); + request.setTrackingUrl(MRWebAppUtil + .getAMWebappScheme(getConfig()) + + serviceAddr.getHostName() + ":" + clientService.getHttpPort()); } RegisterApplicationMasterResponse response = scheduler.registerApplicationMaster(request); @@ -195,9 +189,8 @@ public abstract class RMCommunicator extends AbstractService LOG.info("Setting job diagnostics to " + sb.toString()); String historyUrl = - WebAppUtil.getSchemePrefix() - + JobHistoryUtils.getHistoryUrl(getConfig(), - context.getApplicationID()); + MRWebAppUtil.getApplicationWebURLOnJHSWithScheme(getConfig(), + context.getApplicationID()); LOG.info("History url is " + historyUrl); FinishApplicationMasterRequest request = FinishApplicationMasterRequest.newInstance(finishState, diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/AppController.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/AppController.java index 89bc8bebfb4..bb188c06f3a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/AppController.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/AppController.java @@ -36,6 +36,7 @@ import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.app.job.Job; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.AppInfo; import org.apache.hadoop.mapreduce.v2.util.MRApps; +import org.apache.hadoop.mapreduce.v2.util.MRWebAppUtil; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.yarn.util.StringHelper; import org.apache.hadoop.yarn.util.Times; @@ -61,7 +62,7 @@ public class AppController extends Controller implements AMParams { this.app = app; set(APP_ID, app.context.getApplicationID().toString()); set(RM_WEB, - JOINER.join(WebAppUtil.getSchemePrefix(), + JOINER.join(MRWebAppUtil.getYARNWebappScheme(), WebAppUtils.getResolvedRMWebAppURLWithoutScheme(conf))); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/JobBlock.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/JobBlock.java index ccc5e7ead7a..a8a756d3059 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/JobBlock.java +++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/JobBlock.java @@ -39,6 +39,7 @@ import org.apache.hadoop.mapreduce.v2.app.webapp.dao.AMAttemptInfo; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.JobInfo; import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.mapreduce.v2.util.MRApps.TaskAttemptStateUI; +import org.apache.hadoop.mapreduce.v2.util.MRWebAppUtil; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.DIV; @@ -104,7 +105,7 @@ public class JobBlock extends HtmlBlock { table.tr(). td(String.valueOf(attempt.getAttemptId())). td(new Date(attempt.getStartTime()).toString()). - td().a(".nodelink", url(WebAppUtil.getSchemePrefix(), + td().a(".nodelink", url(MRWebAppUtil.getYARNWebappScheme(), attempt.getNodeHttpAddress()), attempt.getNodeHttpAddress())._(). td().a(".logslink", url(attempt.getLogsLink()), diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/NavBlock.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/NavBlock.java index a2b00e9672d..0edeb168349 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/NavBlock.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/NavBlock.java @@ -18,19 +18,19 @@ package org.apache.hadoop.mapreduce.v2.app.webapp; +import static org.apache.hadoop.mapreduce.v2.app.webapp.AMParams.RM_WEB; + import java.util.List; -import com.google.inject.Inject; - -import static org.apache.hadoop.mapreduce.v2.app.webapp.AMWebApp.*; - -import org.apache.hadoop.http.HttpConfig; import org.apache.hadoop.mapreduce.v2.api.records.AMInfo; import org.apache.hadoop.mapreduce.v2.util.MRApps; +import org.apache.hadoop.mapreduce.v2.util.MRWebAppUtil; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet; -import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.*; +import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.DIV; import org.apache.hadoop.yarn.webapp.view.HtmlBlock; +import com.google.inject.Inject; + public class NavBlock extends HtmlBlock { final App app; @@ -63,7 +63,7 @@ public class NavBlock extends HtmlBlock { li().a(url("conf", jobid), "Configuration")._(). li().a(url("tasks", jobid, "m"), "Map tasks")._(). li().a(url("tasks", jobid, "r"), "Reduce tasks")._(). 
- li().a(".logslink", url(WebAppUtil.getSchemePrefix(), + li().a(".logslink", url(MRWebAppUtil.getYARNWebappScheme(), nodeHttpAddress, "node", "containerlogs", thisAmInfo.getContainerId().toString(), app.getJob().getUserName()), diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/TaskPage.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/TaskPage.java index 26ef371f8a8..866c7f1ebb4 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/TaskPage.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/TaskPage.java @@ -28,14 +28,14 @@ import static org.apache.hadoop.yarn.webapp.view.JQueryUI.tableInit; import java.util.Collection; import org.apache.commons.lang.StringEscapeUtils; -import org.apache.hadoop.http.HttpConfig; import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.TaskAttemptInfo; +import org.apache.hadoop.mapreduce.v2.util.MRWebAppUtil; import org.apache.hadoop.yarn.webapp.SubView; -import org.apache.hadoop.yarn.webapp.view.HtmlBlock; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TBODY; +import org.apache.hadoop.yarn.webapp.view.HtmlBlock; import com.google.inject.Inject; @@ -86,12 +86,12 @@ public class TaskPage extends AppView { .append(ta.getState().toString()).append("\",\"") .append(nodeHttpAddr == null ? "N/A" : - "" + "" + nodeHttpAddr + "") .append("\",\"") .append(ta.getAssignedContainerId() == null ? "N/A" : - "logs") .append("\",\"") diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/WebAppUtil.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/WebAppUtil.java deleted file mode 100644 index 18e643a88db..00000000000 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/WebAppUtil.java +++ /dev/null @@ -1,60 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.mapreduce.v2.app.webapp; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.http.HttpConfig; -import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig; - - -public class WebAppUtil { - private static boolean isSSLEnabledInYARN; - - public static void setSSLEnabledInYARN(boolean isSSLEnabledInYARN) { - WebAppUtil.isSSLEnabledInYARN = isSSLEnabledInYARN; - } - - public static boolean isSSLEnabledInYARN() { - return isSSLEnabledInYARN; - } - - public static String getSchemePrefix() { - if (isSSLEnabledInYARN) { - return "https://"; - } else { - return "http://"; - } - } - - public static void setJHSWebAppURLWithoutScheme(Configuration conf, - String hostAddress) { - if (HttpConfig.isSecure()) { - conf.set(JHAdminConfig.MR_HISTORY_WEBAPP_HTTPS_ADDRESS, hostAddress); - } else { - conf.set(JHAdminConfig.MR_HISTORY_WEBAPP_ADDRESS, hostAddress); - } - } - - public static String getJHSWebAppURLWithoutScheme(Configuration conf) { - if (HttpConfig.isSecure()) { - return conf.get(JHAdminConfig.MR_HISTORY_WEBAPP_HTTPS_ADDRESS); - } else { - return conf.get(JHAdminConfig.MR_HISTORY_WEBAPP_ADDRESS); - } - } -} \ No newline at end of file diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/AMAttemptInfo.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/AMAttemptInfo.java index a139b5176c2..a172132e2ca 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/AMAttemptInfo.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/AMAttemptInfo.java @@ -24,9 +24,8 @@ import javax.xml.bind.annotation.XmlAccessType; import javax.xml.bind.annotation.XmlAccessorType; import javax.xml.bind.annotation.XmlRootElement; -import org.apache.hadoop.http.HttpConfig; import org.apache.hadoop.mapreduce.v2.api.records.AMInfo; -import org.apache.hadoop.mapreduce.v2.app.webapp.WebAppUtil; +import org.apache.hadoop.mapreduce.v2.util.MRWebAppUtil; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.NodeId; @@ -64,7 +63,7 @@ public class AMAttemptInfo { ContainerId containerId = amInfo.getContainerId(); if (containerId != null) { this.containerId = containerId.toString(); - this.logsLink = join(WebAppUtil.getSchemePrefix() + nodeHttpAddress, + this.logsLink = join(MRWebAppUtil.getYARNWebappScheme() + nodeHttpAddress, ujoin("node", "containerlogs", this.containerId, user)); } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java index 411170af6c0..e7986d4afe7 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java @@ -25,7 +25,6 @@ import java.net.UnknownHostException; import 
org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.http.HttpConfig; import org.apache.hadoop.net.NetUtils; /** @@ -130,6 +129,11 @@ public class JHAdminConfig { public static final String MR_HISTORY_PRINCIPAL = MR_HISTORY_PREFIX + "principal"; + /** To enable SSL in MR history server */ + public static final String MR_HS_SSL_ENABLED = MR_HISTORY_PREFIX + + "ssl.enabled"; + public static boolean DEFAULT_MR_HS_SSL_ENABLED = false; + /**The address the history server webapp is on.*/ public static final String MR_HISTORY_WEBAPP_ADDRESS = MR_HISTORY_PREFIX + "webapp.address"; @@ -188,43 +192,11 @@ public class JHAdminConfig { /** Whether to use fixed ports with the minicluster. */ public static final String MR_HISTORY_MINICLUSTER_FIXED_PORTS = MR_HISTORY_PREFIX + "minicluster.fixed.ports"; - + /** * Default is false to be able to run tests concurrently without port * conflicts. */ public static boolean DEFAULT_MR_HISTORY_MINICLUSTER_FIXED_PORTS = false; - public static String getResolvedMRHistoryWebAppURLWithoutScheme( - Configuration conf) { - InetSocketAddress address = null; - if (HttpConfig.isSecure()) { - address = - conf.getSocketAddr(JHAdminConfig.MR_HISTORY_WEBAPP_HTTPS_ADDRESS, - JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_HTTPS_ADDRESS, - JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_HTTPS_PORT); - } else { - address = - conf.getSocketAddr(JHAdminConfig.MR_HISTORY_WEBAPP_ADDRESS, - JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_ADDRESS, - JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_PORT); } - address = NetUtils.getConnectAddress(address); - StringBuffer sb = new StringBuffer(); - InetAddress resolved = address.getAddress(); - if (resolved == null || resolved.isAnyLocalAddress() || - resolved.isLoopbackAddress()) { - String lh = address.getHostName(); - try { - lh = InetAddress.getLocalHost().getCanonicalHostName(); - } catch (UnknownHostException e) { - //Ignore and fallback. 
- } - sb.append(lh); - } else { - sb.append(address.getHostName()); - } - sb.append(":").append(address.getPort()); - return sb.toString(); - } - } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java index 6eaddb87c9f..1ef213936b1 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java @@ -20,11 +20,7 @@ package org.apache.hadoop.mapreduce.v2.jobhistory; import java.io.File; import java.io.IOException; -import java.net.InetAddress; -import java.net.InetSocketAddress; -import java.net.UnknownHostException; import java.util.Calendar; -import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.concurrent.atomic.AtomicBoolean; @@ -45,13 +41,8 @@ import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.TypeConverter; import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.mapreduce.v2.util.MRApps; -import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; -import org.apache.hadoop.yarn.api.records.ApplicationId; - -import com.google.common.base.Joiner; -import com.google.common.base.Splitter; @InterfaceAudience.Private @InterfaceStability.Unstable @@ -126,9 +117,6 @@ public class JobHistoryUtils { public static final Pattern TIMESTAMP_DIR_PATTERN = Pattern.compile(TIMESTAMP_DIR_REGEX); private static final String TIMESTAMP_DIR_FORMAT = "%04d" + File.separator + "%02d" + File.separator + "%02d"; - private static final Splitter ADDR_SPLITTER = Splitter.on(':').trimResults(); - private static final Joiner JOINER = Joiner.on(""); - private static final PathFilter CONF_FILTER = new PathFilter() { @Override public boolean accept(Path path) { @@ -497,36 +485,6 @@ public class JobHistoryUtils { return result; } - public static String getHistoryUrl(Configuration conf, ApplicationId appId) - throws UnknownHostException { - //construct the history url for job - String addr = conf.get(JHAdminConfig.MR_HISTORY_WEBAPP_ADDRESS, - JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_ADDRESS); - Iterator it = ADDR_SPLITTER.split(addr).iterator(); - it.next(); // ignore the bind host - String port = it.next(); - // Use hs address to figure out the host for webapp - addr = conf.get(JHAdminConfig.MR_HISTORY_ADDRESS, - JHAdminConfig.DEFAULT_MR_HISTORY_ADDRESS); - String host = ADDR_SPLITTER.split(addr).iterator().next(); - String hsAddress = JOINER.join(host, ":", port); - InetSocketAddress address = NetUtils.createSocketAddr( - hsAddress, JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_PORT, - JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_ADDRESS); - StringBuffer sb = new StringBuffer(); - if (address.getAddress().isAnyLocalAddress() || - address.getAddress().isLoopbackAddress()) { - sb.append(InetAddress.getLocalHost().getCanonicalHostName()); - } else { - sb.append(address.getHostName()); - } - sb.append(":").append(address.getPort()); - sb.append("/jobhistory/job/"); - JobID jobId = 
TypeConverter.fromYarn(appId); - sb.append(jobId.toString()); - return sb.toString(); - } - public static Path getPreviousJobHistoryPath( Configuration conf, ApplicationAttemptId applicationAttemptId) throws IOException { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRWebAppUtil.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRWebAppUtil.java new file mode 100644 index 00000000000..095d25b7841 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRWebAppUtil.java @@ -0,0 +1,193 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.mapreduce.v2.util; + +import java.net.InetAddress; +import java.net.InetSocketAddress; +import java.net.UnknownHostException; +import java.util.Iterator; + +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.classification.InterfaceStability.Evolving; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.mapreduce.JobID; +import org.apache.hadoop.mapreduce.MRConfig; +import org.apache.hadoop.mapreduce.TypeConverter; +import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig; +import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.yarn.api.records.ApplicationId; + +import com.google.common.base.Joiner; +import com.google.common.base.Splitter; + +@Private +@Evolving +public class MRWebAppUtil { + private static final Splitter ADDR_SPLITTER = Splitter.on(':').trimResults(); + private static final Joiner JOINER = Joiner.on(""); + + private static boolean isSSLEnabledInYARN; + private static boolean isSSLEnabledInJHS; + private static boolean isSSLEnabledInMRAM; + + public static void initialize(Configuration conf) { + setSSLEnabledInYARN(conf.getBoolean( + CommonConfigurationKeysPublic.HADOOP_SSL_ENABLED_KEY, + CommonConfigurationKeysPublic.HADOOP_SSL_ENABLED_DEFAULT)); + setSSLEnabledInJHS(conf.getBoolean(JHAdminConfig.MR_HS_SSL_ENABLED, + JHAdminConfig.DEFAULT_MR_HS_SSL_ENABLED)); + setSSLEnabledInMRAM(conf.getBoolean(MRConfig.SSL_ENABLED_KEY, + MRConfig.SSL_ENABLED_KEY_DEFAULT)); + } + + private static void setSSLEnabledInYARN(boolean isSSLEnabledInYARN) { + MRWebAppUtil.isSSLEnabledInYARN = isSSLEnabledInYARN; + } + + private static void setSSLEnabledInJHS(boolean isSSLEnabledInJHS) { + MRWebAppUtil.isSSLEnabledInJHS = isSSLEnabledInJHS; + } + + private static void setSSLEnabledInMRAM(boolean isSSLEnabledInMRAM) { + MRWebAppUtil.isSSLEnabledInMRAM = isSSLEnabledInMRAM; + } 
+ + public static boolean isSSLEnabledInYARN() { + return isSSLEnabledInYARN; + } + + public static boolean isSSLEnabledInJHS() { + return isSSLEnabledInJHS; + } + + public static boolean isSSLEnabledInMRAM() { + return isSSLEnabledInMRAM; + } + + public static String getYARNWebappScheme() { + if (isSSLEnabledInYARN) { + return "https://"; + } else { + return "http://"; + } + } + + public static String getJHSWebappScheme() { + if (isSSLEnabledInJHS) { + return "https://"; + } else { + return "http://"; + } + } + + public static void setJHSWebappURLWithoutScheme(Configuration conf, + String hostAddress) { + if (isSSLEnabledInJHS) { + conf.set(JHAdminConfig.MR_HISTORY_WEBAPP_HTTPS_ADDRESS, hostAddress); + } else { + conf.set(JHAdminConfig.MR_HISTORY_WEBAPP_ADDRESS, hostAddress); + } + } + + public static String getJHSWebappURLWithoutScheme(Configuration conf) { + if (isSSLEnabledInJHS) { + return conf.get(JHAdminConfig.MR_HISTORY_WEBAPP_HTTPS_ADDRESS, + JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_HTTPS_ADDRESS); + } else { + return conf.get(JHAdminConfig.MR_HISTORY_WEBAPP_ADDRESS, + JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_ADDRESS); + } + } + + public static String getJHSWebappURLWithScheme(Configuration conf) { + return getJHSWebappScheme() + getJHSWebappURLWithoutScheme(conf); + } + + public static InetSocketAddress getJHSWebBindAddress(Configuration conf) { + if (isSSLEnabledInJHS) { + return conf.getSocketAddr(JHAdminConfig.MR_HISTORY_WEBAPP_HTTPS_ADDRESS, + JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_HTTPS_ADDRESS, + JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_HTTPS_PORT); + } else { + return conf.getSocketAddr(JHAdminConfig.MR_HISTORY_WEBAPP_ADDRESS, + JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_ADDRESS, + JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_PORT); + } + } + + public static String getApplicationWebURLOnJHSWithoutScheme(Configuration conf, + ApplicationId appId) + throws UnknownHostException { + //construct the history url for job + String addr = getJHSWebappURLWithoutScheme(conf); + Iterator it = ADDR_SPLITTER.split(addr).iterator(); + it.next(); // ignore the bind host + String port = it.next(); + // Use hs address to figure out the host for webapp + addr = conf.get(JHAdminConfig.MR_HISTORY_ADDRESS, + JHAdminConfig.DEFAULT_MR_HISTORY_ADDRESS); + String host = ADDR_SPLITTER.split(addr).iterator().next(); + String hsAddress = JOINER.join(host, ":", port); + InetSocketAddress address = NetUtils.createSocketAddr( + hsAddress, getDefaultJHSWebappPort(), + getDefaultJHSWebappURLWithoutScheme()); + StringBuffer sb = new StringBuffer(); + if (address.getAddress().isAnyLocalAddress() || + address.getAddress().isLoopbackAddress()) { + sb.append(InetAddress.getLocalHost().getCanonicalHostName()); + } else { + sb.append(address.getHostName()); + } + sb.append(":").append(address.getPort()); + sb.append("/jobhistory/job/"); + JobID jobId = TypeConverter.fromYarn(appId); + sb.append(jobId.toString()); + return sb.toString(); + } + + public static String getApplicationWebURLOnJHSWithScheme(Configuration conf, + ApplicationId appId) throws UnknownHostException { + return getJHSWebappScheme() + + getApplicationWebURLOnJHSWithoutScheme(conf, appId); + } + + private static int getDefaultJHSWebappPort() { + if (isSSLEnabledInJHS) { + return JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_HTTPS_PORT; + } else { + return JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_PORT; + } + } + + private static String getDefaultJHSWebappURLWithoutScheme() { + if (isSSLEnabledInJHS) { + return 
JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_HTTPS_ADDRESS; + } else { + return JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_ADDRESS; + } + } + + public static String getAMWebappScheme(Configuration conf) { + if (isSSLEnabledInMRAM) { + return "https://"; + } else { + return "http://"; + } + } +} \ No newline at end of file diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRConfig.java index 879f70d0983..830bb4406cd 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRConfig.java @@ -86,7 +86,7 @@ public interface MRConfig { public static final boolean SHUFFLE_SSL_ENABLED_DEFAULT = false; public static final String SSL_ENABLED_KEY = - "mapreduce.ssl.enabled"; + "mapreduce.am.ssl.enabled"; public static final boolean SSL_ENABLED_KEY_DEFAULT = false; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml index c08836d45ee..657805daea0 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml @@ -290,7 +290,7 @@ - mapreduce.ssl.enabled + mapreduce.am.ssl.enabled false If enabled, MapReduce application master's http server will be @@ -1225,4 +1225,12 @@ storage class. + + mapreduce.jobhistory.ssl.enabled + false + + Whether to use SSL for the HTTP endpoints. If set to true, the + JobHistoryServer web UIs will be served over HTTPS instead HTTP. 
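With this patch the single shared SSL switch is split three ways: hadoop.ssl.enabled keeps driving the YARN-side scheme, while the MR AM and the JobHistoryServer read their own keys (mapreduce.am.ssl.enabled and mapreduce.jobhistory.ssl.enabled, as defined above). A minimal sketch of how a caller is expected to consume these through MRWebAppUtil follows; it is illustrative only, not part of the patch, and the class name SchemeSelectionSketch is made up.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.v2.util.MRWebAppUtil;

public class SchemeSelectionSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Key names as introduced by this patch (MRConfig.SSL_ENABLED_KEY and
    // JHAdminConfig.MR_HS_SSL_ENABLED respectively).
    conf.setBoolean("mapreduce.jobhistory.ssl.enabled", true);  // JHS web UI over HTTPS
    conf.setBoolean("mapreduce.am.ssl.enabled", false);         // AM web UI stays on HTTP

    MRWebAppUtil.initialize(conf);  // caches the three flags once, as the AM and JHS now do

    String yarnScheme = MRWebAppUtil.getYARNWebappScheme();   // follows hadoop.ssl.enabled
    String jhsScheme  = MRWebAppUtil.getJHSWebappScheme();    // "https://" in this sketch
    String amScheme   = MRWebAppUtil.getAMWebappScheme(conf); // "http://" in this sketch
    System.out.println(yarnScheme + " " + jhsScheme + " " + amScheme);
  }
}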
+ + diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs-plugins/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/MapReduceTrackingUriPlugin.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs-plugins/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/MapReduceTrackingUriPlugin.java index 6d148a7825c..de0f83206a7 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs-plugins/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/MapReduceTrackingUriPlugin.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs-plugins/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/MapReduceTrackingUriPlugin.java @@ -24,7 +24,7 @@ import java.net.URISyntaxException; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig; +import org.apache.hadoop.mapreduce.v2.util.MRWebAppUtil; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.util.TrackingUriPlugin; @@ -54,8 +54,7 @@ public class MapReduceTrackingUriPlugin extends TrackingUriPlugin implements public URI getTrackingUri(ApplicationId id) throws URISyntaxException { String jobSuffix = id.toString().replaceFirst("^application_", "job_"); String historyServerAddress = - this.getConf().get(JHAdminConfig.MR_HISTORY_WEBAPP_ADDRESS); - return new URI("http://" + historyServerAddress + "/jobhistory/job/" - + jobSuffix); + MRWebAppUtil.getJHSWebappURLWithScheme(getConf()); + return new URI(historyServerAddress + "/jobhistory/job/"+ jobSuffix); } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CompletedJob.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CompletedJob.java index 01dcbe4dc83..3f0644ab376 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CompletedJob.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CompletedJob.java @@ -58,6 +58,7 @@ import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt; import org.apache.hadoop.mapreduce.v2.hs.HistoryFileManager.HistoryFileInfo; import org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils; import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils; +import org.apache.hadoop.mapreduce.v2.util.MRWebAppUtil; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authorize.AccessControlList; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; @@ -142,7 +143,9 @@ public class CompletedJob implements org.apache.hadoop.mapreduce.v2.app.job.Job report.setJobFile(getConfFile().toString()); String historyUrl = "N/A"; try { - historyUrl = JobHistoryUtils.getHistoryUrl(conf, jobId.getAppId()); + historyUrl = + MRWebAppUtil.getApplicationWebURLOnJHSWithoutScheme(conf, + jobId.getAppId()); } catch (UnknownHostException e) { //Ignore. 
} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryClientService.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryClientService.java index 87fb1ed41ee..f27124cdf23 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryClientService.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryClientService.java @@ -75,12 +75,12 @@ import org.apache.hadoop.mapreduce.v2.app.job.Task; import org.apache.hadoop.mapreduce.v2.app.security.authorize.ClientHSPolicyProvider; import org.apache.hadoop.mapreduce.v2.hs.webapp.HsWebApp; import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig; +import org.apache.hadoop.mapreduce.v2.util.MRWebAppUtil; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.service.AbstractService; -import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.ipc.YarnRPC; @@ -144,10 +144,7 @@ public class HistoryClientService extends AbstractService { private void initializeWebApp(Configuration conf) { webApp = new HsWebApp(history); - InetSocketAddress bindAddress = conf.getSocketAddr( - JHAdminConfig.MR_HISTORY_WEBAPP_ADDRESS, - JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_ADDRESS, - JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_PORT); + InetSocketAddress bindAddress = MRWebAppUtil.getJHSWebBindAddress(conf); // NOTE: there should be a .at(InetSocketAddress) WebApps .$for("jobhistory", HistoryClientService.class, this, "ws") @@ -157,8 +154,9 @@ public class HistoryClientService extends AbstractService { .withHttpSpnegoPrincipalKey( JHAdminConfig.MR_WEBAPP_SPNEGO_USER_NAME_KEY) .at(NetUtils.getHostPortString(bindAddress)).start(webApp); - conf.updateConnectAddr(JHAdminConfig.MR_HISTORY_WEBAPP_ADDRESS, - webApp.getListenerAddress()); + + MRWebAppUtil.setJHSWebappURLWithoutScheme(conf, + NetUtils.getHostPortString(webApp.getListenerAddress())); } @Override diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/JobHistoryServer.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/JobHistoryServer.java index 19c3f054a77..168d75d1083 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/JobHistoryServer.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/JobHistoryServer.java @@ -25,12 +25,13 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.http.HttpConfig; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapreduce.MRConfig; -import 
org.apache.hadoop.mapreduce.v2.app.webapp.WebAppUtil; import org.apache.hadoop.mapreduce.v2.hs.HistoryServerStateStoreService.HistoryServerState; import org.apache.hadoop.mapreduce.v2.hs.server.HSAdminServer; import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig; +import org.apache.hadoop.mapreduce.v2.util.MRWebAppUtil; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.source.JvmMetrics; import org.apache.hadoop.security.SecurityUtil; @@ -118,9 +119,8 @@ public class JobHistoryServer extends CompositeService { config.setBoolean(Dispatcher.DISPATCHER_EXIT_ON_ERROR_KEY, true); // This is required for WebApps to use https if enabled. - WebAppUtil.setSSLEnabledInYARN(conf.getBoolean( - CommonConfigurationKeysPublic.HADOOP_SSL_ENABLED_KEY, - CommonConfigurationKeysPublic.HADOOP_SSL_ENABLED_DEFAULT)); + MRWebAppUtil.initialize(getConfig()); + HttpConfig.setSecure(MRWebAppUtil.isSSLEnabledInJHS()); try { doSecureLogin(conf); } catch(IOException ie) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsJobBlock.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsJobBlock.java index 5bc44d19fe2..bb1cb3b9484 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsJobBlock.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsJobBlock.java @@ -18,21 +18,25 @@ package org.apache.hadoop.mapreduce.v2.hs.webapp; -import com.google.inject.Inject; +import static org.apache.hadoop.mapreduce.v2.app.webapp.AMParams.JOB_ID; +import static org.apache.hadoop.yarn.webapp.view.JQueryUI._EVEN; +import static org.apache.hadoop.yarn.webapp.view.JQueryUI._INFO_WRAP; +import static org.apache.hadoop.yarn.webapp.view.JQueryUI._ODD; +import static org.apache.hadoop.yarn.webapp.view.JQueryUI._TH; + import java.util.Date; import java.util.List; -import org.apache.hadoop.http.HttpConfig; import org.apache.hadoop.mapreduce.v2.api.records.AMInfo; import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.mapreduce.v2.app.AppContext; import org.apache.hadoop.mapreduce.v2.app.job.Job; -import org.apache.hadoop.mapreduce.v2.app.webapp.WebAppUtil; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.ConfEntryInfo; import org.apache.hadoop.mapreduce.v2.hs.webapp.dao.AMAttemptInfo; import org.apache.hadoop.mapreduce.v2.hs.webapp.dao.JobInfo; import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.mapreduce.v2.util.MRApps.TaskAttemptStateUI; +import org.apache.hadoop.mapreduce.v2.util.MRWebAppUtil; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.util.Times; import org.apache.hadoop.yarn.webapp.ResponseInfo; @@ -41,8 +45,8 @@ import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.DIV; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE; import org.apache.hadoop.yarn.webapp.view.HtmlBlock; import org.apache.hadoop.yarn.webapp.view.InfoBlock; -import static org.apache.hadoop.mapreduce.v2.app.webapp.AMWebApp.*; -import static org.apache.hadoop.yarn.webapp.view.JQueryUI.*; + +import com.google.inject.Inject; /** * Render a block of HTML for a give job. @@ -133,7 +137,7 @@ public class HsJobBlock extends HtmlBlock { table.tr((odd = !odd) ? _ODD : _EVEN). 
td(String.valueOf(attempt.getAttemptId())). td(new Date(attempt.getStartTime()).toString()). - td().a(".nodelink", url(WebAppUtil.getSchemePrefix(), + td().a(".nodelink", url(MRWebAppUtil.getYARNWebappScheme(), attempt.getNodeHttpAddress()), attempt.getNodeHttpAddress())._(). td().a(".logslink", url(attempt.getShortLogsLink()), diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsTaskPage.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsTaskPage.java index ba8b68fec2d..4d8a8cfc7b1 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsTaskPage.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsTaskPage.java @@ -18,8 +18,8 @@ package org.apache.hadoop.mapreduce.v2.hs.webapp; -import static org.apache.hadoop.mapreduce.v2.app.webapp.AMParams.TASK_TYPE; import static org.apache.hadoop.mapreduce.v2.app.webapp.AMParams.TASK_ID; +import static org.apache.hadoop.mapreduce.v2.app.webapp.AMParams.TASK_TYPE; import static org.apache.hadoop.yarn.webapp.view.JQueryUI.ACCORDION; import static org.apache.hadoop.yarn.webapp.view.JQueryUI.DATATABLES; import static org.apache.hadoop.yarn.webapp.view.JQueryUI.DATATABLES_ID; @@ -30,20 +30,17 @@ import static org.apache.hadoop.yarn.webapp.view.JQueryUI.tableInit; import java.util.Collection; import org.apache.commons.lang.StringEscapeUtils; -import org.apache.hadoop.http.HttpConfig; import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.api.records.TaskType; import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt; import org.apache.hadoop.mapreduce.v2.app.webapp.App; -import org.apache.hadoop.mapreduce.v2.app.webapp.WebAppUtil; import org.apache.hadoop.mapreduce.v2.util.MRApps; -import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.mapreduce.v2.util.MRWebAppUtil; import org.apache.hadoop.yarn.util.Times; import org.apache.hadoop.yarn.webapp.SubView; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TBODY; -import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TD; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TFOOT; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.THEAD; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TR; @@ -149,7 +146,7 @@ public class HsTaskPage extends HsView { .append(sortId + " ").append(taid).append("\",\"") .append(ta.getState().toString()).append("\",\"") - .append("") + .append("") .append(nodeRackName + "/" + nodeHttpAddr + "\",\"") .append(" Date: Wed, 2 Oct 2013 00:03:15 +0000 Subject: [PATCH 017/133] HDFS-5255. Distcp job fails with hsftp when https is enabled in insecure cluster. 
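The AM and history-server web blocks above now take their link prefix from MRWebAppUtil.getYARNWebappScheme() instead of the module-local WebAppUtil. As a rough illustration of what those url(...) fragments resolve to (the NodeManager address below is made up, and NodeLinkSketch is not a class in the patch):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.v2.util.MRWebAppUtil;

public class NodeLinkSketch {
  public static void main(String[] args) {
    MRWebAppUtil.initialize(new Configuration());
    String nodeHttpAddr = "nm-host.example.com:8042"; // hypothetical NodeManager web address
    // Same concatenation the .nodelink and .logslink cells perform:
    String nodeLink = MRWebAppUtil.getYARNWebappScheme() + nodeHttpAddr;
    System.out.println(nodeLink); // http://... unless hadoop.ssl.enabled is true
  }
}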
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1528279 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../apache/hadoop/hdfs/HftpFileSystem.java | 60 ++++++++++--------- .../apache/hadoop/hdfs/HsftpFileSystem.java | 23 ++++--- .../server/namenode/FileChecksumServlets.java | 11 +++- .../hdfs/server/namenode/FileDataServlet.java | 11 +++- .../hadoop/hdfs/TestHftpDelegationToken.java | 10 ++-- .../hadoop/hdfs/TestHftpFileSystem.java | 24 ++++---- 7 files changed, 79 insertions(+), 63 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index f66f58620ac..647d59ef1a8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -382,6 +382,9 @@ Release 2.1.2 - UNRELEASED HDFS-5265. Namenode fails to start when dfs.https.port is unspecified. (Haohui Mai via jing9) + HDFS-5255. Distcp job fails with hsftp when https is enabled in insecure + cluster. (Arpit Agarwal) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HftpFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HftpFileSystem.java index dd5e9c6daa0..361f6a0c462 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HftpFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HftpFileSystem.java @@ -94,7 +94,6 @@ public class HftpFileSystem extends FileSystem private URI hftpURI; protected URI nnUri; - protected URI nnSecureUri; public static final String HFTP_TIMEZONE = "UTC"; public static final String HFTP_DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ssZ"; @@ -134,34 +133,33 @@ public class HftpFileSystem extends FileSystem DFSConfigKeys.DFS_NAMENODE_HTTP_PORT_DEFAULT); } - protected int getDefaultSecurePort() { - return getConf().getInt(DFSConfigKeys.DFS_NAMENODE_HTTPS_PORT_KEY, - DFSConfigKeys.DFS_NAMENODE_HTTPS_PORT_DEFAULT); - } - + /** + * We generate the address with one of the following ports, in + * order of preference. + * 1. Port from the hftp URI e.g. hftp://namenode:4000/ will return 4000. + * 2. Port configured via DFS_NAMENODE_HTTP_PORT_KEY + * 3. DFS_NAMENODE_HTTP_PORT_DEFAULT i.e. 50070. + * + * @param uri + * @return + */ protected InetSocketAddress getNamenodeAddr(URI uri) { // use authority so user supplied uri can override port return NetUtils.createSocketAddr(uri.getAuthority(), getDefaultPort()); } - protected InetSocketAddress getNamenodeSecureAddr(URI uri) { - // must only use the host and the configured https port - return NetUtils.createSocketAddrForHost(uri.getHost(), getDefaultSecurePort()); - } - protected URI getNamenodeUri(URI uri) { - return DFSUtil.createUri("http", getNamenodeAddr(uri)); - } - - protected URI getNamenodeSecureUri(URI uri) { - return DFSUtil.createUri("http", getNamenodeSecureAddr(uri)); + return DFSUtil.createUri(getUnderlyingProtocol(), getNamenodeAddr(uri)); } + /** + * See the documentation of {@Link #getNamenodeAddr(URI)} for the logic + * behind selecting the canonical service name. 
+ * @return + */ @Override public String getCanonicalServiceName() { - // unlike other filesystems, hftp's service is the secure port, not the - // actual port in the uri - return SecurityUtil.buildTokenService(nnSecureUri).toString(); + return SecurityUtil.buildTokenService(nnUri).toString(); } @Override @@ -187,7 +185,6 @@ public class HftpFileSystem extends FileSystem setConf(conf); this.ugi = UserGroupInformation.getCurrentUser(); this.nnUri = getNamenodeUri(name); - this.nnSecureUri = getNamenodeSecureUri(name); try { this.hftpURI = new URI(name.getScheme(), name.getAuthority(), null, null, null); @@ -225,7 +222,7 @@ public class HftpFileSystem extends FileSystem protected Token selectDelegationToken( UserGroupInformation ugi) { - return hftpTokenSelector.selectToken(nnSecureUri, ugi.getTokens(), getConf()); + return hftpTokenSelector.selectToken(nnUri, ugi.getTokens(), getConf()); } @@ -234,6 +231,13 @@ public class HftpFileSystem extends FileSystem return renewToken; } + /** + * Return the underlying protocol that is used to talk to the namenode. + */ + protected String getUnderlyingProtocol() { + return "http"; + } + @Override public synchronized void setDelegationToken(Token token) { renewToken = token; @@ -257,7 +261,7 @@ public class HftpFileSystem extends FileSystem return ugi.doAs(new PrivilegedExceptionAction>() { @Override public Token run() throws IOException { - final String nnHttpUrl = nnSecureUri.toString(); + final String nnHttpUrl = nnUri.toString(); Credentials c; try { c = DelegationTokenFetcher.getDTfromRemote(nnHttpUrl, renewer); @@ -301,7 +305,7 @@ public class HftpFileSystem extends FileSystem * @throws IOException on error constructing the URL */ protected URL getNamenodeURL(String path, String query) throws IOException { - final URL url = new URL("http", nnUri.getHost(), + final URL url = new URL(getUnderlyingProtocol(), nnUri.getHost(), nnUri.getPort(), path + '?' 
+ query); if (LOG.isTraceEnabled()) { LOG.trace("url=" + url); @@ -703,17 +707,20 @@ public class HftpFileSystem extends FileSystem return true; } + protected String getUnderlyingProtocol() { + return "http"; + } + @SuppressWarnings("unchecked") @Override public long renew(Token token, Configuration conf) throws IOException { // update the kerberos credentials, if they are coming from a keytab UserGroupInformation.getLoginUser().checkTGTAndReloginFromKeytab(); - // use http to renew the token InetSocketAddress serviceAddr = SecurityUtil.getTokenServiceAddr(token); return DelegationTokenFetcher.renewDelegationToken - (DFSUtil.createUri("http", serviceAddr).toString(), + (DFSUtil.createUri(getUnderlyingProtocol(), serviceAddr).toString(), (Token) token); } @@ -723,10 +730,9 @@ public class HftpFileSystem extends FileSystem Configuration conf) throws IOException { // update the kerberos credentials, if they are coming from a keytab UserGroupInformation.getLoginUser().checkTGTAndReloginFromKeytab(); - // use http to cancel the token InetSocketAddress serviceAddr = SecurityUtil.getTokenServiceAddr(token); DelegationTokenFetcher.cancelDelegationToken - (DFSUtil.createUri("http", serviceAddr).toString(), + (DFSUtil.createUri(getUnderlyingProtocol(), serviceAddr).toString(), (Token) token); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HsftpFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HsftpFileSystem.java index 6a3bdba593b..5f5c4836953 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HsftpFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HsftpFileSystem.java @@ -68,6 +68,14 @@ public class HsftpFileSystem extends HftpFileSystem { return "hsftp"; } + /** + * Return the underlying protocol that is used to talk to the namenode. + */ + @Override + protected String getUnderlyingProtocol() { + return "https"; + } + @Override public void initialize(URI name, Configuration conf) throws IOException { super.initialize(name, conf); @@ -134,24 +142,15 @@ public class HsftpFileSystem extends HftpFileSystem { @Override protected int getDefaultPort() { - return getDefaultSecurePort(); + return getConf().getInt(DFSConfigKeys.DFS_NAMENODE_HTTPS_PORT_KEY, + DFSConfigKeys.DFS_NAMENODE_HTTPS_PORT_DEFAULT); } - @Override - protected InetSocketAddress getNamenodeSecureAddr(URI uri) { - return getNamenodeAddr(uri); - } - - @Override - protected URI getNamenodeUri(URI uri) { - return getNamenodeSecureUri(uri); - } - @Override protected HttpURLConnection openConnection(String path, String query) throws IOException { query = addDelegationTokenParam(query); - final URL url = new URL("https", nnUri.getHost(), + final URL url = new URL(getUnderlyingProtocol(), nnUri.getHost(), nnUri.getPort(), path + '?' 
+ query); HttpsURLConnection conn; conn = (HttpsURLConnection)connectionFactory.openConnection(url); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileChecksumServlets.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileChecksumServlets.java index 5c9d164e2ff..4e0ec8f4550 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileChecksumServlets.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileChecksumServlets.java @@ -57,9 +57,14 @@ public class FileChecksumServlets { final String hostname = host instanceof DatanodeInfo ? ((DatanodeInfo)host).getHostName() : host.getIpAddr(); final String scheme = request.getScheme(); - final int port = "https".equals(scheme) - ? (Integer)getServletContext().getAttribute(DFSConfigKeys.DFS_DATANODE_HTTPS_PORT_KEY) - : host.getInfoPort(); + int port = host.getInfoPort(); + if ("https".equals(scheme)) { + final Integer portObject = (Integer) getServletContext().getAttribute( + DFSConfigKeys.DFS_DATANODE_HTTPS_PORT_KEY); + if (portObject != null) { + port = portObject; + } + } final String encodedPath = ServletUtil.getRawPath(request, "/fileChecksum"); String dtParam = ""; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileDataServlet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileDataServlet.java index 8beef465fac..fa6391f7fc1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileDataServlet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileDataServlet.java @@ -61,9 +61,14 @@ public class FileDataServlet extends DfsServlet { } else { hostname = host.getIpAddr(); } - final int port = "https".equals(scheme) - ? (Integer)getServletContext().getAttribute(DFSConfigKeys.DFS_DATANODE_HTTPS_PORT_KEY) - : host.getInfoPort(); + int port = host.getInfoPort(); + if ("https".equals(scheme)) { + final Integer portObject = (Integer) getServletContext().getAttribute( + DFSConfigKeys.DFS_DATANODE_HTTPS_PORT_KEY); + if (portObject != null) { + port = portObject; + } + } String dtParam = ""; if (dt != null) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHftpDelegationToken.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHftpDelegationToken.java index 6dd7545c614..5e82baa539c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHftpDelegationToken.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHftpDelegationToken.java @@ -88,19 +88,21 @@ public class TestHftpDelegationToken { URI fsUri = URI.create("hftp://localhost"); MyHftpFileSystem fs = (MyHftpFileSystem) FileSystem.newInstance(fsUri, conf); assertEquals(httpPort, fs.getCanonicalUri().getPort()); - checkTokenSelection(fs, httpsPort, conf); // should still use secure port + checkTokenSelection(fs, httpPort, conf); // test with explicit default port + // Make sure it uses the port from the hftp URI. 
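The rewritten HftpFileSystem javadoc above spells out the port preference (a port in the hftp URI first, then DFS_NAMENODE_HTTP_PORT_KEY, then the 50070 default), and getCanonicalServiceName() now follows that http-derived URI rather than the secure port. A small sketch of the resulting behavior, assuming a localhost URI as in the updated tests; HftpPortSketch is only an illustration:

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HftpFileSystem;

public class HftpPortSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.setInt(DFSConfigKeys.DFS_NAMENODE_HTTP_PORT_KEY, 123);

    // 1. A port given in the URI wins.
    HftpFileSystem withPort = (HftpFileSystem) FileSystem.newInstance(
        URI.create("hftp://localhost:4000"), conf);
    System.out.println(withPort.getCanonicalServiceName()); // ends in :4000

    // 2. Otherwise the configured DFS_NAMENODE_HTTP_PORT_KEY value (123 here) is used,
    //    falling back to DFS_NAMENODE_HTTP_PORT_DEFAULT (50070) when unset.
    HftpFileSystem noPort = (HftpFileSystem) FileSystem.newInstance(
        URI.create("hftp://localhost"), conf);
    System.out.println(noPort.getCanonicalServiceName()); // ends in :123

    // hsftp:// differs only in getUnderlyingProtocol() ("https") and in taking its
    // default port from DFS_NAMENODE_HTTPS_PORT_KEY, per the overrides above.
  }
}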
fsUri = URI.create("hftp://localhost:"+httpPort); fs = (MyHftpFileSystem) FileSystem.newInstance(fsUri, conf); assertEquals(httpPort, fs.getCanonicalUri().getPort()); - checkTokenSelection(fs, httpsPort, conf); // should still use secure port + checkTokenSelection(fs, httpPort, conf); // test with non-default port + // Make sure it uses the port from the hftp URI. fsUri = URI.create("hftp://localhost:"+(httpPort+1)); fs = (MyHftpFileSystem) FileSystem.newInstance(fsUri, conf); assertEquals(httpPort+1, fs.getCanonicalUri().getPort()); - checkTokenSelection(fs, httpsPort, conf); // should still use secure port + checkTokenSelection(fs, httpPort + 1, conf); conf.setInt(DFSConfigKeys.DFS_NAMENODE_HTTPS_PORT_KEY, 5); } @@ -178,7 +180,7 @@ public class TestHftpDelegationToken { } assertNotNull(ex); assertNotNull(ex.getCause()); - assertEquals("Unexpected end of file from server", + assertEquals("Remote host closed connection during handshake", ex.getCause().getMessage()); } finally { t.interrupt(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHftpFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHftpFileSystem.java index 5213db8431d..93ee402aa9f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHftpFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHftpFileSystem.java @@ -294,11 +294,13 @@ public class TestHftpFileSystem { HftpFileSystem fs = (HftpFileSystem) FileSystem.get(uri, conf); assertEquals(DFSConfigKeys.DFS_NAMENODE_HTTP_PORT_DEFAULT, fs.getDefaultPort()); - assertEquals(DFSConfigKeys.DFS_NAMENODE_HTTPS_PORT_DEFAULT, fs.getDefaultSecurePort()); assertEquals(uri, fs.getUri()); + + // HFTP uses http to get the token so canonical service name should + // return the http port. assertEquals( - "127.0.0.1:"+DFSConfigKeys.DFS_NAMENODE_HTTPS_PORT_DEFAULT, + "127.0.0.1:" + DFSConfigKeys.DFS_NAMENODE_HTTP_PORT_DEFAULT, fs.getCanonicalServiceName() ); } @@ -307,17 +309,18 @@ public class TestHftpFileSystem { public void testHftpCustomDefaultPorts() throws IOException { Configuration conf = new Configuration(); conf.setInt(DFSConfigKeys.DFS_NAMENODE_HTTP_PORT_KEY, 123); - conf.setInt(DFSConfigKeys.DFS_NAMENODE_HTTPS_PORT_KEY, 456); URI uri = URI.create("hftp://localhost"); HftpFileSystem fs = (HftpFileSystem) FileSystem.get(uri, conf); assertEquals(123, fs.getDefaultPort()); - assertEquals(456, fs.getDefaultSecurePort()); assertEquals(uri, fs.getUri()); + + // HFTP uses http to get the token so canonical service name should + // return the http port. 
assertEquals( - "127.0.0.1:456", + "127.0.0.1:123", fs.getCanonicalServiceName() ); } @@ -329,11 +332,10 @@ public class TestHftpFileSystem { HftpFileSystem fs = (HftpFileSystem) FileSystem.get(uri, conf); assertEquals(DFSConfigKeys.DFS_NAMENODE_HTTP_PORT_DEFAULT, fs.getDefaultPort()); - assertEquals(DFSConfigKeys.DFS_NAMENODE_HTTPS_PORT_DEFAULT, fs.getDefaultSecurePort()); assertEquals(uri, fs.getUri()); assertEquals( - "127.0.0.1:"+DFSConfigKeys.DFS_NAMENODE_HTTPS_PORT_DEFAULT, + "127.0.0.1:123", fs.getCanonicalServiceName() ); } @@ -342,17 +344,15 @@ public class TestHftpFileSystem { public void testHftpCustomUriPortWithCustomDefaultPorts() throws IOException { Configuration conf = new Configuration(); conf.setInt(DFSConfigKeys.DFS_NAMENODE_HTTP_PORT_KEY, 123); - conf.setInt(DFSConfigKeys.DFS_NAMENODE_HTTPS_PORT_KEY, 456); URI uri = URI.create("hftp://localhost:789"); HftpFileSystem fs = (HftpFileSystem) FileSystem.get(uri, conf); assertEquals(123, fs.getDefaultPort()); - assertEquals(456, fs.getDefaultSecurePort()); assertEquals(uri, fs.getUri()); assertEquals( - "127.0.0.1:456", + "127.0.0.1:789", fs.getCanonicalServiceName() ); } @@ -366,7 +366,6 @@ public class TestHftpFileSystem { HsftpFileSystem fs = (HsftpFileSystem) FileSystem.get(uri, conf); assertEquals(DFSConfigKeys.DFS_NAMENODE_HTTPS_PORT_DEFAULT, fs.getDefaultPort()); - assertEquals(DFSConfigKeys.DFS_NAMENODE_HTTPS_PORT_DEFAULT, fs.getDefaultSecurePort()); assertEquals(uri, fs.getUri()); assertEquals( @@ -385,7 +384,6 @@ public class TestHftpFileSystem { HsftpFileSystem fs = (HsftpFileSystem) FileSystem.get(uri, conf); assertEquals(456, fs.getDefaultPort()); - assertEquals(456, fs.getDefaultSecurePort()); assertEquals(uri, fs.getUri()); assertEquals( @@ -401,7 +399,6 @@ public class TestHftpFileSystem { HsftpFileSystem fs = (HsftpFileSystem) FileSystem.get(uri, conf); assertEquals(DFSConfigKeys.DFS_NAMENODE_HTTPS_PORT_DEFAULT, fs.getDefaultPort()); - assertEquals(DFSConfigKeys.DFS_NAMENODE_HTTPS_PORT_DEFAULT, fs.getDefaultSecurePort()); assertEquals(uri, fs.getUri()); assertEquals( @@ -420,7 +417,6 @@ public class TestHftpFileSystem { HsftpFileSystem fs = (HsftpFileSystem) FileSystem.get(uri, conf); assertEquals(456, fs.getDefaultPort()); - assertEquals(456, fs.getDefaultSecurePort()); assertEquals(uri, fs.getUri()); assertEquals( From dd0fb6d75c06d7d63e1cf4a1abbc136d73fe2b76 Mon Sep 17 00:00:00 2001 From: Colin McCabe Date: Wed, 2 Oct 2013 00:37:59 +0000 Subject: [PATCH 018/133] move HADOOP-9758 to the branch-2.1.2 section git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1528288 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 7a440bd272b..e47d175063e 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -315,9 +315,6 @@ Release 2.3.0 - UNRELEASED HADOOP-9435. Support building the JNI code against the IBM JVM. (Tian Hong Wang via Colin Patrick McCabe) - HADOOP-9758. Provide configuration option for FileSystem/FileContext - symlink resolution. (Andrew Wang via Colin Patrick McCabe) - HADOOP-9848. Create a MiniKDC for use with security testing. (ywskycn via tucu) @@ -410,6 +407,9 @@ Release 2.1.2 - UNRELEASED HADOOP-9976. Different versions of avro and avro-maven-plugin (Karthik Kambatla via Sandy Ryza) + HADOOP-9758. 
Provide configuration option for FileSystem/FileContext + symlink resolution. (Andrew Wang via Colin Patrick McCabe) + OPTIMIZATIONS BUG FIXES From c7cd620fa4dfac1322a41c50d3d0965dabc6be44 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Wed, 2 Oct 2013 00:49:00 +0000 Subject: [PATCH 019/133] HADOOP-8315. Support SASL-authenticated ZooKeeper in ActiveStandbyElector. Contributed by Todd Lipcon git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1528293 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.txt | 3 +++ .../main/java/org/apache/hadoop/ha/ActiveStandbyElector.java | 3 +++ hadoop-project/pom.xml | 4 ++-- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index e47d175063e..836a86dc54b 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -393,6 +393,9 @@ Release 2.2.0 - UNRELEASED BUG FIXES + HADOOP-8315. Support SASL-authenticated ZooKeeper in ActiveStandbyElector + (todd) + Release 2.1.2 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java index 4682bc70261..a5c1467f4fb 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java @@ -568,6 +568,9 @@ public class ActiveStandbyElector implements StatCallback, StringCallback { enterNeutralMode(); reJoinElection(0); break; + case SaslAuthenticated: + LOG.info("Successfully authenticated to ZooKeeper using SASL."); + break; default: fatalError("Unexpected Zookeeper watch event state: " + event.getState()); diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index d5b3d8027cd..c8e62af2a76 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -685,7 +685,7 @@ org.apache.zookeeper zookeeper - 3.4.2 + 3.4.5 @@ -709,7 +709,7 @@ org.apache.zookeeper zookeeper - 3.4.2 + 3.4.5 test-jar test From 703838c59841952757d941df4414aa77d57fe492 Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Wed, 2 Oct 2013 04:00:06 +0000 Subject: [PATCH 020/133] HADOOP-10012. Secure Oozie jobs fail with delegation token renewal exception in Namenode HA setup. Contributed by Daryn Sharp and Suresh Srinivas. 
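[Editor's note, hedged: a sketch of the approach the diff below takes, not code lifted from
the patch. The logical HA delegation token is cloned once per physical NameNode address,
each clone is wrapped as a Token.PrivateToken, and UserGroupInformation.getCredentials()
filters those wrappers out, so the clones are usable for RPC to a specific NameNode but
never leak into submitted jobs.]

    // Illustrative only; identifiers mirror the patch, the alias scheme is an editor's example.
    for (InetSocketAddress nnAddr : nnAddrs) {
      Token<?> clone = new Token.PrivateToken(haToken);     // mark the copy as internal
      SecurityUtil.setTokenService(clone, nnAddr);          // service = physical host:port
      ugi.addToken(new Text("ha-clone:" + nnAddr), clone);  // visible to RPC, hidden from getCredentials()
    }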
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1528301 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop/security/UserGroupInformation.java | 10 ++- .../apache/hadoop/security/token/Token.java | 36 +++++------ .../security/TestUserGroupInformation.java | 61 +++++++++++++------ .../java/org/apache/hadoop/hdfs/HAUtil.java | 35 ++++++----- 4 files changed, 92 insertions(+), 50 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java index 1594ffe0ea8..3bb807341a3 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java @@ -33,6 +33,7 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; @@ -1325,7 +1326,14 @@ public class UserGroupInformation { * @return Credentials of tokens associated with this user */ public synchronized Credentials getCredentials() { - return new Credentials(getCredentialsInternal()); + Credentials creds = new Credentials(getCredentialsInternal()); + Iterator> iter = creds.getAllTokens().iterator(); + while (iter.hasNext()) { + if (iter.next() instanceof Token.PrivateToken) { + iter.remove(); + } + } + return creds; } /** diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/Token.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/Token.java index 905c948da75..14d81910b51 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/Token.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/Token.java @@ -19,31 +19,20 @@ package org.apache.hadoop.security.token; import com.google.common.collect.Maps; - -import java.io.ByteArrayInputStream; -import java.io.DataInput; -import java.io.DataInputStream; -import java.io.DataOutput; -import java.io.IOException; -import java.util.Arrays; -import java.util.Map; -import java.util.ServiceLoader; - import org.apache.commons.codec.binary.Base64; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; - import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.io.DataInputBuffer; -import org.apache.hadoop.io.DataOutputBuffer; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableComparator; -import org.apache.hadoop.io.WritableUtils; +import org.apache.hadoop.io.*; import org.apache.hadoop.util.ReflectionUtils; +import java.io.*; +import java.util.Arrays; +import java.util.Map; +import java.util.ServiceLoader; + /** * The client-side form of the token. */ @@ -195,6 +184,19 @@ public class Token implements Writable { service = newService; } + /** + * Indicates whether the token is a clone. 
Used by HA failover proxy + * to indicate a token should not be visible to the user via + * UGI.getCredentials() + */ + @InterfaceAudience.Private + @InterfaceStability.Unstable + public static class PrivateToken extends Token { + public PrivateToken(Token token) { + super(token); + } + } + @Override public void readFields(DataInput in) throws IOException { int len = WritableUtils.readVInt(in); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUserGroupInformation.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUserGroupInformation.java index aa40cf48bd9..baa95b14fc1 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUserGroupInformation.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUserGroupInformation.java @@ -16,11 +16,21 @@ */ package org.apache.hadoop.security; -import static org.junit.Assert.*; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.ipc.TestSaslRPC; +import org.apache.hadoop.metrics2.MetricsRecordBuilder; +import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; +import org.apache.hadoop.security.authentication.util.KerberosName; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.security.token.TokenIdentifier; +import org.apache.hadoop.util.Shell; import org.junit.*; -import static org.mockito.Mockito.*; - +import javax.security.auth.Subject; +import javax.security.auth.login.AppConfigurationEntry; +import javax.security.auth.login.LoginContext; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; @@ -30,21 +40,13 @@ import java.util.Collection; import java.util.LinkedHashSet; import java.util.Set; -import javax.security.auth.Subject; -import javax.security.auth.login.AppConfigurationEntry; -import javax.security.auth.login.LoginContext; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.metrics2.MetricsRecordBuilder; -import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; -import org.apache.hadoop.security.authentication.util.KerberosName; -import org.apache.hadoop.security.token.Token; -import org.apache.hadoop.security.token.TokenIdentifier; -import static org.apache.hadoop.test.MetricsAsserts.*; -import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTH_TO_LOCAL; -import org.apache.hadoop.util.Shell; +import static org.apache.hadoop.ipc.TestSaslRPC.*; +import static org.apache.hadoop.security.token.delegation.TestDelegationToken.TestDelegationTokenIdentifier; +import static org.apache.hadoop.test.MetricsAsserts.*; +import static org.junit.Assert.*; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; public class TestUserGroupInformation { final private static String USER_NAME = "user1@HADOOP.APACHE.ORG"; @@ -786,4 +788,29 @@ public class TestUserGroupInformation { UserGroupInformation.setLoginUser(ugi); assertEquals(ugi, UserGroupInformation.getLoginUser()); } + + /** + * In some scenario, such as HA, delegation tokens are associated with a + * logical name. The tokens are cloned and are associated with the + * physical address of the server where the service is provided. 
+ * This test ensures cloned delegated tokens are locally used + * and are not returned in {@link UserGroupInformation#getCredentials()} + */ + @Test + public void testPrivateTokenExclusion() throws Exception { + UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); + TestTokenIdentifier tokenId = new TestTokenIdentifier(); + Token token = new Token( + tokenId.getBytes(), "password".getBytes(), + tokenId.getKind(), null); + ugi.addToken(new Text("regular-token"), token); + + // Now add cloned private token + ugi.addToken(new Text("private-token"), new Token.PrivateToken(token)); + ugi.addToken(new Text("private-token1"), new Token.PrivateToken(token)); + + // Ensure only non-private tokens are returned + Collection> tokens = ugi.getCredentials().getAllTokens(); + assertEquals(1, tokens.size()); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java index 9674b6d6f7b..7d53fb991d8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java @@ -17,15 +17,9 @@ */ package org.apache.hadoop.hdfs; -import static org.apache.hadoop.hdfs.DFSConfigKeys.*; -import java.io.IOException; -import java.net.InetSocketAddress; -import java.net.URI; -import java.net.URISyntaxException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Map; - +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.HadoopIllegalArgumentException; @@ -41,11 +35,17 @@ import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; -import static org.apache.hadoop.hdfs.protocol.HdfsConstants.HA_DT_SERVICE_PREFIX; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; +import java.io.IOException; +import java.net.InetSocketAddress; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Map; + +import static org.apache.hadoop.hdfs.DFSConfigKeys.*; +import static org.apache.hadoop.hdfs.protocol.HdfsConstants.HA_DT_SERVICE_PREFIX; public class HAUtil { @@ -265,10 +265,15 @@ public class HAUtil { tokenSelector.selectToken(haService, ugi.getTokens()); if (haToken != null) { for (InetSocketAddress singleNNAddr : nnAddrs) { + // this is a minor hack to prevent physical HA tokens from being + // exposed to the user via UGI.getCredentials(), otherwise these + // cloned tokens may be inadvertently propagated to jobs Token specificToken = - new Token(haToken); + new Token.PrivateToken(haToken); SecurityUtil.setTokenService(specificToken, singleNNAddr); - ugi.addToken(specificToken); + Text alias = + new Text(HA_DT_SERVICE_PREFIX + "//" + specificToken.getService()); + ugi.addToken(alias, specificToken); LOG.debug("Mapped HA service delegation token for logical URI " + haUri + " to namenode " + singleNNAddr); } From fc5bd930df793c3fab2964bb9ea2d0e9d412e493 Mon Sep 17 00:00:00 2001 From: Chris Nauroth Date: Wed, 2 Oct 2013 05:25:18 +0000 Subject: [PATCH 021/133] HDFS-5279. 
Guard against NullPointerException in NameNode JSP pages before initialization of FSNamesystem. Contributed by Chris Nauroth. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1528308 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../server/namenode/NamenodeJspHelper.java | 49 ++++++++++--- .../src/main/webapps/hdfs/corrupt_files.jsp | 10 ++- .../src/main/webapps/hdfs/dfshealth.jsp | 13 +--- .../src/main/webapps/hdfs/dfsnodelist.jsp | 4 +- .../namenode/TestNameNodeJspHelper.java | 73 +++++++++++++++++++ 6 files changed, 125 insertions(+), 27 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 647d59ef1a8..29b9be665d6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -385,6 +385,9 @@ Release 2.1.2 - UNRELEASED HDFS-5255. Distcp job fails with hsftp when https is enabled in insecure cluster. (Arpit Agarwal) + HDFS-5279. Guard against NullPointerException in NameNode JSP pages before + initialization of FSNamesystem. (cnauroth) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeJspHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeJspHelper.java index 5f8d9d12fbe..1f3d328b8f0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeJspHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeJspHelper.java @@ -30,6 +30,7 @@ import java.net.URLEncoder; import java.security.PrivilegedExceptionAction; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.Date; import java.util.Iterator; import java.util.List; @@ -210,6 +211,9 @@ class NamenodeJspHelper { static void generateSnapshotReport(JspWriter out, FSNamesystem fsn) throws IOException { + if (fsn == null) { + return; + } out.println("
" + "\n" + "" @@ -652,7 +656,8 @@ class NamenodeJspHelper { .getAttribute(JspHelper.CURRENT_CONF); // We can't redirect if there isn't a DN to redirect to. // Lets instead show a proper error message. - if (nn.getNamesystem().getNumLiveDataNodes() < 1) { + FSNamesystem fsn = nn.getNamesystem(); + if (fsn == null || fsn.getNumLiveDataNodes() < 1) { throw new IOException("Can't browse the DFS since there are no " + "live nodes available to redirect to."); } @@ -688,6 +693,20 @@ class NamenodeJspHelper { resp.sendRedirect(redirectLocation); } + /** + * Returns a descriptive label for the running NameNode. If the NameNode has + * initialized to the point of running its RPC server, then this label consists + * of the host and port of the RPC server. Otherwise, the label is a message + * stating that the NameNode is still initializing. + * + * @param nn NameNode to describe + * @return String NameNode label + */ + static String getNameNodeLabel(NameNode nn) { + return nn.getRpcServer() != null ? nn.getNameNodeAddressHostPortString() : + "initializing"; + } + static class NodeListJsp { private int rowNum = 0; @@ -843,6 +862,9 @@ class NamenodeJspHelper { HttpServletRequest request) throws IOException { final NameNode nn = NameNodeHttpServer.getNameNodeFromContext(context); final FSNamesystem ns = nn.getNamesystem(); + if (ns == null) { + return; + } final DatanodeManager dm = ns.getBlockManager().getDatanodeManager(); final List live = new ArrayList(); @@ -1022,14 +1044,16 @@ class NamenodeJspHelper { final BlockManager blockManager; XMLBlockInfo(FSNamesystem fsn, Long blockId) { - this.blockManager = fsn.getBlockManager(); + this.blockManager = fsn != null ? fsn.getBlockManager() : null; if (blockId == null) { this.block = null; this.inode = null; } else { this.block = new Block(blockId); - this.inode = ((INode)blockManager.getBlockCollection(block)).asFile(); + this.inode = blockManager != null ? + ((INode)blockManager.getBlockCollection(block)).asFile() : + null; } } @@ -1103,8 +1127,10 @@ class NamenodeJspHelper { } doc.startTag("replicas"); - for(final Iterator it = blockManager.datanodeIterator(block); - it.hasNext(); ) { + for (final Iterator it = blockManager != null ? + blockManager.datanodeIterator(block) : + Collections.emptyList().iterator(); + it.hasNext();) { doc.startTag("replica"); DatanodeDescriptor dd = it.next(); @@ -1140,7 +1166,7 @@ class NamenodeJspHelper { XMLCorruptBlockInfo(FSNamesystem fsn, Configuration conf, int numCorruptBlocks, Long startingBlockId) { - this.blockManager = fsn.getBlockManager(); + this.blockManager = fsn != null ? fsn.getBlockManager() : null; this.conf = conf; this.numCorruptBlocks = numCorruptBlocks; this.startingBlockId = startingBlockId; @@ -1163,16 +1189,19 @@ class NamenodeJspHelper { doc.endTag(); doc.startTag("num_missing_blocks"); - doc.pcdata(""+blockManager.getMissingBlocksCount()); + doc.pcdata("" + (blockManager != null ? + blockManager.getMissingBlocksCount() : 0)); doc.endTag(); doc.startTag("num_corrupt_replica_blocks"); - doc.pcdata(""+blockManager.getCorruptReplicaBlocksCount()); + doc.pcdata("" + (blockManager != null ? + blockManager.getCorruptReplicaBlocksCount() : 0)); doc.endTag(); doc.startTag("corrupt_replica_block_ids"); - final long[] corruptBlockIds = blockManager.getCorruptReplicaBlockIds( - numCorruptBlocks, startingBlockId); + final long[] corruptBlockIds = blockManager != null ? 
+ blockManager.getCorruptReplicaBlockIds(numCorruptBlocks, + startingBlockId) : null; if (corruptBlockIds != null) { for (Long blockId: corruptBlockIds) { doc.startTag("block_id"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/corrupt_files.jsp b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/corrupt_files.jsp index 04820754dc5..7c9050ddb1c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/corrupt_files.jsp +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/corrupt_files.jsp @@ -25,6 +25,7 @@ import="org.apache.hadoop.fs.Path" import="org.apache.hadoop.ha.HAServiceProtocol.HAServiceState" import="java.util.Collection" + import="java.util.Collections" import="java.util.Arrays" %> <%!//for java.io.Serializable private static final long serialVersionUID = 1L;%> @@ -34,9 +35,10 @@ HAServiceState nnHAState = nn.getServiceState(); boolean isActive = (nnHAState == HAServiceState.ACTIVE); String namenodeRole = nn.getRole().toString(); - String namenodeLabel = nn.getNameNodeAddressHostPortString(); - Collection corruptFileBlocks = - fsn.listCorruptFileBlocks("/", null); + String namenodeLabel = NamenodeJspHelper.getNameNodeLabel(nn); + Collection corruptFileBlocks = fsn != null ? + fsn.listCorruptFileBlocks("/", null) : + Collections.emptyList(); int corruptFileCount = corruptFileBlocks.size(); %> @@ -48,7 +50,7 @@

<%=namenodeRole%> '<%=namenodeLabel%>'

<%=NamenodeJspHelper.getVersionTable(fsn)%>
-<% if (isActive) { %> +<% if (isActive && fsn != null) { %> Browse the filesystem
<% } %> diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.jsp b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.jsp index 36f6199d978..10872a7af09 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.jsp +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.jsp @@ -34,29 +34,20 @@ boolean isActive = (nnHAState == HAServiceState.ACTIVE); String namenodeRole = nn.getRole().toString(); String namenodeState = nnHAState.toString(); - String namenodeLabel = nn.getRpcServer() != null ? - nn.getNameNodeAddressHostPortString() : null; + String namenodeLabel = NamenodeJspHelper.getNameNodeLabel(nn); %> -<% if (namenodeLabel != null) { %> Hadoop <%=namenodeRole%> <%=namenodeLabel%> -<% } else { %> -Hadoop <%=namenodeRole%> -<% } %> -<% if (namenodeLabel != null) { %>

<%=namenodeRole%> '<%=namenodeLabel%>' (<%=namenodeState%>)

-<% } else { %> -

<%=namenodeRole%> (<%=namenodeState%>)

-<% } %> <%= NamenodeJspHelper.getVersionTable(fsn) %>
-<% if (isActive) { %> +<% if (isActive && fsn != null) { %> Browse the filesystem
<% } %> <%=namenodeRole%> Logs diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfsnodelist.jsp b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfsnodelist.jsp index 446104a0af5..3bb34986038 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfsnodelist.jsp +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfsnodelist.jsp @@ -33,7 +33,7 @@ String namenodeRole = nn.getRole().toString(); FSNamesystem fsn = nn.getNamesystem(); HAServiceState nnHAState = nn.getServiceState(); boolean isActive = (nnHAState == HAServiceState.ACTIVE); -String namenodeLabel = nn.getNameNodeAddressHostPortString(); +String namenodeLabel = NamenodeJspHelper.getNameNodeLabel(nn); %> @@ -46,7 +46,7 @@ String namenodeLabel = nn.getNameNodeAddressHostPortString();

<%=namenodeRole%> '<%=namenodeLabel%>'

<%= NamenodeJspHelper.getVersionTable(fsn) %>
-<% if (isActive) { %> +<% if (isActive && fsn != null) { %> Browse the filesystem
<% } %> <%=namenodeRole%> Logs
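[Editor's note, hedged: a summary sketch of the guard pattern the JSP changes above share,
not code from the patch. Each page resolves FSNamesystem and the NameNode label up front
and only renders namesystem-dependent sections once the namesystem actually exists.]

    // Illustrative scriptlet-style sketch; identifiers mirror the patch.
    NameNode nn = NameNodeHttpServer.getNameNodeFromContext(application);
    FSNamesystem fsn = nn.getNamesystem();                          // may still be null during startup
    boolean isActive = (nn.getServiceState() == HAServiceState.ACTIVE);
    String namenodeLabel = NamenodeJspHelper.getNameNodeLabel(nn);  // "initializing" until the RPC server is up
    if (isActive && fsn != null) {
      // safe to render links and tables that need a live FSNamesystem
    }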
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeJspHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeJspHelper.java index a847a9438a3..3207f0ccb01 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeJspHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeJspHelper.java @@ -25,12 +25,15 @@ import static org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase.SAVIN import static org.mockito.Mockito.atLeastOnce; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; import java.io.IOException; import java.util.List; import java.util.regex.Pattern; +import javax.servlet.ServletContext; import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; import javax.servlet.jsp.JspWriter; import org.apache.hadoop.conf.Configuration; @@ -45,6 +48,7 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.mockito.ArgumentCaptor; +import org.znerd.xmlenc.XMLOutputter; public class TestNameNodeJspHelper { @@ -117,6 +121,75 @@ public class TestNameNodeJspHelper { Assert.assertEquals("", NamenodeJspHelper.getRollingUpgradeText(null)); } + /** + * Tests for non-null, non-empty NameNode label. + */ + @Test + public void testGetNameNodeLabel() { + String nameNodeLabel = NamenodeJspHelper.getNameNodeLabel( + cluster.getNameNode()); + Assert.assertNotNull(nameNodeLabel); + Assert.assertFalse(nameNodeLabel.isEmpty()); + } + + /** + * Tests for non-null, non-empty NameNode label when called before + * initialization of the NameNode RPC server. + */ + @Test + public void testGetNameNodeLabelNullRpcServer() { + NameNode nn = mock(NameNode.class); + when(nn.getRpcServer()).thenReturn(null); + String nameNodeLabel = NamenodeJspHelper.getNameNodeLabel( + cluster.getNameNode()); + Assert.assertNotNull(nameNodeLabel); + Assert.assertFalse(nameNodeLabel.isEmpty()); + } + + /** + * Tests that passing a null FSNamesystem to generateSnapshotReport does not + * throw NullPointerException. + */ + @Test + public void testGenerateSnapshotReportNullNamesystem() throws Exception { + NamenodeJspHelper.generateSnapshotReport(mock(JspWriter.class), null); + } + + /** + * Tests that redirectToRandomDataNode does not throw NullPointerException if + * it finds a null FSNamesystem. + */ + @Test(expected=IOException.class) + public void testRedirectToRandomDataNodeNullNamesystem() throws Exception { + NameNode nn = mock(NameNode.class); + when(nn.getNamesystem()).thenReturn(null); + ServletContext context = mock(ServletContext.class); + when(context.getAttribute("name.node")).thenReturn(nn); + NamenodeJspHelper.redirectToRandomDataNode(context, + mock(HttpServletRequest.class), mock(HttpServletResponse.class)); + } + + /** + * Tests that XMLBlockInfo does not throw NullPointerException if it finds a + * null FSNamesystem. + */ + @Test + public void testXMLBlockInfoNullNamesystem() throws IOException { + XMLOutputter doc = new XMLOutputter(mock(JspWriter.class), "UTF-8"); + new NamenodeJspHelper.XMLBlockInfo(null, 1L).toXML(doc); + } + + /** + * Tests that XMLCorruptBlockInfo does not throw NullPointerException if it + * finds a null FSNamesystem. 
+ */ + @Test + public void testXMLCorruptBlockInfoNullNamesystem() throws IOException { + XMLOutputter doc = new XMLOutputter(mock(JspWriter.class), "UTF-8"); + new NamenodeJspHelper.XMLCorruptBlockInfo(null, mock(Configuration.class), + 10, 1L).toXML(doc); + } + /** * Checks if the list contains any string that partially matches the regex. * From cb44a4b850e97e5dcce101465c033683a738fe0a Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Wed, 2 Oct 2013 06:31:27 +0000 Subject: [PATCH 022/133] YARN-1260. Added webapp.http.address to yarn-default.xml so that default install with https enabled doesn't have broken link on NM UI. Contributed by Omkar Vinit Joshi. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1528312 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 4 ++++ .../src/main/resources/yarn-default.xml | 8 +++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index a4b289b7991..b112991f85e 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -117,6 +117,10 @@ Release 2.1.2 - UNRELEASED YARN-1262. TestApplicationCleanup relies on all containers assigned in a single heartbeat (Karthik Kambatla via Sandy Ryza) + YARN-1260. Added webapp.http.address to yarn-default.xml so that default + install with https enabled doesn't have broken link on NM UI. (Omkar Vinit + Joshi via vinodkv) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index a77115824d2..171b118b7bf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -100,11 +100,17 @@ - The address of the RM web application. + The http address of the RM web application. yarn.resourcemanager.webapp.address ${yarn.resourcemanager.hostname}:8088 + + The https adddress of the RM web application. + yarn.resourcemanager.webapp.https.address + ${yarn.resourcemanager.hostname}:8090 + + yarn.resourcemanager.resource-tracker.address ${yarn.resourcemanager.hostname}:8031 From 97a8ebe849b5e74f8e9452fb60e3b75d72a987ac Mon Sep 17 00:00:00 2001 From: Jason Darrell Lowe Date: Wed, 2 Oct 2013 14:48:12 +0000 Subject: [PATCH 023/133] HADOOP-9063. enhance unit-test coverage of class org.apache.hadoop.fs.FileUtil. Contributed by Ivan A. Veselovsky git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1528502 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 3 + .../org/apache/hadoop/fs/TestFileUtil.java | 300 +++++++++++++++++- 2 files changed, 291 insertions(+), 12 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 836a86dc54b..37b297bd7eb 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -339,6 +339,9 @@ Release 2.3.0 - UNRELEASED HADOOP-10006. Compilation failure in trunk for o.a.h.fs.swift.util.JSONUtil (Junping Du via stevel) + HADOOP-9063. enhance unit-test coverage of class + org.apache.hadoop.fs.FileUtil (Ivan A. Veselovsky via jlowe) + OPTIMIZATIONS HADOOP-9748. 
Reduce blocking on UGI.ensureInitialized (daryn) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileUtil.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileUtil.java index f37bf4f0c04..3eaa97f2471 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileUtil.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileUtil.java @@ -24,6 +24,8 @@ import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.FileReader; import java.io.IOException; +import java.io.OutputStream; +import java.net.URI; import java.io.PrintWriter; import java.util.ArrayList; import java.util.Arrays; @@ -32,15 +34,20 @@ import java.util.List; import java.util.jar.Attributes; import java.util.jar.JarFile; import java.util.jar.Manifest; +import java.util.zip.ZipEntry; +import java.util.zip.ZipOutputStream; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.Shell; import org.apache.hadoop.util.StringUtils; +import org.apache.tools.tar.TarEntry; +import org.apache.tools.tar.TarOutputStream; import org.junit.After; import org.junit.Assert; import org.junit.Test; +import static org.junit.Assert.*; public class TestFileUtil { private static final Log LOG = LogFactory.getLog(TestFileUtil.class); @@ -48,14 +55,14 @@ public class TestFileUtil { private static final String TEST_ROOT_DIR = System.getProperty( "test.build.data", "/tmp") + "/fu"; private static final File TEST_DIR = new File(TEST_ROOT_DIR); - private static String FILE = "x"; - private static String LINK = "y"; - private static String DIR = "dir"; - private File del = new File(TEST_DIR, "del"); - private File tmp = new File(TEST_DIR, "tmp"); - private File dir1 = new File(del, DIR + "1"); - private File dir2 = new File(del, DIR + "2"); - private File partitioned = new File(TEST_DIR, "partitioned"); + private static final String FILE = "x"; + private static final String LINK = "y"; + private static final String DIR = "dir"; + private final File del = new File(TEST_DIR, "del"); + private final File tmp = new File(TEST_DIR, "tmp"); + private final File dir1 = new File(del, DIR + "1"); + private final File dir2 = new File(del, DIR + "2"); + private final File partitioned = new File(TEST_DIR, "partitioned"); /** * Creates multiple directories for testing. @@ -116,17 +123,17 @@ public class TestFileUtil { * @param contents String non-null file contents. * @throws IOException if an I/O error occurs. */ - private void createFile(File directory, String name, String contents) + private File createFile(File directory, String name, String contents) throws IOException { File newFile = new File(directory, name); PrintWriter pw = new PrintWriter(newFile); - try { pw.println(contents); } finally { pw.close(); } + return newFile; } @Test (timeout = 30000) @@ -553,14 +560,283 @@ public class TestFileUtil { * @throws IOException */ @Test (timeout = 30000) - public void testGetDU() throws IOException { + public void testGetDU() throws Exception { setupDirs(); long du = FileUtil.getDU(TEST_DIR); // Only two files (in partitioned). Each has 3 characters + system-specific // line separator. 
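// [Editor's aside, hedged] Worked example of the expectation below: each of the two files
// holds three characters plus the platform line separator, so on Linux ("\n", one byte)
// getDU should return 2 * (3 + 1) = 8 bytes, while on Windows ("\r\n") it would be
// 2 * (3 + 2) = 10.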
- long expected = 2 * (3 + System.getProperty("line.separator").length()); + final long expected = 2 * (3 + System.getProperty("line.separator").length()); Assert.assertEquals(expected, du); + + // target file does not exist: + final File doesNotExist = new File(tmp, "QuickBrownFoxJumpsOverTheLazyDog"); + long duDoesNotExist = FileUtil.getDU(doesNotExist); + assertEquals(0, duDoesNotExist); + + // target file is not a directory: + File notADirectory = new File(partitioned, "part-r-00000"); + long duNotADirectoryActual = FileUtil.getDU(notADirectory); + long duNotADirectoryExpected = 3 + System.getProperty("line.separator").length(); + assertEquals(duNotADirectoryExpected, duNotADirectoryActual); + + try { + // one of target files is not accessible, but the containing directory + // is accessible: + try { + FileUtil.chmod(notADirectory.getAbsolutePath(), "0000"); + } catch (InterruptedException ie) { + // should never happen since that method never throws InterruptedException. + assertNull(ie); + } + assertFalse(notADirectory.canRead()); + final long du3 = FileUtil.getDU(partitioned); + assertEquals(expected, du3); + + // some target files and containing directory are not accessible: + try { + FileUtil.chmod(partitioned.getAbsolutePath(), "0000"); + } catch (InterruptedException ie) { + // should never happen since that method never throws InterruptedException. + assertNull(ie); + } + assertFalse(partitioned.canRead()); + final long du4 = FileUtil.getDU(partitioned); + assertEquals(0, du4); + } finally { + // Restore the permissions so that we can delete the folder + // in @After method: + FileUtil.chmod(partitioned.getAbsolutePath(), "0777", true/*recursive*/); + } + } + + @Test (timeout = 30000) + public void testUnTar() throws IOException { + setupDirs(); + + // make a simple tar: + final File simpleTar = new File(del, FILE); + OutputStream os = new FileOutputStream(simpleTar); + TarOutputStream tos = new TarOutputStream(os); + try { + TarEntry te = new TarEntry("foo"); + byte[] data = "some-content".getBytes("UTF-8"); + te.setSize(data.length); + tos.putNextEntry(te); + tos.write(data); + tos.closeEntry(); + tos.flush(); + tos.finish(); + } finally { + tos.close(); + } + + // successfully untar it into an existing dir: + FileUtil.unTar(simpleTar, tmp); + // check result: + assertTrue(new File(tmp, "foo").exists()); + assertEquals(12, new File(tmp, "foo").length()); + + final File regularFile = new File(tmp, "QuickBrownFoxJumpsOverTheLazyDog"); + regularFile.createNewFile(); + assertTrue(regularFile.exists()); + try { + FileUtil.unTar(simpleTar, regularFile); + assertTrue("An IOException expected.", false); + } catch (IOException ioe) { + // okay + } + } + + @Test (timeout = 30000) + public void testReplaceFile() throws IOException { + setupDirs(); + final File srcFile = new File(tmp, "src"); + + // src exists, and target does not exist: + srcFile.createNewFile(); + assertTrue(srcFile.exists()); + final File targetFile = new File(tmp, "target"); + assertTrue(!targetFile.exists()); + FileUtil.replaceFile(srcFile, targetFile); + assertTrue(!srcFile.exists()); + assertTrue(targetFile.exists()); + + // src exists and target is a regular file: + srcFile.createNewFile(); + assertTrue(srcFile.exists()); + FileUtil.replaceFile(srcFile, targetFile); + assertTrue(!srcFile.exists()); + assertTrue(targetFile.exists()); + + // src exists, and target is a non-empty directory: + srcFile.createNewFile(); + assertTrue(srcFile.exists()); + targetFile.delete(); + targetFile.mkdirs(); + File obstacle = new 
File(targetFile, "obstacle"); + obstacle.createNewFile(); + assertTrue(obstacle.exists()); + assertTrue(targetFile.exists() && targetFile.isDirectory()); + try { + FileUtil.replaceFile(srcFile, targetFile); + assertTrue(false); + } catch (IOException ioe) { + // okay + } + // check up the post-condition: nothing is deleted: + assertTrue(srcFile.exists()); + assertTrue(targetFile.exists() && targetFile.isDirectory()); + assertTrue(obstacle.exists()); + } + + @Test (timeout = 30000) + public void testCreateLocalTempFile() throws IOException { + setupDirs(); + final File baseFile = new File(tmp, "base"); + File tmp1 = FileUtil.createLocalTempFile(baseFile, "foo", false); + File tmp2 = FileUtil.createLocalTempFile(baseFile, "foo", true); + assertFalse(tmp1.getAbsolutePath().equals(baseFile.getAbsolutePath())); + assertFalse(tmp2.getAbsolutePath().equals(baseFile.getAbsolutePath())); + assertTrue(tmp1.exists() && tmp2.exists()); + assertTrue(tmp1.canWrite() && tmp2.canWrite()); + assertTrue(tmp1.canRead() && tmp2.canRead()); + tmp1.delete(); + tmp2.delete(); + assertTrue(!tmp1.exists() && !tmp2.exists()); + } + + @Test (timeout = 30000) + public void testUnZip() throws IOException { + // make sa simple zip + setupDirs(); + + // make a simple tar: + final File simpleZip = new File(del, FILE); + OutputStream os = new FileOutputStream(simpleZip); + ZipOutputStream tos = new ZipOutputStream(os); + try { + ZipEntry ze = new ZipEntry("foo"); + byte[] data = "some-content".getBytes("UTF-8"); + ze.setSize(data.length); + tos.putNextEntry(ze); + tos.write(data); + tos.closeEntry(); + tos.flush(); + tos.finish(); + } finally { + tos.close(); + } + + // successfully untar it into an existing dir: + FileUtil.unZip(simpleZip, tmp); + // check result: + assertTrue(new File(tmp, "foo").exists()); + assertEquals(12, new File(tmp, "foo").length()); + + final File regularFile = new File(tmp, "QuickBrownFoxJumpsOverTheLazyDog"); + regularFile.createNewFile(); + assertTrue(regularFile.exists()); + try { + FileUtil.unZip(simpleZip, regularFile); + assertTrue("An IOException expected.", false); + } catch (IOException ioe) { + // okay + } + } + + @Test (timeout = 30000) + /* + * Test method copy(FileSystem srcFS, Path src, File dst, boolean deleteSource, Configuration conf) + */ + public void testCopy5() throws IOException { + setupDirs(); + + URI uri = tmp.toURI(); + Configuration conf = new Configuration(); + FileSystem fs = FileSystem.newInstance(uri, conf); + final String content = "some-content"; + File srcFile = createFile(tmp, "src", content); + Path srcPath = new Path(srcFile.toURI()); + + // copy regular file: + final File dest = new File(del, "dest"); + boolean result = FileUtil.copy(fs, srcPath, dest, false, conf); + assertTrue(result); + assertTrue(dest.exists()); + assertEquals(content.getBytes().length + + System.getProperty("line.separator").getBytes().length, dest.length()); + assertTrue(srcFile.exists()); // should not be deleted + + // copy regular file, delete src: + dest.delete(); + assertTrue(!dest.exists()); + result = FileUtil.copy(fs, srcPath, dest, true, conf); + assertTrue(result); + assertTrue(dest.exists()); + assertEquals(content.getBytes().length + + System.getProperty("line.separator").getBytes().length, dest.length()); + assertTrue(!srcFile.exists()); // should be deleted + + // copy a dir: + dest.delete(); + assertTrue(!dest.exists()); + srcPath = new Path(partitioned.toURI()); + result = FileUtil.copy(fs, srcPath, dest, true, conf); + assertTrue(result); + assertTrue(dest.exists() && 
dest.isDirectory()); + File[] files = dest.listFiles(); + assertTrue(files != null); + assertEquals(2, files.length); + for (File f: files) { + assertEquals(3 + + System.getProperty("line.separator").getBytes().length, f.length()); + } + assertTrue(!partitioned.exists()); // should be deleted + } + + @Test (timeout = 30000) + public void testStat2Paths1() { + assertNull(FileUtil.stat2Paths(null)); + + FileStatus[] fileStatuses = new FileStatus[0]; + Path[] paths = FileUtil.stat2Paths(fileStatuses); + assertEquals(0, paths.length); + + Path path1 = new Path("file://foo"); + Path path2 = new Path("file://moo"); + fileStatuses = new FileStatus[] { + new FileStatus(3, false, 0, 0, 0, path1), + new FileStatus(3, false, 0, 0, 0, path2) + }; + paths = FileUtil.stat2Paths(fileStatuses); + assertEquals(2, paths.length); + assertEquals(paths[0], path1); + assertEquals(paths[1], path2); + } + + @Test (timeout = 30000) + public void testStat2Paths2() { + Path defaultPath = new Path("file://default"); + Path[] paths = FileUtil.stat2Paths(null, defaultPath); + assertEquals(1, paths.length); + assertEquals(defaultPath, paths[0]); + + paths = FileUtil.stat2Paths(null, null); + assertTrue(paths != null); + assertEquals(1, paths.length); + assertEquals(null, paths[0]); + + Path path1 = new Path("file://foo"); + Path path2 = new Path("file://moo"); + FileStatus[] fileStatuses = new FileStatus[] { + new FileStatus(3, false, 0, 0, 0, path1), + new FileStatus(3, false, 0, 0, 0, path2) + }; + paths = FileUtil.stat2Paths(fileStatuses, defaultPath); + assertEquals(2, paths.length); + assertEquals(paths[0], path1); + assertEquals(paths[1], path2); } @Test (timeout = 30000) From d14584dec5244f87cec6f415292e6e6fa9cd91ad Mon Sep 17 00:00:00 2001 From: Jonathan Turner Eagles Date: Wed, 2 Oct 2013 15:45:11 +0000 Subject: [PATCH 024/133] YARN-677. Increase coverage to FairScheduler (Vadim Bondarev and Dennis Y via jeagles) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1528524 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 + .../scheduler/fair/TestFairScheduler.java | 93 +++++++++++++++++++ 2 files changed, 96 insertions(+) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index b112991f85e..a934f6b93c1 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -44,6 +44,9 @@ Release 2.3.0 - UNRELEASED YARN-819. ResourceManager and NodeManager should check for a minimum allowed version (Robert Parker via jeagles) + YARN-677. 
Increase coverage to FairScheduler (Vadim Bondarev and Dennis Y + via jeagles) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index 84c9a37d5d4..3086afcb5a9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -39,6 +39,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.TimeUnit; import javax.xml.parsers.ParserConfigurationException; @@ -86,6 +87,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedS import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies.DominantResourceFairnessPolicy; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies.FifoPolicy; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler; @@ -100,6 +103,9 @@ import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; import org.xml.sax.SAXException; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.UnmodifiableIterator; + public class TestFairScheduler { private class MockClock implements Clock { @@ -2312,4 +2318,91 @@ public class TestFairScheduler { Assert.assertEquals(1, consumption.getVirtualCores()); } + + @Test + public void testAggregateCapacityTrackingWithPreemptionEnabled() throws Exception { + int KB = 1024; + int iterationNumber = 10; + Configuration conf = createConfiguration(); + conf.setBoolean("yarn.scheduler.fair.preemption", true); + scheduler.reinitialize(conf, resourceManager.getRMContext()); + RMNode node = MockNodes.newNodeInfo(1, Resources.createResource(KB * iterationNumber)); + NodeAddedSchedulerEvent nodeAddEvent = new NodeAddedSchedulerEvent(node); + scheduler.handle(nodeAddEvent); + + for (int i = 0; i < iterationNumber; i++) { + createSchedulingRequest(KB, "queue1", "user1", 1); + scheduler.update(); + NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node); + scheduler.handle(updateEvent); + + assertEquals(KB, + scheduler.getQueueManager().getQueue("queue1").getResourceUsage().getMemory()); + TimeUnit.SECONDS.sleep(1); + } + } + + private static final class ExternalAppAddedSchedulerEvent extends SchedulerEvent { + public ExternalAppAddedSchedulerEvent() { + super(SchedulerEventType.APP_ADDED); + } + } + + private static final class ExternalNodeRemovedSchedulerEvent extends SchedulerEvent { + public 
ExternalNodeRemovedSchedulerEvent() { + super(SchedulerEventType.NODE_REMOVED); + } + } + + private static final class ExternalNodeUpdateSchedulerEvent extends SchedulerEvent { + public ExternalNodeUpdateSchedulerEvent() { + super(SchedulerEventType.NODE_UPDATE); + } + } + + private static final class ExternalNodeAddedSchedulerEvent extends SchedulerEvent { + public ExternalNodeAddedSchedulerEvent() { + super(SchedulerEventType.NODE_ADDED); + } + } + + private static final class ExternalAppRemovedSchedulerEvent extends SchedulerEvent { + public ExternalAppRemovedSchedulerEvent() { + super(SchedulerEventType.APP_REMOVED); + } + } + + private static final class ExternalContainerExpiredSchedulerEvent extends SchedulerEvent { + public ExternalContainerExpiredSchedulerEvent() { + super(SchedulerEventType.CONTAINER_EXPIRED); + } + } + + /** + * try to handle external events type + * and get {@code RuntimeException} + * + * @throws Exception + */ + @Test + public void testSchedulerHandleFailWithExternalEvents() throws Exception { + Configuration conf = createConfiguration(); + scheduler.reinitialize(conf, resourceManager.getRMContext()); + ImmutableSet externalEvents = ImmutableSet.of(new ExternalAppAddedSchedulerEvent(), + new ExternalNodeRemovedSchedulerEvent(), new ExternalNodeUpdateSchedulerEvent(), + new ExternalNodeAddedSchedulerEvent(), new ExternalAppRemovedSchedulerEvent(), + new ExternalContainerExpiredSchedulerEvent()); + + UnmodifiableIterator iter = externalEvents.iterator(); + while(iter.hasNext()) + handleExternalEvent(iter.next()); + } + + private void handleExternalEvent(SchedulerEvent event) throws Exception { + try { + scheduler.handle(event); + } catch(RuntimeException ex) { + //expected + } + } } From 07ee20503841a946eaa2e282fbf392efb15e7dd7 Mon Sep 17 00:00:00 2001 From: Arun Murthy Date: Wed, 2 Oct 2013 16:08:45 +0000 Subject: [PATCH 025/133] MAPREDUCE-5530. Fix compat with hadoop-1 in mapred.lib.CombinFileInputFormat by re-introducing isSplittable(FileSystem, Path) api and ensuring semantic compatibility. Contributed by Robert Kanter. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1528533 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 5 +++ .../mapred/lib/CombineFileInputFormat.java | 36 +++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 303fec6fcb9..40852d7e257 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -260,6 +260,11 @@ Release 2.1.2 - UNRELEASED MAPREDUCE-5536. Fixed MR AM and JHS to respect mapreduce.jobhistory.webapp.https.address. (Omkar Vinit Joshi via vinodkv) + MAPREDUCE-5530. Fix compat with hadoop-1 in + mapred.lib.CombinFileInputFormat by re-introducing + isSplittable(FileSystem, Path) api and ensuring semantic compatibility. 
+ (Robert Kanter via acmurthy) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/CombineFileInputFormat.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/CombineFileInputFormat.java index 1401fc29704..b9297f851bb 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/CombineFileInputFormat.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/CombineFileInputFormat.java @@ -24,13 +24,19 @@ import java.util.List; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.io.compress.CompressionCodecFactory; +import org.apache.hadoop.io.compress.SplittableCompressionCodec; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.TaskAttemptContext; /** @@ -127,4 +133,34 @@ public abstract class CombineFileInputFormat return result.toArray(new FileStatus[result.size()]); } + /** + * Subclasses should avoid overriding this method and should instead only + * override {@link #isSplitable(FileSystem, Path)}. The implementation of + * this method simply calls the other method to preserve compatibility. + * @see + * MAPREDUCE-5530 + * + * @param context the job context + * @param file the file name to check + * @return is this file splitable? + */ + @InterfaceAudience.Private + @Override + protected boolean isSplitable(JobContext context, Path file) { + try { + return isSplitable(FileSystem.get(context.getConfiguration()), file); + } + catch (IOException ioe) { + throw new RuntimeException(ioe); + } + } + + protected boolean isSplitable(FileSystem fs, Path file) { + final CompressionCodec codec = + new CompressionCodecFactory(fs.getConf()).getCodec(file); + if (null == codec) { + return true; + } + return codec instanceof SplittableCompressionCodec; + } } From d9610d5299cb8e7f2321504dd2858ccff51df123 Mon Sep 17 00:00:00 2001 From: Jason Darrell Lowe Date: Wed, 2 Oct 2013 20:50:42 +0000 Subject: [PATCH 026/133] HADOOP-9254. Cover packages org.apache.hadoop.util.bloom, org.apache.hadoop.util.hash. 
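[Editor's note, hedged: the new BloomFilterCommonTester below sizes its filters with the
standard optimal-bits formula m = -n * ln(p) / (ln 2)^2 at a 3% false-positive target; the
worked numbers here are an editor's example, not values taken from the tests.]

    // For example, n = 1000 expected insertions at p = 0.03:
    int n = 1000;
    double p = 0.03;
    int m = (int) (-n * Math.log(p) / (Math.log(2) * Math.log(2)));  // about 7298 bits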
Contributed by Vadim Bondarev git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1528620 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 3 + .../util/bloom/BloomFilterCommonTester.java | 533 ++++++++++++++++++ .../hadoop/util/bloom/TestBloomFilters.java | 240 ++++++++ .../org/apache/hadoop/util/hash/TestHash.java | 89 +++ 4 files changed, 865 insertions(+) create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/bloom/BloomFilterCommonTester.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/bloom/TestBloomFilters.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/hash/TestHash.java diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 37b297bd7eb..77b6212c2ed 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -342,6 +342,9 @@ Release 2.3.0 - UNRELEASED HADOOP-9063. enhance unit-test coverage of class org.apache.hadoop.fs.FileUtil (Ivan A. Veselovsky via jlowe) + HADOOP-9254. Cover packages org.apache.hadoop.util.bloom, + org.apache.hadoop.util.hash (Vadim Bondarev via jlowe) + OPTIMIZATIONS HADOOP-9748. Reduce blocking on UGI.ensureInitialized (daryn) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/bloom/BloomFilterCommonTester.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/bloom/BloomFilterCommonTester.java new file mode 100644 index 00000000000..ebeab6f2dc2 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/bloom/BloomFilterCommonTester.java @@ -0,0 +1,533 @@ +package org.apache.hadoop.util.bloom; + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.AbstractCollection; +import java.util.Collection; +import java.util.Iterator; +import java.util.Random; + +import org.junit.Assert; +import org.apache.hadoop.io.DataInputBuffer; +import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.hadoop.util.hash.Hash; +import org.apache.log4j.Logger; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; + +public class BloomFilterCommonTester { + + private static final double LN2 = Math.log(2); + private static final double LN2_SQUARED = LN2 * LN2; + + private final int hashType; + private final int numInsertions; + + private final ImmutableList.Builder builder = ImmutableList.builder(); + + private ImmutableSet filterTestStrateges; + + private final PreAssertionHelper preAssertionHelper; + + static int optimalNumOfBits(int n, double p) { + return (int) (-n * Math.log(p) / LN2_SQUARED); + } + + public static BloomFilterCommonTester of(int hashId, + int numInsertions) { + return new BloomFilterCommonTester(hashId, numInsertions); + } + + public BloomFilterCommonTester withFilterInstance(T filter) { + builder.add(filter); + return this; + } + + private BloomFilterCommonTester(int hashId, int numInsertions) { + this.hashType = hashId; + this.numInsertions = numInsertions; + + this.preAssertionHelper = new PreAssertionHelper() { + + @Override + public ImmutableSet falsePositives(int hashId) { + switch (hashId) { + case Hash.JENKINS_HASH: { + // // false pos for odd and event under 1000 + return ImmutableSet.of(99, 963); + } + case Hash.MURMUR_HASH: { + // false pos for odd and event under 1000 + return ImmutableSet.of(769, 772, 810, 874); + } + default: { + // fail fast with unknown hash error !!! 
+ Assert.assertFalse("unknown hash error", true); + return ImmutableSet.of(); + } + } + } + }; + } + + public BloomFilterCommonTester withTestCases( + ImmutableSet filterTestStrateges) { + this.filterTestStrateges = ImmutableSet.copyOf(filterTestStrateges); + return this; + } + + @SuppressWarnings("unchecked") + public void test() { + final ImmutableList filtersList = builder.build(); + final ImmutableSet falsePositives = preAssertionHelper + .falsePositives(hashType); + + for (T filter : filtersList) { + for (BloomFilterTestStrategy strategy : filterTestStrateges) { + strategy.getStrategy().assertWhat(filter, numInsertions, hashType, falsePositives); + // create fresh instance for next test iteration + filter = (T) getSymmetricFilter(filter.getClass(), numInsertions, hashType); + } + } + } + + interface FilterTesterStrategy { + final Logger logger = Logger.getLogger(FilterTesterStrategy.class); + + void assertWhat(Filter filter, int numInsertions, int hashId, + ImmutableSet falsePositives); + } + + private static Filter getSymmetricFilter(Class filterClass, + int numInsertions, int hashType) { + int bitSetSize = optimalNumOfBits(numInsertions, 0.03); + int hashFunctionNumber = 5; + + if (filterClass == BloomFilter.class) { + return new BloomFilter(bitSetSize, hashFunctionNumber, hashType); + } else if (filterClass == CountingBloomFilter.class) { + return new CountingBloomFilter(bitSetSize, hashFunctionNumber, hashType); + } else if (filterClass == RetouchedBloomFilter.class) { + return new RetouchedBloomFilter(bitSetSize, hashFunctionNumber, hashType); + } else if (filterClass == DynamicBloomFilter.class) { + return new DynamicBloomFilter(bitSetSize, hashFunctionNumber, hashType, 3); + } else { + //fail fast + assertFalse("unexpected filterClass", true); + return null; + } + } + + public enum BloomFilterTestStrategy { + + ADD_KEYS_STRATEGY(new FilterTesterStrategy() { + + private final ImmutableList keys = ImmutableList.of(new Key( + new byte[] { 49, 48, 48 }), new Key(new byte[] { 50, 48, 48 })); + + @Override + public void assertWhat(Filter filter, int numInsertions, int hashId, + ImmutableSet falsePositives) { + + filter.add(keys); + + assertTrue(" might contain key error ", + filter.membershipTest(new Key("100".getBytes()))); + assertTrue(" might contain key error ", + filter.membershipTest(new Key("200".getBytes()))); + + filter.add(keys.toArray(new Key[] {})); + + assertTrue(" might contain key error ", + filter.membershipTest(new Key("100".getBytes()))); + assertTrue(" might contain key error ", + filter.membershipTest(new Key("200".getBytes()))); + + filter.add(new AbstractCollection() { + + @Override + public Iterator iterator() { + return keys.iterator(); + } + + @Override + public int size() { + return keys.size(); + } + + }); + + assertTrue(" might contain key error ", + filter.membershipTest(new Key("100".getBytes()))); + assertTrue(" might contain key error ", + filter.membershipTest(new Key("200".getBytes()))); + } + }), + + KEY_TEST_STRATEGY(new FilterTesterStrategy() { + + private void checkOnKeyMethods() { + String line = "werabsdbe"; + + Key key = new Key(line.getBytes()); + assertTrue("default key weight error ", key.getWeight() == 1d); + + key.set(line.getBytes(), 2d); + assertTrue(" setted key weight error ", key.getWeight() == 2d); + + Key sKey = new Key(line.getBytes(), 2d); + assertTrue("equals error", key.equals(sKey)); + assertTrue("hashcode error", key.hashCode() == sKey.hashCode()); + + sKey = new Key(line.concat("a").getBytes(), 2d); + assertFalse("equals 
error", key.equals(sKey)); + assertFalse("hashcode error", key.hashCode() == sKey.hashCode()); + + sKey = new Key(line.getBytes(), 3d); + assertFalse("equals error", key.equals(sKey)); + assertFalse("hashcode error", key.hashCode() == sKey.hashCode()); + + key.incrementWeight(); + assertTrue("weight error", key.getWeight() == 3d); + + key.incrementWeight(2d); + assertTrue("weight error", key.getWeight() == 5d); + } + + private void checkOnReadWrite() { + String line = "qryqeb354645rghdfvbaq23312fg"; + DataOutputBuffer out = new DataOutputBuffer(); + DataInputBuffer in = new DataInputBuffer(); + Key originKey = new Key(line.getBytes(), 100d); + try { + originKey.write(out); + in.reset(out.getData(), out.getData().length); + Key restoredKey = new Key(new byte[] { 0 }); + assertFalse("checkOnReadWrite equals error", restoredKey.equals(originKey)); + restoredKey.readFields(in); + assertTrue("checkOnReadWrite equals error", restoredKey.equals(originKey)); + out.reset(); + } catch (Exception ioe) { + Assert.fail("checkOnReadWrite ex error"); + } + } + + private void checkSetOnIAE() { + Key key = new Key(); + try { + key.set(null, 0); + } catch (IllegalArgumentException ex) { + // expected + } catch (Exception e) { + Assert.fail("checkSetOnIAE ex error"); + } + } + + @Override + public void assertWhat(Filter filter, int numInsertions, int hashId, + ImmutableSet falsePositives) { + checkOnKeyMethods(); + checkOnReadWrite(); + checkSetOnIAE(); + } + }), + + EXCEPTIONS_CHECK_STRATEGY(new FilterTesterStrategy() { + + @Override + public void assertWhat(Filter filter, int numInsertions, int hashId, + ImmutableSet falsePositives) { + checkAddOnNPE(filter); + checkTestMembershipOnNPE(filter); + checkAndOnIAE(filter); + } + + private void checkAndOnIAE(Filter filter) { + Filter tfilter = null; + + try { + Collection keys = null; + filter.add(keys); + } catch (IllegalArgumentException ex) { + // + } catch (Exception e) { + Assert.fail("" + e); + } + + try { + Key[] keys = null; + filter.add(keys); + } catch (IllegalArgumentException ex) { + // + } catch (Exception e) { + Assert.fail("" + e); + } + + try { + ImmutableList keys = null; + filter.add(keys); + } catch (IllegalArgumentException ex) { + // + } catch (Exception e) { + Assert.fail("" + e); + } + + try { + filter.and(tfilter); + } catch (IllegalArgumentException ex) { + // expected + } catch (Exception e) { + Assert.fail("" + e); + } + + try { + filter.or(tfilter); + } catch (IllegalArgumentException ex) { + // expected + } catch (Exception e) { + Assert.fail("" + e); + } + + try { + filter.xor(tfilter); + } catch (IllegalArgumentException ex) { + // expected + } catch (UnsupportedOperationException unex) { + // + } catch (Exception e) { + Assert.fail("" + e); + } + + } + + private void checkTestMembershipOnNPE(Filter filter) { + try { + Key nullKey = null; + filter.membershipTest(nullKey); + } catch (NullPointerException ex) { + // expected + } catch (Exception e) { + Assert.fail("" + e); + } + } + + private void checkAddOnNPE(Filter filter) { + try { + Key nullKey = null; + filter.add(nullKey); + } catch (NullPointerException ex) { + // expected + } catch (Exception e) { + Assert.fail("" + e); + } + } + }), + + ODD_EVEN_ABSENT_STRATEGY(new FilterTesterStrategy() { + + @Override + public void assertWhat(Filter filter, int numInsertions, int hashId, + ImmutableSet falsePositives) { + + // add all even keys + for (int i = 0; i < numInsertions; i += 2) { + filter.add(new Key(Integer.toString(i).getBytes())); + } + + // check on present even key + for 
(int i = 0; i < numInsertions; i += 2) { + Assert.assertTrue(" filter might contains " + i, + filter.membershipTest(new Key(Integer.toString(i).getBytes()))); + } + + // check on absent odd in event + for (int i = 1; i < numInsertions; i += 2) { + if (!falsePositives.contains(i)) { + assertFalse(" filter should not contain " + i, + filter.membershipTest(new Key(Integer.toString(i).getBytes()))); + } + } + } + }), + + WRITE_READ_STRATEGY(new FilterTesterStrategy() { + + private int slotSize = 10; + + @Override + public void assertWhat(Filter filter, int numInsertions, int hashId, + ImmutableSet falsePositives) { + + final Random rnd = new Random(); + final DataOutputBuffer out = new DataOutputBuffer(); + final DataInputBuffer in = new DataInputBuffer(); + try { + Filter tempFilter = getSymmetricFilter(filter.getClass(), + numInsertions, hashId); + ImmutableList.Builder blist = ImmutableList.builder(); + for (int i = 0; i < slotSize; i++) { + blist.add(rnd.nextInt(numInsertions * 2)); + } + + ImmutableList list = blist.build(); + + // mark bits for later check + for (Integer slot : list) { + filter.add(new Key(String.valueOf(slot).getBytes())); + } + + filter.write(out); + in.reset(out.getData(), out.getLength()); + tempFilter.readFields(in); + + for (Integer slot : list) { + assertTrue("read/write mask check filter error on " + slot, + filter.membershipTest(new Key(String.valueOf(slot).getBytes()))); + } + + } catch (IOException ex) { + Assert.fail("error ex !!!" + ex); + } + } + }), + + FILTER_XOR_STRATEGY(new FilterTesterStrategy() { + + @Override + public void assertWhat(Filter filter, int numInsertions, int hashId, + ImmutableSet falsePositives) { + Filter symmetricFilter = getSymmetricFilter(filter.getClass(), + numInsertions, hashId); + try { + // 0 xor 0 -> 0 + filter.xor(symmetricFilter); + // check on present all key + for (int i = 0; i < numInsertions; i++) { + Assert.assertFalse(" filter might contains " + i, + filter.membershipTest(new Key(Integer.toString(i).getBytes()))); + } + + // add all even keys + for (int i = 0; i < numInsertions; i += 2) { + filter.add(new Key(Integer.toString(i).getBytes())); + } + + // add all odd keys + for (int i = 0; i < numInsertions; i += 2) { + symmetricFilter.add(new Key(Integer.toString(i).getBytes())); + } + + filter.xor(symmetricFilter); + // 1 xor 1 -> 0 + // check on absent all key + for (int i = 0; i < numInsertions; i++) { + Assert.assertFalse(" filter might not contains " + i, + filter.membershipTest(new Key(Integer.toString(i).getBytes()))); + } + + } catch (UnsupportedOperationException ex) { + // not all Filter's implements this method + return; + } + } + }), + + FILTER_AND_STRATEGY(new FilterTesterStrategy() { + + @Override + public void assertWhat(Filter filter, int numInsertions, int hashId, + ImmutableSet falsePositives) { + + int startIntersection = numInsertions - (numInsertions - 100); + int endIntersection = numInsertions - 100; + + Filter partialFilter = getSymmetricFilter(filter.getClass(), + numInsertions, hashId); + + for (int i = 0; i < numInsertions; i++) { + String digit = Integer.toString(i); + filter.add(new Key(digit.getBytes())); + if (i >= startIntersection && i <= endIntersection) { + partialFilter.add(new Key(digit.getBytes())); + } + } + + // do logic AND + filter.and(partialFilter); + + for (int i = 0; i < numInsertions; i++) { + if (i >= startIntersection && i <= endIntersection) { + Assert.assertTrue(" filter might contains " + i, + filter.membershipTest(new Key(Integer.toString(i).getBytes()))); + } + } + } 
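// A quick numeric sketch (illustration only, not part of this patch) of the
// sizing used by getSymmetricFilter() above: with numInsertions = 1000 and a
// target false-positive rate p = 0.03, optimalNumOfBits() gives roughly 7298
// bits, and the usual optimal hash count (bits / n) * ln 2 rounds to 5, which
// matches the hard-coded hashFunctionNumber = 5. A minimal, self-contained
// check of that arithmetic:
public class BloomSizingSketch {
  public static void main(String[] args) {
    int n = 1000;                 // numInsertions used by the tester
    double p = 0.03;              // false-positive rate assumed above
    double ln2 = Math.log(2);
    int bits = (int) (-n * Math.log(p) / (ln2 * ln2));   // ~7298
    long hashes = Math.round(bits / (double) n * ln2);   // ~5
    System.out.println(bits + " bits, " + hashes + " hash functions");
  }
}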
+ }), + + FILTER_OR_STRATEGY(new FilterTesterStrategy() { + + @Override + public void assertWhat(Filter filter, int numInsertions, int hashId, + ImmutableSet falsePositives) { + Filter evenFilter = getSymmetricFilter(filter.getClass(), + numInsertions, hashId); + + // add all even + for (int i = 0; i < numInsertions; i += 2) { + evenFilter.add(new Key(Integer.toString(i).getBytes())); + } + + // add all odd + for (int i = 1; i < numInsertions; i += 2) { + filter.add(new Key(Integer.toString(i).getBytes())); + } + + // union odd with even + filter.or(evenFilter); + + // check on present all key + for (int i = 0; i < numInsertions; i++) { + Assert.assertTrue(" filter might contains " + i, + filter.membershipTest(new Key(Integer.toString(i).getBytes()))); + } + } + }); + + private final FilterTesterStrategy testerStrategy; + + BloomFilterTestStrategy(FilterTesterStrategy testerStrategy) { + this.testerStrategy = testerStrategy; + } + + public FilterTesterStrategy getStrategy() { + return testerStrategy; + } + + } + + interface PreAssertionHelper { + public ImmutableSet falsePositives(int hashId); + } + +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/bloom/TestBloomFilters.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/bloom/TestBloomFilters.java new file mode 100644 index 00000000000..93fa6d5195d --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/bloom/TestBloomFilters.java @@ -0,0 +1,240 @@ +package org.apache.hadoop.util.bloom; + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import java.util.AbstractCollection; +import java.util.Iterator; + +import org.apache.hadoop.util.bloom.BloomFilterCommonTester.BloomFilterTestStrategy; +import org.apache.hadoop.util.hash.Hash; +import org.junit.Assert; +import org.junit.Test; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; + +public class TestBloomFilters { + + int numInsertions = 1000; + int bitSize = BloomFilterCommonTester.optimalNumOfBits(numInsertions, 0.03); + int hashFunctionNumber = 5; + + private static final ImmutableMap> FALSE_POSITIVE_UNDER_1000 = ImmutableMap + .of(Hash.JENKINS_HASH, new AbstractCollection() { + final ImmutableList falsePositive = ImmutableList. 
of( + new Key("99".getBytes()), new Key("963".getBytes())); + + @Override + public Iterator iterator() { + return falsePositive.iterator(); + } + + @Override + public int size() { + return falsePositive.size(); + } + }, Hash.MURMUR_HASH, new AbstractCollection() { + final ImmutableList falsePositive = ImmutableList. of( + new Key("769".getBytes()), new Key("772".getBytes()), + new Key("810".getBytes()), new Key("874".getBytes())); + + @Override + public Iterator iterator() { + return falsePositive.iterator(); + } + + @Override + public int size() { + return falsePositive.size(); + } + }); + + private enum Digits { + ODD(1), EVEN(0); + + int start; + + Digits(int start) { + this.start = start; + } + + int getStart() { + return start; + } + } + + @Test + public void testDynamicBloomFilter() { + int hashId = Hash.JENKINS_HASH; + Filter filter = new DynamicBloomFilter(bitSize, hashFunctionNumber, + Hash.JENKINS_HASH, 3); + BloomFilterCommonTester.of(hashId, numInsertions) + .withFilterInstance(filter) + .withTestCases(ImmutableSet.of(BloomFilterTestStrategy.KEY_TEST_STRATEGY, + BloomFilterTestStrategy.ADD_KEYS_STRATEGY, + BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY, + BloomFilterTestStrategy.WRITE_READ_STRATEGY, + BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY)) + .test(); + + assertNotNull("testDynamicBloomFilter error ", filter.toString()); + } + + @Test + public void testCountingBloomFilter() { + int hashId = Hash.JENKINS_HASH; + + CountingBloomFilter filter = new CountingBloomFilter(bitSize, + hashFunctionNumber, hashId); + + Key key = new Key(new byte[] { 48, 48 }); + + filter.add(key); + assertTrue("CountingBloomFilter.membership error ", + filter.membershipTest(key)); + assertTrue("CountingBloomFilter.approximateCount error", + filter.approximateCount(key) == 1); + + filter.add(key); + assertTrue("CountingBloomFilter.approximateCount error", + filter.approximateCount(key) == 2); + + filter.delete(key); + assertTrue("CountingBloomFilter.membership error ", + filter.membershipTest(key)); + + filter.delete(key); + assertFalse("CountingBloomFilter.membership error ", + filter.membershipTest(key)); + assertTrue("CountingBloomFilter.approximateCount error", + filter.approximateCount(key) == 0); + + BloomFilterCommonTester.of(hashId, numInsertions) + .withFilterInstance(filter) + .withTestCases(ImmutableSet.of(BloomFilterTestStrategy.KEY_TEST_STRATEGY, + BloomFilterTestStrategy.ADD_KEYS_STRATEGY, + BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY, + BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY, + BloomFilterTestStrategy.WRITE_READ_STRATEGY, + BloomFilterTestStrategy.FILTER_OR_STRATEGY, + BloomFilterTestStrategy.FILTER_XOR_STRATEGY)).test(); + } + + @Test + public void testRetouchedBloomFilterSpecific() { + int numInsertions = 1000; + int hashFunctionNumber = 5; + + ImmutableSet hashes = ImmutableSet.of(Hash.MURMUR_HASH, + Hash.JENKINS_HASH); + + for (Integer hashId : hashes) { + RetouchedBloomFilter filter = new RetouchedBloomFilter(bitSize, + hashFunctionNumber, hashId); + + checkOnAbsentFalsePositive(hashId, numInsertions, filter, Digits.ODD, + RemoveScheme.MAXIMUM_FP); + filter.and(new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId)); + + checkOnAbsentFalsePositive(hashId, numInsertions, filter, Digits.EVEN, + RemoveScheme.MAXIMUM_FP); + filter.and(new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId)); + + checkOnAbsentFalsePositive(hashId, numInsertions, filter, Digits.ODD, + RemoveScheme.MINIMUM_FN); + filter.and(new RetouchedBloomFilter(bitSize, 
hashFunctionNumber, hashId)); + + checkOnAbsentFalsePositive(hashId, numInsertions, filter, Digits.EVEN, + RemoveScheme.MINIMUM_FN); + filter.and(new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId)); + + checkOnAbsentFalsePositive(hashId, numInsertions, filter, Digits.ODD, + RemoveScheme.RATIO); + filter.and(new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId)); + + checkOnAbsentFalsePositive(hashId, numInsertions, filter, Digits.EVEN, + RemoveScheme.RATIO); + filter.and(new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId)); + } + } + + private void checkOnAbsentFalsePositive(int hashId, int numInsertions, + final RetouchedBloomFilter filter, Digits digits, short removeSchema) { + AbstractCollection falsePositives = FALSE_POSITIVE_UNDER_1000 + .get(hashId); + + if (falsePositives == null) + Assert.fail(String.format("false positives for hash %d not founded", + hashId)); + + filter.addFalsePositive(falsePositives); + + for (int i = digits.getStart(); i < numInsertions; i += 2) { + filter.add(new Key(Integer.toString(i).getBytes())); + } + + for (Key key : falsePositives) { + filter.selectiveClearing(key, removeSchema); + } + + for (int i = 1 - digits.getStart(); i < numInsertions; i += 2) { + assertFalse(" testRetouchedBloomFilterAddFalsePositive error " + i, + filter.membershipTest(new Key(Integer.toString(i).getBytes()))); + } + } + + @Test + public void testFiltersWithJenkinsHash() { + int hashId = Hash.JENKINS_HASH; + + BloomFilterCommonTester.of(hashId, numInsertions) + .withFilterInstance(new BloomFilter(bitSize, hashFunctionNumber, hashId)) + .withFilterInstance(new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId)) + .withTestCases(ImmutableSet.of(BloomFilterTestStrategy.KEY_TEST_STRATEGY, + BloomFilterTestStrategy.ADD_KEYS_STRATEGY, + BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY, + BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY, + BloomFilterTestStrategy.WRITE_READ_STRATEGY, + BloomFilterTestStrategy.FILTER_OR_STRATEGY, + BloomFilterTestStrategy.FILTER_AND_STRATEGY, + BloomFilterTestStrategy.FILTER_XOR_STRATEGY)).test(); + } + + @Test + public void testFiltersWithMurmurHash() { + int hashId = Hash.MURMUR_HASH; + + BloomFilterCommonTester.of(hashId, numInsertions) + .withFilterInstance(new BloomFilter(bitSize, hashFunctionNumber, hashId)) + .withFilterInstance(new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId)) + .withTestCases(ImmutableSet.of(BloomFilterTestStrategy.KEY_TEST_STRATEGY, + BloomFilterTestStrategy.ADD_KEYS_STRATEGY, + BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY, + BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY, + BloomFilterTestStrategy.WRITE_READ_STRATEGY, + BloomFilterTestStrategy.FILTER_OR_STRATEGY, + BloomFilterTestStrategy.FILTER_AND_STRATEGY, + BloomFilterTestStrategy.FILTER_XOR_STRATEGY)).test(); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/hash/TestHash.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/hash/TestHash.java new file mode 100644 index 00000000000..a6565887368 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/hash/TestHash.java @@ -0,0 +1,89 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util.hash; + +import static org.junit.Assert.*; +import org.apache.hadoop.conf.Configuration; +import org.junit.Test; + +public class TestHash { + static final String LINE = "34563@45kjkksdf/ljfdb9d8fbusd*89uggjsk Date: Wed, 2 Oct 2013 21:08:27 +0000 Subject: [PATCH 027/133] MAPREDUCE-5459. Update documentation on how to run MRv1 examples on YARN. Contributed by Zhijie Shen. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1528626 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 +++ ...educe_Compatibility_Hadoop1_Hadoop2.apt.vm | 27 ++++++++++++------- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 40852d7e257..a5590213ca2 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -265,6 +265,9 @@ Release 2.1.2 - UNRELEASED isSplittable(FileSystem, Path) api and ensuring semantic compatibility. (Robert Kanter via acmurthy) + MAPREDUCE-5459. Update documentation on how to run MRv1 examples on YARN. + (Zhijie Shen via acmurthy) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/apt/MapReduce_Compatibility_Hadoop1_Hadoop2.apt.vm b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/apt/MapReduce_Compatibility_Hadoop1_Hadoop2.apt.vm index aaa5f176e35..bdc2f36a0d6 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/apt/MapReduce_Compatibility_Hadoop1_Hadoop2.apt.vm +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/apt/MapReduce_Compatibility_Hadoop1_Hadoop2.apt.vm @@ -96,12 +96,21 @@ Apache Hadoop MapReduce - Migrating from Apache Hadoop 1.x to Apache Hadoop 2.x * {Malicious} - For the users who are going to try <<>> on YARN, - please note that <<>> will still use - <<>>, which is installed together with - other MRv2 jars. By default Hadoop framework jars appear before the users' - jars in the classpath, such that the classes from the 2.x.x jar will still be - picked. Users should either remove <<>> - from the classpath or set <<>> and - <<>> to run their target - examples jar. + For the users who are going to try <<>> on YARN, + please note that <<>> will still use + <<>>, which is installed together with + other MRv2 jars. By default Hadoop framework jars appear before the users' + jars in the classpath, such that the classes from the 2.x.x jar will still be + picked. Users should remove <<>> + from the classpath of all the nodes in a cluster. Otherwise, users need to + set <<>> and + <<>> to run their target + examples jar, and add the following configuration in <<>> to + make the processes in YARN containers pick this jar as well. 
+ ++---+ +<property> + <name>mapreduce.job.user.classpath.first</name> + <value>true</value> +</property> ++---+ From 0a6e275ee3360719290ad666629624450d4f0a6f Mon Sep 17 00:00:00 2001 From: Bikas Saha Date: Wed, 2 Oct 2013 21:31:47 +0000 Subject: [PATCH 028/133] YARN-1141. Updating resource requests should be decoupled with updating blacklist (Zhijie Shen via bikas) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1528632 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 + .../scheduler/AppSchedulingInfo.java | 20 +++--- .../scheduler/capacity/CapacityScheduler.java | 5 +- .../common/fica/FiCaSchedulerApp.java | 10 ++- .../scheduler/fair/FSSchedulerApp.java | 2 +- .../scheduler/fifo/FifoScheduler.java | 7 +- .../capacity/TestApplicationLimits.java | 6 +- .../capacity/TestCapacityScheduler.java | 43 +++++++++++- .../scheduler/capacity/TestLeafQueue.java | 69 ++++++++++--------- .../scheduler/fifo/TestFifoScheduler.java | 39 ++++++++++- 10 files changed, 148 insertions(+), 56 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java index 6f8144d4c69..b5b22b6c115 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java @@ -116,14 +116,11 @@ public class AppSchedulingInfo { * The ApplicationMaster is updating resource requirements for the * application, by asking for more resources and releasing resources acquired * by the application.
- * + * * @param requests resources to be acquired - * @param blacklistAdditions resources to be added to the blacklist - * @param blacklistRemovals resources to be removed from the blacklist */ synchronized public void updateResourceRequests( - List requests, - List blacklistAdditions, List blacklistRemovals) { + List requests) { QueueMetrics metrics = queue.getMetrics(); // Update resource requests @@ -181,11 +178,16 @@ public class AppSchedulingInfo { lastRequestContainers))); } } + } - // - // Update blacklist - // - + /** + * The ApplicationMaster is updating the blacklist + * + * @param blacklistAdditions resources to be added to the blacklist + * @param blacklistRemovals resources to be removed from the blacklist + */ + synchronized public void updateBlacklist( + List blacklistAdditions, List blacklistRemovals) { // Add to blacklist if (blacklistAdditions != null) { blacklist.addAll(blacklistAdditions); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index a45b3fd79ae..a8a47c9b12c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -572,8 +572,7 @@ public class CapacityScheduler application.showRequests(); // Update application requests - application.updateResourceRequests(ask, - blacklistAdditions, blacklistRemovals); + application.updateResourceRequests(ask); LOG.debug("allocate: post-update"); application.showRequests(); @@ -585,6 +584,8 @@ public class CapacityScheduler " #ask=" + ask.size()); } + application.updateBlacklist(blacklistAdditions, blacklistRemovals); + return application.getAllocation(getResourceCalculator(), clusterResource, getMinimumResourceCapability()); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java index a261dbfd5a2..b93965cdc35 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java @@ -141,10 +141,16 @@ public class FiCaSchedulerApp extends SchedulerApplication { } public synchronized void updateResourceRequests( - List requests, + List requests) { + if (!isStopped) { + this.appSchedulingInfo.updateResourceRequests(requests); + } + } + + public synchronized void updateBlacklist( List blacklistAdditions, List blacklistRemovals) { if (!isStopped) { - 
this.appSchedulingInfo.updateResourceRequests(requests, + this.appSchedulingInfo.updateBlacklist( blacklistAdditions, blacklistRemovals); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerApp.java index 1fe400ee07d..8b5d454305d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerApp.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerApp.java @@ -138,7 +138,7 @@ public class FSSchedulerApp extends SchedulerApplication { public synchronized void updateResourceRequests( List requests) { - this.appSchedulingInfo.updateResourceRequests(requests, null, null); + this.appSchedulingInfo.updateResourceRequests(requests); } public Map getResourceRequests(Priority priority) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java index a6b5de72164..ac7c68a5135 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java @@ -304,7 +304,7 @@ public class FifoScheduler implements ResourceScheduler, Configurable { application.showRequests(); // Update application requests - application.updateResourceRequests(ask, blacklistAdditions, blacklistRemovals); + application.updateResourceRequests(ask); LOG.debug("allocate: post-update" + " applicationId=" + applicationAttemptId + @@ -316,13 +316,16 @@ public class FifoScheduler implements ResourceScheduler, Configurable { " #ask=" + ask.size()); } + application.updateBlacklist(blacklistAdditions, blacklistRemovals); + return new Allocation( application.pullNewlyAllocatedContainers(), application.getHeadroom()); } } - private FiCaSchedulerApp getApplication( + @VisibleForTesting + FiCaSchedulerApp getApplication( ApplicationAttemptId applicationAttemptId) { return applications.get(applicationAttemptId); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java index f23168587b1..f343bd546c6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java @@ -513,7 +513,7 @@ public class TestApplicationLimits { app_0_0_requests.add( TestUtils.createResourceRequest(ResourceRequest.ANY, 1*GB, 2, true, priority_1, recordFactory)); - app_0_0.updateResourceRequests(app_0_0_requests, null, null); + app_0_0.updateResourceRequests(app_0_0_requests); // Schedule to compute queue.assignContainers(clusterResource, node_0); @@ -532,7 +532,7 @@ public class TestApplicationLimits { app_0_1_requests.add( TestUtils.createResourceRequest(ResourceRequest.ANY, 1*GB, 2, true, priority_1, recordFactory)); - app_0_1.updateResourceRequests(app_0_1_requests, null, null); + app_0_1.updateResourceRequests(app_0_1_requests); // Schedule to compute queue.assignContainers(clusterResource, node_0); // Schedule to compute @@ -551,7 +551,7 @@ public class TestApplicationLimits { app_1_0_requests.add( TestUtils.createResourceRequest(ResourceRequest.ANY, 1*GB, 2, true, priority_1, recordFactory)); - app_1_0.updateResourceRequests(app_1_0_requests, null, null); + app_1_0.updateResourceRequests(app_1_0_requests); // Schedule to compute queue.assignContainers(clusterResource, node_0); // Schedule to compute diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java index 38df24fa991..c7ab72fae0a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java @@ -28,6 +28,7 @@ import static org.mockito.Mockito.when; import java.io.IOException; import java.lang.reflect.Constructor; +import java.util.Collections; import java.util.Comparator; import java.util.List; import java.util.Map; @@ -41,6 +42,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.net.NetworkTopology; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.QueueInfo; import org.apache.hadoop.yarn.api.records.QueueUserACLInfo; @@ -52,6 +54,7 @@ import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.server.resourcemanager.Application; import org.apache.hadoop.yarn.server.resourcemanager.MockNodes; +import org.apache.hadoop.yarn.server.resourcemanager.MockRM; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; @@ -62,11 +65,14 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.security.ClientToAMTokenSecretManagerInRM; import org.apache.hadoop.yarn.server.resourcemanager.security.NMTokenSecretManagerInRM; import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager; +import org.apache.hadoop.yarn.server.utils.BuilderUtils; import org.apache.hadoop.yarn.util.resource.Resources; import org.junit.After; import org.junit.Before; @@ -512,8 +518,41 @@ public class TestCapacityScheduler { } return result; } - - + + @SuppressWarnings("resource") + @Test + public void testBlackListNodes() throws Exception { + Configuration conf = new Configuration(); + conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, + ResourceScheduler.class); + MockRM rm = new MockRM(conf); + rm.start(); + CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler(); + + String host = "127.0.0.1"; + RMNode node = + MockNodes.newNodeInfo(0, MockNodes.newResource(4 * GB), 1, host); + cs.handle(new NodeAddedSchedulerEvent(node)); + + ApplicationId appId = BuilderUtils.newApplicationId(100, 1); + ApplicationAttemptId appAttemptId = BuilderUtils.newApplicationAttemptId( + appId, 1); + SchedulerEvent event = new AppAddedSchedulerEvent(appAttemptId, "default", + "user"); + cs.handle(event); + + // Verify the blacklist can be updated independent of requesting containers + cs.allocate(appAttemptId, Collections.emptyList(), + Collections.emptyList(), + Collections.singletonList(host), null); + Assert.assertTrue(cs.getApplication(appAttemptId).isBlacklisted(host)); + cs.allocate(appAttemptId, Collections.emptyList(), + Collections.emptyList(), null, + Collections.singletonList(host)); + Assert.assertFalse(cs.getApplication(appAttemptId).isBlacklisted(host)); + rm.stop(); + } + @Test (timeout = 5000) public void testApplicationComparator() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java index 7d96e40cc84..f6e13a23540 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java @@ -294,7 +294,7 @@ public class TestLeafQueue { Priority priority = TestUtils.createMockPriority(1); app_0.updateResourceRequests(Collections.singletonList( TestUtils.createResourceRequest(ResourceRequest.ANY, 1*GB, 3, true, - priority, recordFactory)), null, null); + priority, recordFactory))); // Start testing... 
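// For context (a sketch, not part of this patch): YARN-1141 splits the old
// combined update into two independent calls on the application, which is why
// the test edits below now pass resource requests and blacklist changes
// separately. Method names follow the diff; the helper below and its wiring
// are hypothetical, and it assumes the imports already used in this file
// (FiCaSchedulerApp, ResourceRequest, List).
static void applyAllocate(FiCaSchedulerApp app, List<ResourceRequest> ask,
    List<String> blacklistAdditions, List<String> blacklistRemovals) {
  // Resource asks are updated on their own...
  app.updateResourceRequests(ask);
  // ...and the blacklist is updated independently, so a blacklist-only
  // allocate() call no longer has to touch the resource requests.
  app.updateBlacklist(blacklistAdditions, blacklistRemovals);
}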
@@ -416,11 +416,11 @@ public class TestLeafQueue { Priority priority = TestUtils.createMockPriority(1); app_0.updateResourceRequests(Collections.singletonList( TestUtils.createResourceRequest(ResourceRequest.ANY, 1*GB, 3, true, - priority, recordFactory)), null, null); + priority, recordFactory))); app_1.updateResourceRequests(Collections.singletonList( TestUtils.createResourceRequest(ResourceRequest.ANY, 1*GB, 2, true, - priority, recordFactory)), null, null); + priority, recordFactory))); // Start testing... @@ -549,11 +549,11 @@ public class TestLeafQueue { Priority priority = TestUtils.createMockPriority(1); app_0.updateResourceRequests(Collections.singletonList( TestUtils.createResourceRequest(ResourceRequest.ANY, 2*GB, 1, true, - priority, recordFactory)), null, null); + priority, recordFactory))); app_1.updateResourceRequests(Collections.singletonList( TestUtils.createResourceRequest(ResourceRequest.ANY, 1*GB, 2, true, - priority, recordFactory)), null, null); + priority, recordFactory))); /** * Start testing... @@ -574,7 +574,7 @@ public class TestLeafQueue { // Pre MAPREDUCE-3732 this test should fail without this block too // app_2.updateResourceRequests(Collections.singletonList( // TestUtils.createResourceRequest(RMNodeImpl.ANY, 1*GB, 1, priority, -// recordFactory))); +// recordFactory))); // 1 container to user_0 a.assignContainers(clusterResource, node_0); @@ -642,11 +642,11 @@ public class TestLeafQueue { Priority priority = TestUtils.createMockPriority(1); app_0.updateResourceRequests(Collections.singletonList( TestUtils.createResourceRequest(ResourceRequest.ANY, 2*GB, 1, true, - priority, recordFactory)), null, null); + priority, recordFactory))); app_1.updateResourceRequests(Collections.singletonList( TestUtils.createResourceRequest(ResourceRequest.ANY, 1*GB, 2, true, - priority, recordFactory)), null, null); + priority, recordFactory))); /** * Start testing... @@ -681,7 +681,7 @@ public class TestLeafQueue { a.setMaxCapacity(.1f); app_2.updateResourceRequests(Collections.singletonList( TestUtils.createResourceRequest(ResourceRequest.ANY, 1*GB, 1, true, - priority, recordFactory)), null, null); + priority, recordFactory))); assertEquals(2, a.getActiveUsersManager().getNumActiveUsers()); // No more to user_0 since he is already over user-limit @@ -698,7 +698,7 @@ public class TestLeafQueue { LOG.info("here"); app_1.updateResourceRequests(Collections.singletonList( // unset TestUtils.createResourceRequest(ResourceRequest.ANY, 1*GB, 0, true, - priority, recordFactory)), null, null); + priority, recordFactory))); assertEquals(1, a.getActiveUsersManager().getNumActiveUsers()); a.assignContainers(clusterResource, node_1); assertEquals(1*GB, app_2.getHeadroom().getMemory()); // hit queue max-cap @@ -759,11 +759,11 @@ public class TestLeafQueue { Priority priority = TestUtils.createMockPriority(1); app_0.updateResourceRequests(Collections.singletonList( TestUtils.createResourceRequest(ResourceRequest.ANY, 1*GB, 10, true, - priority, recordFactory)), null, null); + priority, recordFactory))); app_1.updateResourceRequests(Collections.singletonList( TestUtils.createResourceRequest(ResourceRequest.ANY, 1*GB, 10, true, - priority, recordFactory)), null, null); + priority, recordFactory))); /** * Start testing... 
@@ -793,11 +793,11 @@ public class TestLeafQueue { app_2.updateResourceRequests(Collections.singletonList( TestUtils.createResourceRequest(ResourceRequest.ANY, 3*GB, 1, true, - priority, recordFactory)), null, null); + priority, recordFactory))); app_3.updateResourceRequests(Collections.singletonList( TestUtils.createResourceRequest(ResourceRequest.ANY, 1*GB, 2, true, - priority, recordFactory)), null, null); + priority, recordFactory))); // Now allocations should goto app_2 since // user_0 is at limit inspite of high user-limit-factor @@ -921,11 +921,11 @@ public class TestLeafQueue { Priority priority = TestUtils.createMockPriority(1); app_0.updateResourceRequests(Collections.singletonList( TestUtils.createResourceRequest(ResourceRequest.ANY, 1*GB, 2, true, - priority, recordFactory)), null, null); + priority, recordFactory))); app_1.updateResourceRequests(Collections.singletonList( TestUtils.createResourceRequest(ResourceRequest.ANY, 4*GB, 1, true, - priority, recordFactory)), null, null); + priority, recordFactory))); // Start testing... @@ -1025,7 +1025,7 @@ public class TestLeafQueue { Priority priority = TestUtils.createMockPriority(1); app_0.updateResourceRequests(Collections.singletonList( TestUtils.createResourceRequest(ResourceRequest.ANY, 2*GB, 1, true, - priority, recordFactory)), null, null); + priority, recordFactory))); // Setup app_1 to request a 4GB container on host_0 and // another 4GB container anywhere. @@ -1037,7 +1037,7 @@ public class TestLeafQueue { true, priority, recordFactory)); appRequests_1.add(TestUtils.createResourceRequest(ResourceRequest.ANY, 4*GB, 2, true, priority, recordFactory)); - app_1.updateResourceRequests(appRequests_1, null, null); + app_1.updateResourceRequests(appRequests_1); // Start testing... @@ -1132,11 +1132,11 @@ public class TestLeafQueue { Priority priority = TestUtils.createMockPriority(1); app_0.updateResourceRequests(Collections.singletonList( TestUtils.createResourceRequest(ResourceRequest.ANY, 1*GB, 2, true, - priority, recordFactory)), null, null); + priority, recordFactory))); app_1.updateResourceRequests(Collections.singletonList( TestUtils.createResourceRequest(ResourceRequest.ANY, 4*GB, 1, true, - priority, recordFactory)), null, null); + priority, recordFactory))); // Start testing... @@ -1261,7 +1261,7 @@ public class TestLeafQueue { app_0_requests_0.add( TestUtils.createResourceRequest(ResourceRequest.ANY, 1*GB, 3, // one extra true, priority, recordFactory)); - app_0.updateResourceRequests(app_0_requests_0, null, null); + app_0.updateResourceRequests(app_0_requests_0); // Start testing... CSAssignment assignment = null; @@ -1326,7 +1326,7 @@ public class TestLeafQueue { app_0_requests_0.add( TestUtils.createResourceRequest(ResourceRequest.ANY, 1*GB, 2, // one extra true, priority, recordFactory)); - app_0.updateResourceRequests(app_0_requests_0, null, null); + app_0.updateResourceRequests(app_0_requests_0); assertEquals(2, app_0.getTotalRequiredResources(priority)); String host_3 = "127.0.0.4"; // on rack_1 @@ -1417,7 +1417,7 @@ public class TestLeafQueue { TestUtils.createResourceRequest(ResourceRequest.ANY, 2*GB, 1, true, priority_2, recordFactory)); - app_0.updateResourceRequests(app_0_requests_0, null, null); + app_0.updateResourceRequests(app_0_requests_0); // Start testing... 
@@ -1532,7 +1532,7 @@ public class TestLeafQueue { app_0_requests_0.add( TestUtils.createResourceRequest(rack_1, 1*GB, 1, true, priority, recordFactory)); - app_0.updateResourceRequests(app_0_requests_0, null, null); + app_0.updateResourceRequests(app_0_requests_0); // Start testing... @@ -1541,7 +1541,7 @@ public class TestLeafQueue { app_0_requests_0.add( TestUtils.createResourceRequest(ResourceRequest.ANY, 1*GB, 1, // only one true, priority, recordFactory)); - app_0.updateResourceRequests(app_0_requests_0, null, null); + app_0.updateResourceRequests(app_0_requests_0); // NODE_LOCAL - node_0_1 a.assignContainers(clusterResource, node_0_0); @@ -1564,7 +1564,7 @@ public class TestLeafQueue { app_0_requests_0.add( TestUtils.createResourceRequest(ResourceRequest.ANY, 1*GB, 1, // only one true, priority, recordFactory)); - app_0.updateResourceRequests(app_0_requests_0, null, null); + app_0.updateResourceRequests(app_0_requests_0); // No allocation on node_0_1 even though it's node/rack local since // required(rack_1) == 0 @@ -1809,8 +1809,8 @@ public class TestLeafQueue { app_0_requests_0.add( TestUtils.createResourceRequest(ResourceRequest.ANY, 1*GB, 1, // only one false, priority, recordFactory)); - app_0.updateResourceRequests(app_0_requests_0, - Collections.singletonList(host_0_0), null); + app_0.updateResourceRequests(app_0_requests_0); + app_0.updateBlacklist(Collections.singletonList(host_0_0), null); app_0_requests_0.clear(); // @@ -1850,8 +1850,8 @@ public class TestLeafQueue { app_0_requests_0.add( TestUtils.createResourceRequest(rack_1, 1*GB, 1, true, priority, recordFactory)); - app_0.updateResourceRequests(app_0_requests_0, - Collections.singletonList(host_1_1), null); + app_0.updateResourceRequests(app_0_requests_0); + app_0.updateBlacklist(Collections.singletonList(host_1_1), null); app_0_requests_0.clear(); // resourceName: @@ -1877,7 +1877,8 @@ public class TestLeafQueue { assertEquals(0, app_0.getSchedulingOpportunities(priority)); // should be 0 // Now, remove node_1_1 from blacklist, but add rack_1 to blacklist - app_0.updateResourceRequests(app_0_requests_0, + app_0.updateResourceRequests(app_0_requests_0); + app_0.updateBlacklist( Collections.singletonList(rack_1), Collections.singletonList(host_1_1)); app_0_requests_0.clear(); @@ -1904,8 +1905,8 @@ public class TestLeafQueue { assertEquals(0, app_0.getSchedulingOpportunities(priority)); // should be 0 // Now remove rack_1 from blacklist - app_0.updateResourceRequests(app_0_requests_0, - null, Collections.singletonList(rack_1)); + app_0.updateResourceRequests(app_0_requests_0); + app_0.updateBlacklist(null, Collections.singletonList(rack_1)); app_0_requests_0.clear(); // resourceName: @@ -1937,7 +1938,7 @@ public class TestLeafQueue { app_0_requests_0.add( TestUtils.createResourceRequest(ResourceRequest.ANY, 1*GB, 1, // only one false, priority, recordFactory)); - app_0.updateResourceRequests(app_0_requests_0, null, null); + app_0.updateResourceRequests(app_0_requests_0); app_0_requests_0.clear(); // resourceName: diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java index 30ce68e73ee..d83c0e5cb95 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java @@ -20,6 +20,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo; import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import junit.framework.Assert; @@ -43,6 +44,7 @@ import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.server.resourcemanager.Application; import org.apache.hadoop.yarn.server.resourcemanager.MockNodes; +import org.apache.hadoop.yarn.server.resourcemanager.MockRM; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; @@ -67,7 +69,8 @@ import org.junit.Test; public class TestFifoScheduler { private static final Log LOG = LogFactory.getLog(TestFifoScheduler.class); - + private final int GB = 1024; + private ResourceManager resourceManager = null; private static final RecordFactory recordFactory = @@ -424,6 +427,40 @@ public class TestFifoScheduler { fs.applications, FiCaSchedulerApp.class); } + @SuppressWarnings("resource") + @Test + public void testBlackListNodes() throws Exception { + Configuration conf = new Configuration(); + conf.setClass(YarnConfiguration.RM_SCHEDULER, FifoScheduler.class, + ResourceScheduler.class); + MockRM rm = new MockRM(conf); + rm.start(); + FifoScheduler fs = (FifoScheduler) rm.getResourceScheduler(); + + String host = "127.0.0.1"; + RMNode node = + MockNodes.newNodeInfo(0, MockNodes.newResource(4 * GB), 1, host); + fs.handle(new NodeAddedSchedulerEvent(node)); + + ApplicationId appId = BuilderUtils.newApplicationId(100, 1); + ApplicationAttemptId appAttemptId = BuilderUtils.newApplicationAttemptId( + appId, 1); + SchedulerEvent event = new AppAddedSchedulerEvent(appAttemptId, "default", + "user"); + fs.handle(event); + + // Verify the blacklist can be updated independent of requesting containers + fs.allocate(appAttemptId, Collections.emptyList(), + Collections.emptyList(), + Collections.singletonList(host), null); + Assert.assertTrue(fs.getApplication(appAttemptId).isBlacklisted(host)); + fs.allocate(appAttemptId, Collections.emptyList(), + Collections.emptyList(), null, + Collections.singletonList(host)); + Assert.assertFalse(fs.getApplication(appAttemptId).isBlacklisted(host)); + rm.stop(); + } + private void checkApplicationResourceUsage(int expected, Application application) { Assert.assertEquals(expected, application.getUsedResources().getMemory()); From ed7278c59aa15f0397d5da5f1d6a03139d30e91e Mon Sep 17 00:00:00 2001 From: Jonathan Turner Eagles Date: Wed, 2 Oct 2013 21:51:05 +0000 Subject: [PATCH 029/133] YARN-425. 
coverage fix for yarn api (Aleksey Gorshkov via jeagles) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1528641 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 2 + ...gerAdministrationProtocolPBClientImpl.java | 191 ++++++++++++++++++ .../yarn/client/TestYarnApiClasses.java | 113 +++++++++++ .../test/resources/config-with-security.xml | 36 ++++ 4 files changed, 342 insertions(+) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceManagerAdministrationProtocolPBClientImpl.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestYarnApiClasses.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/resources/config-with-security.xml diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 28490d63d22..58ca8c1c04b 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -47,6 +47,8 @@ Release 2.3.0 - UNRELEASED YARN-677. Increase coverage to FairScheduler (Vadim Bondarev and Dennis Y via jeagles) + YARN-425. coverage fix for yarn api (Aleksey Gorshkov via jeagles) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceManagerAdministrationProtocolPBClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceManagerAdministrationProtocolPBClientImpl.java new file mode 100644 index 00000000000..47306693952 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceManagerAdministrationProtocolPBClientImpl.java @@ -0,0 +1,191 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.yarn.client; + +import java.io.IOException; +import java.net.InetSocketAddress; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.service.Service.STATE; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.factories.RecordFactory; +import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; +import org.apache.hadoop.yarn.server.api.ResourceManagerAdministrationProtocol; +import org.apache.hadoop.yarn.server.api.impl.pb.client.ResourceManagerAdministrationProtocolPBClientImpl; +import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshAdminAclsRequest; +import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshAdminAclsResponse; +import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshNodesRequest; +import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshNodesResponse; +import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshQueuesRequest; +import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshQueuesResponse; +import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshServiceAclsRequest; +import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshServiceAclsResponse; +import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshSuperUserGroupsConfigurationRequest; +import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshSuperUserGroupsConfigurationResponse; +import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshUserToGroupsMappingsRequest; +import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshUserToGroupsMappingsResponse; +import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +import static org.junit.Assert.*; + +/** + * Test ResourceManagerAdministrationProtocolPBClientImpl. Test a methods and the proxy without logic. + */ +public class TestResourceManagerAdministrationProtocolPBClientImpl { + private static ResourceManager resourceManager; + private static final Log LOG = LogFactory + .getLog(TestResourceManagerAdministrationProtocolPBClientImpl.class); + private final RecordFactory recordFactory = RecordFactoryProvider + .getRecordFactory(null); + + private static ResourceManagerAdministrationProtocol client; + + /** + * Start resource manager server + */ + + @BeforeClass + public static void setUpResourceManager() throws IOException, + InterruptedException { + Configuration.addDefaultResource("config-with-security.xml"); + Configuration configuration = new YarnConfiguration(); + resourceManager = new ResourceManager() { + @Override + protected void doSecureLogin() throws IOException { + } + }; + resourceManager.init(configuration); + new Thread() { + public void run() { + resourceManager.start(); + } + }.start(); + int waitCount = 0; + while (resourceManager.getServiceState() == STATE.INITED + && waitCount++ < 10) { + LOG.info("Waiting for RM to start..."); + Thread.sleep(1000); + } + if (resourceManager.getServiceState() != STATE.STARTED) { + throw new IOException("ResourceManager failed to start. 
Final state is " + + resourceManager.getServiceState()); + } + LOG.info("ResourceManager RMAdmin address: " + + configuration.get(YarnConfiguration.RM_ADMIN_ADDRESS)); + + client = new ResourceManagerAdministrationProtocolPBClientImpl(1L, + getProtocolAddress(configuration), configuration); + + } + + /** + * Test method refreshQueues. This method is present and it works. + */ + @Test + public void testRefreshQueues() throws Exception { + + RefreshQueuesRequest request = recordFactory + .newRecordInstance(RefreshQueuesRequest.class); + RefreshQueuesResponse response = client.refreshQueues(request); + assertNotNull(response); + } + + /** + * Test method refreshNodes. This method is present and it works. + */ + + @Test + public void testRefreshNodes() throws Exception { + resourceManager.getClientRMService(); + RefreshNodesRequest request = recordFactory + .newRecordInstance(RefreshNodesRequest.class); + RefreshNodesResponse response = client.refreshNodes(request); + assertNotNull(response); + } + + /** + * Test method refreshSuperUserGroupsConfiguration. This method present and it works. + */ + @Test + public void testRefreshSuperUserGroupsConfiguration() throws Exception { + + RefreshSuperUserGroupsConfigurationRequest request = recordFactory + .newRecordInstance(RefreshSuperUserGroupsConfigurationRequest.class); + RefreshSuperUserGroupsConfigurationResponse response = client + .refreshSuperUserGroupsConfiguration(request); + assertNotNull(response); + } + + /** + * Test method refreshUserToGroupsMappings. This method is present and it works. + */ + @Test + public void testRefreshUserToGroupsMappings() throws Exception { + RefreshUserToGroupsMappingsRequest request = recordFactory + .newRecordInstance(RefreshUserToGroupsMappingsRequest.class); + RefreshUserToGroupsMappingsResponse response = client + .refreshUserToGroupsMappings(request); + assertNotNull(response); + } + + /** + * Test method refreshAdminAcls. This method is present and it works. 
+ */ + + @Test + public void testRefreshAdminAcls() throws Exception { + RefreshAdminAclsRequest request = recordFactory + .newRecordInstance(RefreshAdminAclsRequest.class); + RefreshAdminAclsResponse response = client.refreshAdminAcls(request); + assertNotNull(response); + } + + @Test + public void testRefreshServiceAcls() throws Exception { + RefreshServiceAclsRequest request = recordFactory + .newRecordInstance(RefreshServiceAclsRequest.class); + RefreshServiceAclsResponse response = client.refreshServiceAcls(request); + assertNotNull(response); + + } + + /** + * Stop server + */ + + @AfterClass + public static void tearDownResourceManager() throws InterruptedException { + if (resourceManager != null) { + LOG.info("Stopping ResourceManager..."); + resourceManager.stop(); + } + } + + private static InetSocketAddress getProtocolAddress(Configuration conf) + throws IOException { + return conf.getSocketAddr(YarnConfiguration.RM_ADMIN_ADDRESS, + YarnConfiguration.DEFAULT_RM_ADMIN_ADDRESS, + YarnConfiguration.DEFAULT_RM_ADMIN_PORT); + } + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestYarnApiClasses.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestYarnApiClasses.java new file mode 100644 index 00000000000..0e3bc877ab5 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestYarnApiClasses.java @@ -0,0 +1,113 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.client; + + + +import org.apache.hadoop.security.proto.SecurityProtos.CancelDelegationTokenRequestProto; +import org.apache.hadoop.security.proto.SecurityProtos.RenewDelegationTokenRequestProto; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.CancelDelegationTokenRequestPBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.RenewDelegationTokenRequestPBImpl; +import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceRequest; +import org.apache.hadoop.yarn.api.records.Token; +import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; +import org.junit.Test; + +import static org.junit.Assert.*; + + +public class TestYarnApiClasses { + private final org.apache.hadoop.yarn.factories.RecordFactory recordFactory = RecordFactoryProvider + .getRecordFactory(null); + + /** + * Simple test Resource request. + * Test hashCode, equals and compare. 
+ */ + @Test + public void testResourceRequest() { + + Resource resource = recordFactory.newRecordInstance(Resource.class); + Priority priority = recordFactory.newRecordInstance(Priority.class); + + ResourceRequest original = ResourceRequest.newInstance(priority, "localhost", resource, 2) ; + + ResourceRequest copy = ResourceRequest.newInstance(priority, "localhost", resource, 2); + + assertTrue(original.equals(copy)); + assertEquals(0, original.compareTo(copy)); + assertTrue(original.hashCode() == copy.hashCode()); + + copy.setNumContainers(1); + + assertFalse(original.equals(copy)); + assertNotSame(0, original.compareTo(copy)); + assertFalse(original.hashCode() == copy.hashCode()); + + } + + /** + * Test CancelDelegationTokenRequestPBImpl. + * Test a transformation to prototype and back + */ + @Test + public void testCancelDelegationTokenRequestPBImpl() { + + Token token = getDelegationToken(); + + CancelDelegationTokenRequestPBImpl original = new CancelDelegationTokenRequestPBImpl(); + original.setDelegationToken(token); + CancelDelegationTokenRequestProto protoType = original.getProto(); + + CancelDelegationTokenRequestPBImpl copy = new CancelDelegationTokenRequestPBImpl(protoType); + assertNotNull(copy.getDelegationToken()); + //compare source and converted + assertEquals(token, copy.getDelegationToken()); + + } + + /** + * Test RenewDelegationTokenRequestPBImpl. + * Test a transformation to prototype and back + */ + + @Test + public void testRenewDelegationTokenRequestPBImpl() { + + Token token = getDelegationToken(); + + RenewDelegationTokenRequestPBImpl original = new RenewDelegationTokenRequestPBImpl(); + original.setDelegationToken(token); + RenewDelegationTokenRequestProto protoType = original.getProto(); + + RenewDelegationTokenRequestPBImpl copy = new RenewDelegationTokenRequestPBImpl(protoType); + assertNotNull(copy.getDelegationToken()); + //compare source and converted + assertEquals(token, copy.getDelegationToken()); + + } + + + private Token getDelegationToken() { + return Token.newInstance(new byte[0], "", new byte[0], ""); + } + + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/resources/config-with-security.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/resources/config-with-security.xml new file mode 100644 index 00000000000..22eaf492247 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/resources/config-with-security.xml @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + hadoop.security.authorization + true + Is service-level authorization enabled? + + + + From 90a5dc34f7be0a92d2b644b80cf42e9307deff68 Mon Sep 17 00:00:00 2001 From: Sanford Ryza Date: Wed, 2 Oct 2013 21:53:57 +0000 Subject: [PATCH 030/133] MAPREDUCE-5554. hdfs-site.xml included in hadoop-mapreduce-client-jobclient tests jar is breaking tests for downstream components (Robert Kanter via Sandy Ryza) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1528643 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 4 ++++ .../hadoop-mapreduce-client-jobclient/pom.xml | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index a5590213ca2..af76c67ea89 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -268,6 +268,10 @@ Release 2.1.2 - UNRELEASED MAPREDUCE-5459. Update documentation on how to run MRv1 examples on YARN. (Zhijie Shen via acmurthy) + MAPREDUCE-5554. 
hdfs-site.xml included in hadoop-mapreduce-client-jobclient
+    tests jar is breaking tests for downstream components (Robert Kanter via
+    Sandy Ryza)
+
 Release 2.1.1-beta - 2013-09-23

   INCOMPATIBLE CHANGES

diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/pom.xml
index edb0fc6b53f..530822f119e 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/pom.xml
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/pom.xml
@@ -124,6 +124,11 @@
               <goal>test-jar</goal>
+            <configuration>
+              <excludes>
+                <exclude>**/hdfs-site.xml</exclude>
+              </excludes>
+            </configuration>
             <phase>test-compile</phase>

From 86cc60c43b65a09f317cf75ab64e307745c213e0 Mon Sep 17 00:00:00 2001
From: Sanford Ryza
Date: Wed, 2 Oct 2013 22:33:47 +0000
Subject: [PATCH 031/133] YARN-876. Node resource is added twice when node comes back from unhealthy. (Peng Zhang via Sandy Ryza)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1528660 13f79535-47bb-0310-9956-ffa450edef68
---
 hadoop-yarn-project/CHANGES.txt                         | 3 +++
 .../yarn/server/resourcemanager/rmnode/RMNodeImpl.java  | 7 +++++--
 .../resourcemanager/TestResourceTrackerService.java     | 8 ++++++++
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 58ca8c1c04b..2de54a17812 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -129,6 +129,9 @@ Release 2.1.2 - UNRELEASED
     YARN-1141. Updating resource requests should be decoupled with updating
     blacklist (Zhijie Shen via bikas)

+    YARN-876. Node resource is added twice when node comes back from unhealthy
+    to healthy. (Peng Zhang via Sandy Ryza)
+
 Release 2.1.1-beta - 2013-09-23

   INCOMPATIBLE CHANGES

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java
index fa6ae4a20a2..7964e75b6d5 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java
@@ -460,8 +460,11 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
         && rmNode.getHttpPort() == newNode.getHttpPort()) {
       // Reset heartbeat ID since node just restarted.
rmNode.getLastNodeHeartBeatResponse().setResponseId(0); - rmNode.context.getDispatcher().getEventHandler().handle( - new NodeAddedSchedulerEvent(rmNode)); + if (rmNode.getState() != NodeState.UNHEALTHY) { + // Only add new node if old state is not UNHEALTHY + rmNode.context.getDispatcher().getEventHandler().handle( + new NodeAddedSchedulerEvent(rmNode)); + } } else { // Reconnected node differs, so replace old node and start new node switch (rmNode.getState()) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java index 81e2a81fe8a..abb21edfc5d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java @@ -506,6 +506,14 @@ public class TestResourceTrackerService { dispatcher.await(); Assert.assertEquals(expectedNMs, ClusterMetrics.getMetrics().getNumActiveNMs()); checkUnealthyNMCount(rm, nm2, true, 1); + + // unhealthy node changed back to healthy + nm2 = rm.registerNode("host2:5678", 5120); + dispatcher.await(); + response = nm2.nodeHeartbeat(true); + response = nm2.nodeHeartbeat(true); + dispatcher.await(); + Assert.assertEquals(5120 + 5120, metrics.getAvailableMB()); // reconnect of node with changed capability nm1 = rm.registerNode("host2:5678", 10240); From 65a55f2294cc94300b27bdcbbe7f828cd233d018 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Thu, 3 Oct 2013 00:38:00 +0000 Subject: [PATCH 032/133] HDFS-5289. Race condition in TestRetryCacheWithHA#testCreateSymlink causes spurious test failure. Contributed by Aaron T. Myers. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1528693 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../server/namenode/ha/TestRetryCacheWithHA.java | 13 +++++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 29b9be665d6..f38e426011c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -388,6 +388,9 @@ Release 2.1.2 - UNRELEASED HDFS-5279. Guard against NullPointerException in NameNode JSP pages before initialization of FSNamesystem. (cnauroth) + HDFS-5289. Race condition in TestRetryCacheWithHA#testCreateSymlink causes + spurious test failure. 
(atm) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java index dff44a0c89c..82deab59386 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hdfs.server.namenode.ha; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import java.io.FileNotFoundException; import java.io.IOException; import java.lang.reflect.InvocationHandler; import java.lang.reflect.Method; @@ -646,10 +647,14 @@ public class TestRetryCacheWithHA { @Override boolean checkNamenodeBeforeReturn() throws Exception { Path linkPath = new Path(link); - FileStatus linkStatus = dfs.getFileLinkStatus(linkPath); + FileStatus linkStatus = null; for (int i = 0; i < CHECKTIMES && linkStatus == null; i++) { - Thread.sleep(1000); - linkStatus = dfs.getFileLinkStatus(linkPath); + try { + linkStatus = dfs.getFileLinkStatus(linkPath); + } catch (FileNotFoundException fnf) { + // Ignoring, this can be legitimate. + Thread.sleep(1000); + } } return linkStatus != null; } @@ -857,4 +862,4 @@ public class TestRetryCacheWithHA { + results.get(op.name)); } } -} \ No newline at end of file +} From 2db1d4d4ee7838f2a8c0b3999b6055456798321d Mon Sep 17 00:00:00 2001 From: Sanford Ryza Date: Thu, 3 Oct 2013 01:17:48 +0000 Subject: [PATCH 033/133] YARN-1213. Restore config to ban submitting to undeclared pools in the Fair Scheduler. (Sandy Ryza) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1528696 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 + .../scheduler/fair/FairScheduler.java | 7 +- .../fair/FairSchedulerConfiguration.java | 9 ++ .../scheduler/fair/QueueManager.java | 22 ++--- .../scheduler/fair/TestFairScheduler.java | 88 ++++++++++++++----- 5 files changed, 89 insertions(+), 40 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 2de54a17812..5efa8d158d9 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -92,6 +92,9 @@ Release 2.1.2 - UNRELEASED YARN-1228. Clean up Fair Scheduler configuration loading. (Sandy Ryza) + YARN-1213. Restore config to ban submitting to undeclared pools in the + Fair Scheduler. 
(Sandy Ryza) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index fa4c21805be..f7897e0f2f6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -677,10 +677,11 @@ public class FairScheduler implements ResourceScheduler { queueName = user; } - FSLeafQueue queue = queueMgr.getLeafQueue(queueName); + FSLeafQueue queue = queueMgr.getLeafQueue(queueName, + conf.getAllowUndeclaredPools()); if (queue == null) { // queue is not an existing or createable leaf queue - queue = queueMgr.getLeafQueue(YarnConfiguration.DEFAULT_QUEUE_NAME); + queue = queueMgr.getLeafQueue(YarnConfiguration.DEFAULT_QUEUE_NAME, false); } if (rmApp != null) { @@ -726,7 +727,7 @@ public class FairScheduler implements ResourceScheduler { // Inform the queue FSLeafQueue queue = queueMgr.getLeafQueue(application.getQueue() - .getQueueName()); + .getQueueName(), false); queue.removeApp(application); // Remove from our data-structure diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerConfiguration.java index edfc8fa83e0..b76d8eb8d75 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerConfiguration.java @@ -54,6 +54,11 @@ public class FairSchedulerConfiguration extends Configuration { protected static final String DEFAULT_ALLOCATION_FILE = "fair-scheduler.xml"; protected static final String EVENT_LOG_DIR = "eventlog.dir"; + /** Whether pools can be created that were not specified in the FS configuration file + */ + protected static final String ALLOW_UNDECLARED_POOLS = CONF_PREFIX + "allow-undeclared-pools"; + protected static final boolean DEFAULT_ALLOW_UNDECLARED_POOLS = true; + /** Whether to use the user name as the queue name (instead of "default") if * the request does not specify a queue. 
*/ protected static final String USER_AS_DEFAULT_QUEUE = CONF_PREFIX + "user-as-default-queue"; @@ -141,6 +146,10 @@ public class FairSchedulerConfiguration extends Configuration { DEFAULT_RM_SCHEDULER_INCREMENT_ALLOCATION_VCORES); return Resources.createResource(incrementMemory, incrementCores); } + + public boolean getAllowUndeclaredPools() { + return getBoolean(ALLOW_UNDECLARED_POOLS, DEFAULT_ALLOW_UNDECLARED_POOLS); + } public boolean getUserAsDefaultQueue() { return getBoolean(USER_AS_DEFAULT_QUEUE, DEFAULT_USER_AS_DEFAULT_QUEUE); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueueManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueueManager.java index 7560309f5e7..b891381cd81 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueueManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueueManager.java @@ -109,26 +109,27 @@ public class QueueManager { lastSuccessfulReload = scheduler.getClock().getTime(); lastReloadAttempt = scheduler.getClock().getTime(); // Create the default queue - getLeafQueue(YarnConfiguration.DEFAULT_QUEUE_NAME); + getLeafQueue(YarnConfiguration.DEFAULT_QUEUE_NAME, true); } /** - * Get a queue by name, creating it if necessary. If the queue - * is not or can not be a leaf queue, i.e. it already exists as a parent queue, - * or one of the parents in its name is already a leaf queue, null is returned. + * Get a queue by name, creating it if the create param is true and is necessary. + * If the queue is not or can not be a leaf queue, i.e. it already exists as a + * parent queue, or one of the parents in its name is already a leaf queue, + * null is returned. * * The root part of the name is optional, so a queue underneath the root * named "queue1" could be referred to as just "queue1", and a queue named * "queue2" underneath a parent named "parent1" that is underneath the root * could be referred to as just "parent1.queue2". */ - public FSLeafQueue getLeafQueue(String name) { + public FSLeafQueue getLeafQueue(String name, boolean create) { if (!name.startsWith(ROOT_QUEUE + ".")) { name = ROOT_QUEUE + "." + name; } synchronized (queues) { FSQueue queue = queues.get(name); - if (queue == null) { + if (queue == null && create) { FSLeafQueue leafQueue = createLeafQueue(name); if (leafQueue == null) { return null; @@ -223,13 +224,6 @@ public class QueueManager { } } - /** - * Get the queue for a given AppSchedulable. 
- */ - public FSLeafQueue getQueueForApp(AppSchedulable app) { - return getLeafQueue(app.getApp().getQueueName()); - } - /** * Reload allocations file if it hasn't been loaded in a while */ @@ -384,7 +378,7 @@ public class QueueManager { // Create all queus for (String name: queueNamesInAllocFile) { - getLeafQueue(name); + getLeafQueue(name, true); } // Set custom policies as specified diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index 3086afcb5a9..e697560b223 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -436,9 +436,9 @@ public class TestFairScheduler { Collection queues = queueManager.getLeafQueues(); assertEquals(3, queues.size()); - FSLeafQueue queue1 = queueManager.getLeafQueue("default"); - FSLeafQueue queue2 = queueManager.getLeafQueue("parent.queue2"); - FSLeafQueue queue3 = queueManager.getLeafQueue("parent.queue3"); + FSLeafQueue queue1 = queueManager.getLeafQueue("default", true); + FSLeafQueue queue2 = queueManager.getLeafQueue("parent.queue2", true); + FSLeafQueue queue3 = queueManager.getLeafQueue("parent.queue3", true); assertEquals(capacity / 2, queue1.getFairShare().getMemory()); assertEquals(capacity / 2, queue1.getMetrics().getFairShareMB()); assertEquals(capacity / 4, queue2.getFairShare().getMemory()); @@ -450,20 +450,20 @@ public class TestFairScheduler { @Test public void testHierarchicalQueuesSimilarParents() { QueueManager queueManager = scheduler.getQueueManager(); - FSLeafQueue leafQueue = queueManager.getLeafQueue("parent.child"); + FSLeafQueue leafQueue = queueManager.getLeafQueue("parent.child", true); Assert.assertEquals(2, queueManager.getLeafQueues().size()); Assert.assertNotNull(leafQueue); Assert.assertEquals("root.parent.child", leafQueue.getName()); - FSLeafQueue leafQueue2 = queueManager.getLeafQueue("parent"); + FSLeafQueue leafQueue2 = queueManager.getLeafQueue("parent", true); Assert.assertNull(leafQueue2); Assert.assertEquals(2, queueManager.getLeafQueues().size()); - FSLeafQueue leafQueue3 = queueManager.getLeafQueue("parent.child.grandchild"); + FSLeafQueue leafQueue3 = queueManager.getLeafQueue("parent.child.grandchild", true); Assert.assertNull(leafQueue3); Assert.assertEquals(2, queueManager.getLeafQueues().size()); - FSLeafQueue leafQueue4 = queueManager.getLeafQueue("parent.sister"); + FSLeafQueue leafQueue4 = queueManager.getLeafQueue("parent.sister", true); Assert.assertNotNull(leafQueue4); Assert.assertEquals("root.parent.sister", leafQueue4.getName()); Assert.assertEquals(3, queueManager.getLeafQueues().size()); @@ -612,9 +612,9 @@ public class TestFairScheduler { AppAddedSchedulerEvent appAddedEvent = new AppAddedSchedulerEvent( createAppAttemptId(1, 1), "default", "user1"); scheduler.handle(appAddedEvent); - assertEquals(1, scheduler.getQueueManager().getLeafQueue("user1") + assertEquals(1, scheduler.getQueueManager().getLeafQueue("user1", true) 
.getAppSchedulables().size()); - assertEquals(0, scheduler.getQueueManager().getLeafQueue("default") + assertEquals(0, scheduler.getQueueManager().getLeafQueue("default", true) .getAppSchedulables().size()); conf.set(FairSchedulerConfiguration.USER_AS_DEFAULT_QUEUE, "false"); @@ -622,11 +622,11 @@ public class TestFairScheduler { AppAddedSchedulerEvent appAddedEvent2 = new AppAddedSchedulerEvent( createAppAttemptId(2, 1), "default", "user2"); scheduler.handle(appAddedEvent2); - assertEquals(1, scheduler.getQueueManager().getLeafQueue("user1") + assertEquals(1, scheduler.getQueueManager().getLeafQueue("user1", true) .getAppSchedulables().size()); - assertEquals(1, scheduler.getQueueManager().getLeafQueue("default") + assertEquals(1, scheduler.getQueueManager().getLeafQueue("default", true) .getAppSchedulables().size()); - assertEquals(0, scheduler.getQueueManager().getLeafQueue("user2") + assertEquals(0, scheduler.getQueueManager().getLeafQueue("user2", true) .getAppSchedulables().size()); } @@ -772,7 +772,7 @@ public class TestFairScheduler { assertEquals(2, scheduler.getQueueManager().getLeafQueues().size()); // That queue should have one app - assertEquals(1, scheduler.getQueueManager().getLeafQueue("user1") + assertEquals(1, scheduler.getQueueManager().getLeafQueue("user1", true) .getAppSchedulables().size()); AppRemovedSchedulerEvent appRemovedEvent1 = new AppRemovedSchedulerEvent( @@ -782,7 +782,7 @@ public class TestFairScheduler { scheduler.handle(appRemovedEvent1); // Queue should have no apps - assertEquals(0, scheduler.getQueueManager().getLeafQueue("user1") + assertEquals(0, scheduler.getQueueManager().getLeafQueue("user1", true) .getAppSchedulables().size()); } @@ -919,10 +919,10 @@ public class TestFairScheduler { Collection leafQueues = queueManager.getLeafQueues(); Assert.assertEquals(4, leafQueues.size()); - Assert.assertNotNull(queueManager.getLeafQueue("queueA")); - Assert.assertNotNull(queueManager.getLeafQueue("queueB.queueC")); - Assert.assertNotNull(queueManager.getLeafQueue("queueB.queueD")); - Assert.assertNotNull(queueManager.getLeafQueue("default")); + Assert.assertNotNull(queueManager.getLeafQueue("queueA", true)); + Assert.assertNotNull(queueManager.getLeafQueue("queueB.queueC", true)); + Assert.assertNotNull(queueManager.getLeafQueue("queueB.queueD", true)); + Assert.assertNotNull(queueManager.getLeafQueue("default", true)); // Make sure querying for queues didn't create any new ones: Assert.assertEquals(4, leafQueues.size()); } @@ -1423,9 +1423,9 @@ public class TestFairScheduler { scheduler.update(); FSLeafQueue schedC = - scheduler.getQueueManager().getLeafQueue("queueC"); + scheduler.getQueueManager().getLeafQueue("queueC", true); FSLeafQueue schedD = - scheduler.getQueueManager().getLeafQueue("queueD"); + scheduler.getQueueManager().getLeafQueue("queueD", true); assertTrue(Resources.equals( Resources.none(), scheduler.resToPreempt(schedC, clock.getTime()))); @@ -1688,7 +1688,7 @@ public class TestFairScheduler { FSSchedulerApp app1 = scheduler.applications.get(attId1); FSSchedulerApp app2 = scheduler.applications.get(attId2); - FSLeafQueue queue1 = scheduler.getQueueManager().getLeafQueue("queue1"); + FSLeafQueue queue1 = scheduler.getQueueManager().getLeafQueue("queue1", true); queue1.setPolicy(new FifoPolicy()); scheduler.update(); @@ -1716,7 +1716,7 @@ public class TestFairScheduler { public void testMaxAssign() throws AllocationConfigurationException { // set required scheduler configs scheduler.assignMultiple = true; - 
scheduler.getQueueManager().getLeafQueue("root.default")
+    scheduler.getQueueManager().getLeafQueue("root.default", true)
         .setPolicy(SchedulingPolicy.getDefault());

     RMNode node =
@@ -1793,7 +1793,7 @@
     FSSchedulerApp app3 = scheduler.applications.get(attId3);
     FSSchedulerApp app4 = scheduler.applications.get(attId4);

-    scheduler.getQueueManager().getLeafQueue(fifoQueue)
+    scheduler.getQueueManager().getLeafQueue(fifoQueue, true)
         .setPolicy(SchedulingPolicy.parse("fifo"));

     scheduler.update();
@@ -2405,4 +2405,46 @@
       //expected
     }
   }
+
+  @Test
+  public void testDontAllowUndeclaredPools() throws Exception{
+    Configuration conf = createConfiguration();
+    conf.setBoolean(FairSchedulerConfiguration.ALLOW_UNDECLARED_POOLS, false);
+    conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);
+    scheduler.reinitialize(conf, resourceManager.getRMContext());
+
+    PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE));
+    out.println("<?xml version=\"1.0\"?>");
+    out.println("<allocations>");
+    out.println("<queue name=\"jerry\">");
+    out.println("</queue>");
+    out.println("</allocations>");
+    out.close();
+
+    QueueManager queueManager = scheduler.getQueueManager();
+    queueManager.initialize();
+
+    FSLeafQueue jerryQueue = queueManager.getLeafQueue("jerry", false);
+    FSLeafQueue defaultQueue = queueManager.getLeafQueue("default", false);
+
+    // Should get put into jerry
+    createSchedulingRequest(1024, "jerry", "someuser");
+    assertEquals(1, jerryQueue.getAppSchedulables().size());
+
+    // Should get forced into default
+    createSchedulingRequest(1024, "newqueue", "someuser");
+    assertEquals(1, jerryQueue.getAppSchedulables().size());
+    assertEquals(1, defaultQueue.getAppSchedulables().size());
+
+    // Would get put into someuser because of user-as-default-queue, but should
+    // be forced into default
+    createSchedulingRequest(1024, "default", "someuser");
+    assertEquals(1, jerryQueue.getAppSchedulables().size());
+    assertEquals(2, defaultQueue.getAppSchedulables().size());
+
+    // Should get put into jerry because of user-as-default-queue
+    createSchedulingRequest(1024, "default", "jerry");
+    assertEquals(2, jerryQueue.getAppSchedulables().size());
+    assertEquals(2, defaultQueue.getAppSchedulables().size());
+  }
 }

From dbdb8c6f1f6bfe2663f81d9950b21a7b99a0d6ec Mon Sep 17 00:00:00 2001
From: Jonathan Turner Eagles
Date: Thu, 3 Oct 2013 15:52:30 +0000
Subject: [PATCH 034/133] Revert YARN-677. Increase coverage to FairScheduler (Vadim Bondarev and Dennis Y via jeagles)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1528914 13f79535-47bb-0310-9956-ffa450edef68
---
 hadoop-yarn-project/CHANGES.txt           |  3 -
 .../scheduler/fair/TestFairScheduler.java | 93 -------------------
 2 files changed, 96 deletions(-)

diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 5efa8d158d9..788a4cc6c96 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -44,9 +44,6 @@ Release 2.3.0 - UNRELEASED
     YARN-819. ResourceManager and NodeManager should check for a minimum
     allowed version (Robert Parker via jeagles)

-    YARN-677. Increase coverage to FairScheduler (Vadim Bondarev and Dennis Y
-    via jeagles)
-
     YARN-425.
coverage fix for yarn api (Aleksey Gorshkov via jeagles) OPTIMIZATIONS diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index e697560b223..7cc800260e7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -39,7 +39,6 @@ import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.concurrent.TimeUnit; import javax.xml.parsers.ParserConfigurationException; @@ -87,8 +86,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedS import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies.DominantResourceFairnessPolicy; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies.FifoPolicy; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler; @@ -103,9 +100,6 @@ import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; import org.xml.sax.SAXException; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.UnmodifiableIterator; - public class TestFairScheduler { private class MockClock implements Clock { @@ -2318,93 +2312,6 @@ public class TestFairScheduler { Assert.assertEquals(1, consumption.getVirtualCores()); } - - @Test - public void testAggregateCapacityTrackingWithPreemptionEnabled() throws Exception { - int KB = 1024; - int iterationNumber = 10; - Configuration conf = createConfiguration(); - conf.setBoolean("yarn.scheduler.fair.preemption", true); - scheduler.reinitialize(conf, resourceManager.getRMContext()); - RMNode node = MockNodes.newNodeInfo(1, Resources.createResource(KB * iterationNumber)); - NodeAddedSchedulerEvent nodeAddEvent = new NodeAddedSchedulerEvent(node); - scheduler.handle(nodeAddEvent); - - for (int i = 0; i < iterationNumber; i++) { - createSchedulingRequest(KB, "queue1", "user1", 1); - scheduler.update(); - NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node); - scheduler.handle(updateEvent); - - assertEquals(KB, - scheduler.getQueueManager().getQueue("queue1").getResourceUsage().getMemory()); - TimeUnit.SECONDS.sleep(1); - } - } - - private static final class ExternalAppAddedSchedulerEvent extends SchedulerEvent { - public ExternalAppAddedSchedulerEvent() { - super(SchedulerEventType.APP_ADDED); - } - } - - private static final class ExternalNodeRemovedSchedulerEvent extends SchedulerEvent { - public ExternalNodeRemovedSchedulerEvent() 
{ - super(SchedulerEventType.NODE_REMOVED); - } - } - - private static final class ExternalNodeUpdateSchedulerEvent extends SchedulerEvent { - public ExternalNodeUpdateSchedulerEvent() { - super(SchedulerEventType.NODE_UPDATE); - } - } - - private static final class ExternalNodeAddedSchedulerEvent extends SchedulerEvent { - public ExternalNodeAddedSchedulerEvent() { - super(SchedulerEventType.NODE_ADDED); - } - } - - private static final class ExternalAppRemovedSchedulerEvent extends SchedulerEvent { - public ExternalAppRemovedSchedulerEvent() { - super(SchedulerEventType.APP_REMOVED); - } - } - - private static final class ExternalContainerExpiredSchedulerEvent extends SchedulerEvent { - public ExternalContainerExpiredSchedulerEvent() { - super(SchedulerEventType.CONTAINER_EXPIRED); - } - } - - /** - * try to handle external events type - * and get {@code RuntimeException} - * - * @throws Exception - */ - @Test - public void testSchedulerHandleFailWithExternalEvents() throws Exception { - Configuration conf = createConfiguration(); - scheduler.reinitialize(conf, resourceManager.getRMContext()); - ImmutableSet externalEvents = ImmutableSet.of(new ExternalAppAddedSchedulerEvent(), - new ExternalNodeRemovedSchedulerEvent(), new ExternalNodeUpdateSchedulerEvent(), - new ExternalNodeAddedSchedulerEvent(), new ExternalAppRemovedSchedulerEvent(), - new ExternalContainerExpiredSchedulerEvent()); - - UnmodifiableIterator iter = externalEvents.iterator(); - while(iter.hasNext()) - handleExternalEvent(iter.next()); - } - - private void handleExternalEvent(SchedulerEvent event) throws Exception { - try { - scheduler.handle(event); - } catch(RuntimeException ex) { - //expected - } - } @Test public void testDontAllowUndeclaredPools() throws Exception{ From 7b687dda098c94b55c9848827cfc2d6326b6f9b6 Mon Sep 17 00:00:00 2001 From: Jonathan Turner Eagles Date: Thu, 3 Oct 2013 21:04:12 +0000 Subject: [PATCH 035/133] YARN-1199. 
Make NM/RM Versions Available (Mit Desai via jeagles) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529003 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/hadoop/mapreduce/v2/TestRMNMInfo.java | 2 ++ .../apache/hadoop/yarn/sls/nodemanager/NodeInfo.java | 6 ++++++ .../hadoop/yarn/sls/scheduler/RMNodeWrapper.java | 6 ++++++ hadoop-yarn-project/CHANGES.txt | 2 ++ .../hadoop/yarn/server/resourcemanager/RMNMInfo.java | 2 ++ .../resourcemanager/ResourceTrackerService.java | 2 +- .../yarn/server/resourcemanager/rmnode/RMNode.java | 9 +++++++-- .../server/resourcemanager/rmnode/RMNodeImpl.java | 9 ++++++++- .../server/resourcemanager/webapp/NodesPage.java | 2 ++ .../server/resourcemanager/webapp/dao/NodeInfo.java | 6 ++++++ .../hadoop/yarn/server/resourcemanager/MockNM.java | 11 ++++++++++- .../yarn/server/resourcemanager/MockNodes.java | 5 +++++ .../resourcemanager/TestRMNodeTransitions.java | 6 +++--- .../server/resourcemanager/webapp/TestNodesPage.java | 2 +- .../webapp/TestRMWebServicesNodes.java | 12 ++++++++---- 15 files changed, 69 insertions(+), 13 deletions(-) diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestRMNMInfo.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestRMNMInfo.java index 4d16a6b326c..4ac1f86b57d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestRMNMInfo.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestRMNMInfo.java @@ -120,6 +120,7 @@ public class TestRMNMInfo { Assert.assertNotNull(n.get("NodeHTTPAddress")); Assert.assertNotNull(n.get("LastHealthUpdate")); Assert.assertNotNull(n.get("HealthReport")); + Assert.assertNotNull(n.get("NodeManagerVersion")); Assert.assertNotNull(n.get("NumContainers")); Assert.assertEquals( n.get("NodeId") + ": Unexpected number of used containers", @@ -156,6 +157,7 @@ public class TestRMNMInfo { Assert.assertNotNull(n.get("NodeHTTPAddress")); Assert.assertNotNull(n.get("LastHealthUpdate")); Assert.assertNotNull(n.get("HealthReport")); + Assert.assertNotNull(n.get("NodeManagerVersion")); Assert.assertNull(n.get("NumContainers")); Assert.assertNull(n.get("UsedMemoryMB")); Assert.assertNull(n.get("AvailableMemoryMB")); diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java index 03ae6b05e7f..b1c76089ed7 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java @@ -147,6 +147,12 @@ public class NodeInfo { list2)); return list; } + + @Override + public String getNodeManagerVersion() { + // TODO Auto-generated method stub + return null; + } } public static RMNode newNodeInfo(String rackName, String hostName, diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java index ba228b3277d..c6fa2f01358 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java +++ 
b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java @@ -138,4 +138,10 @@ public class RMNodeWrapper implements RMNode { return updates; } +@Override +public String getNodeManagerVersion() { + // TODO Auto-generated method stub + return null; +} + } diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 788a4cc6c96..1db9894223e 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -46,6 +46,8 @@ Release 2.3.0 - UNRELEASED YARN-425. coverage fix for yarn api (Aleksey Gorshkov via jeagles) + YARN-1199. Make NM/RM Versions Available (Mit Desai via jeagles) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMNMInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMNMInfo.java index 5f7002ce4b1..ef4a0d47700 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMNMInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMNMInfo.java @@ -90,6 +90,8 @@ public class RMNMInfo implements RMNMInfoBeans { ni.getLastHealthReportTime()); info.put("HealthReport", ni.getHealthReport()); + info.put("NodeManagerVersion", + ni.getNodeManagerVersion()); if(report != null) { info.put("NumContainers", report.getNumContainers()); info.put("UsedMemoryMB", report.getUsedResource().getMemory()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java index d29115797e0..7995fb37d60 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java @@ -234,7 +234,7 @@ public class ResourceTrackerService extends AbstractService implements .getCurrentKey()); RMNode rmNode = new RMNodeImpl(nodeId, rmContext, host, cmPort, httpPort, - resolve(host), capability); + resolve(host), capability, nodeManagerVersion); RMNode oldNode = this.rmContext.getRMNodes().putIfAbsent(nodeId, rmNode); if (oldNode == null) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNode.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNode.java index 0021e25b180..b3609747d1a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNode.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNode.java @@ -27,7 +27,6 @@ import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeState; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse; -import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus; /** * Node managers information on available resources @@ -84,7 +83,13 @@ public interface RMNode { * @return the time of the latest health report received from this node. */ public long getLastHealthReportTime(); - + + /** + * the node manager version of the node received as part of the + * registration with the resource manager + */ + public String getNodeManagerVersion(); + /** * the total available resource. * @return the total available resource. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java index 7964e75b6d5..00a5cb4f209 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java @@ -97,6 +97,7 @@ public class RMNodeImpl implements RMNode, EventHandler { private String healthReport; private long lastHealthReportTime; + private String nodeManagerVersion; /* set of containers that have just launched */ private final Map justLaunchedContainers = @@ -172,7 +173,7 @@ public class RMNodeImpl implements RMNode, EventHandler { RMNodeEvent> stateMachine; public RMNodeImpl(NodeId nodeId, RMContext context, String hostName, - int cmPort, int httpPort, Node node, Resource capability) { + int cmPort, int httpPort, Node node, Resource capability, String nodeManagerVersion) { this.nodeId = nodeId; this.context = context; this.hostName = hostName; @@ -184,6 +185,7 @@ public class RMNodeImpl implements RMNode, EventHandler { this.node = node; this.healthReport = "Healthy"; this.lastHealthReportTime = System.currentTimeMillis(); + this.nodeManagerVersion = nodeManagerVersion; this.latestNodeHeartBeatResponse.setResponseId(0); @@ -288,6 +290,11 @@ public class RMNodeImpl implements RMNode, EventHandler { } } + @Override + public String getNodeManagerVersion() { + return nodeManagerVersion; + } + @Override public NodeState getState() { this.readLock.lock(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java index 87720e05960..d79e7759da6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java @@ -76,6 +76,7 @@ class NodesPage extends RmView { th(".containers", "Containers"). th(".mem", "Mem Used"). th(".mem", "Mem Avail"). + th(".nodeManagerVersion", "Version"). _()._(). tbody(); NodeState stateFilter = null; @@ -129,6 +130,7 @@ class NodesPage extends RmView { _(StringUtils.byteDesc(usedMemory * BYTES_IN_MB))._(). td().br().$title(String.valueOf(usedMemory))._(). _(StringUtils.byteDesc(availableMemory * BYTES_IN_MB))._(). + td(ni.getNodeManagerVersion()). _(); } tbody._()._(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeInfo.java index 99506f33a0d..3354ff20308 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeInfo.java @@ -38,6 +38,7 @@ public class NodeInfo { protected String nodeHostName; protected String nodeHTTPAddress; protected long lastHealthUpdate; + protected String version; protected String healthReport; protected int numContainers; protected long usedMemoryMB; @@ -64,6 +65,7 @@ public class NodeInfo { this.nodeHTTPAddress = ni.getHttpAddress(); this.lastHealthUpdate = ni.getLastHealthReportTime(); this.healthReport = String.valueOf(ni.getHealthReport()); + this.version = ni.getNodeManagerVersion(); } public String getRack() { @@ -90,6 +92,10 @@ public class NodeInfo { return this.lastHealthUpdate; } + public String getVersion() { + return this.version; + } + public String getHealthReport() { return this.healthReport; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java index 1258bb86707..c2cf147d292 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java @@ -40,6 +40,7 @@ import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus; import org.apache.hadoop.yarn.server.api.records.NodeStatus; import org.apache.hadoop.yarn.server.utils.BuilderUtils; import org.apache.hadoop.yarn.util.Records; +import org.apache.hadoop.yarn.util.YarnVersionInfo; public class MockNM { @@ -51,6 +52,7 @@ public class MockNM { private final int httpPort = 2; private MasterKey currentContainerTokenMasterKey; private MasterKey currentNMTokenMasterKey; + private String version; public MockNM(String nodeIdStr, int memory, ResourceTrackerService resourceTracker) { // scale vcores based on the requested memory @@ -61,10 +63,16 @@ public class MockNM { } public MockNM(String 
nodeIdStr, int memory, int vcores, - ResourceTrackerService resourceTracker) { + ResourceTrackerService resourceTracker) { + this(nodeIdStr, memory, vcores, resourceTracker, YarnVersionInfo.getVersion()); + } + + public MockNM(String nodeIdStr, int memory, int vcores, + ResourceTrackerService resourceTracker, String version) { this.memory = memory; this.vCores = vcores; this.resourceTracker = resourceTracker; + this.version = version; String[] splits = nodeIdStr.split(":"); nodeId = BuilderUtils.newNodeId(splits[0], Integer.parseInt(splits[1])); } @@ -96,6 +104,7 @@ public class MockNM { req.setHttpPort(httpPort); Resource resource = BuilderUtils.newResource(memory, vCores); req.setResource(resource); + req.setNMVersion(version); RegisterNodeManagerResponse registrationResponse = resourceTracker.registerNodeManager(req); this.currentContainerTokenMasterKey = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java index d69828d0fc4..049bac860b7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java @@ -183,6 +183,11 @@ public class MockNodes { return null; } + @Override + public String getNodeManagerVersion() { + return null; + } + @Override public List pullContainerUpdates() { return new ArrayList(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMNodeTransitions.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMNodeTransitions.java index 608fa6449b4..c060bb603c8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMNodeTransitions.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMNodeTransitions.java @@ -110,7 +110,7 @@ public class TestRMNodeTransitions { new TestSchedulerEventDispatcher()); NodeId nodeId = BuilderUtils.newNodeId("localhost", 0); - node = new RMNodeImpl(nodeId, rmContext, null, 0, 0, null, null); + node = new RMNodeImpl(nodeId, rmContext, null, 0, 0, null, null, null); } @@ -166,7 +166,7 @@ public class TestRMNodeTransitions { node.handle(new RMNodeEvent(null,RMNodeEventType.STARTED)); NodeId nodeId = BuilderUtils.newNodeId("localhost:1", 1); - RMNodeImpl node2 = new RMNodeImpl(nodeId, rmContext, null, 0, 0, null, null); + RMNodeImpl node2 = new RMNodeImpl(nodeId, rmContext, null, 0, 0, null, null, null); node2.handle(new RMNodeEvent(null,RMNodeEventType.STARTED)); ContainerId completedContainerIdFromNode1 = BuilderUtils.newContainerId( @@ -432,7 +432,7 @@ public class TestRMNodeTransitions { private RMNodeImpl getRunningNode() { NodeId nodeId = BuilderUtils.newNodeId("localhost", 0); RMNodeImpl node = new RMNodeImpl(nodeId, rmContext,null, 0, 0, - null, null); + null, 
null, null); node.handle(new RMNodeEvent(node.getNodeID(), RMNodeEventType.STARTED)); Assert.assertEquals(NodeState.RUNNING, node.getState()); return node; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java index 6cd5a02f182..1979273ce12 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java @@ -49,7 +49,7 @@ public class TestNodesPage { // Number of Actual Table Headers for NodesPage.NodesBlock might change in // future. In that case this value should be adjusted to the new value. final int numberOfThInMetricsTable = 13; - final int numberOfActualTableHeaders = 9; + final int numberOfActualTableHeaders = 10; private Injector injector; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesNodes.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesNodes.java index 13041342427..b26c37f13d1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesNodes.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesNodes.java @@ -655,13 +655,14 @@ public class TestRMWebServicesNodes extends JerseyTest { WebServicesTestUtils.getXmlString(element, "healthReport"), WebServicesTestUtils.getXmlInt(element, "numContainers"), WebServicesTestUtils.getXmlLong(element, "usedMemoryMB"), - WebServicesTestUtils.getXmlLong(element, "availMemoryMB")); + WebServicesTestUtils.getXmlLong(element, "availMemoryMB"), + WebServicesTestUtils.getXmlString(element, "version")); } } public void verifyNodeInfo(JSONObject nodeInfo, MockNM nm) throws JSONException, Exception { - assertEquals("incorrect number of elements", 10, nodeInfo.length()); + assertEquals("incorrect number of elements", 11, nodeInfo.length()); verifyNodeInfoGeneric(nm, nodeInfo.getString("state"), nodeInfo.getString("rack"), @@ -669,14 +670,15 @@ public class TestRMWebServicesNodes extends JerseyTest { nodeInfo.getString("nodeHTTPAddress"), nodeInfo.getLong("lastHealthUpdate"), nodeInfo.getString("healthReport"), nodeInfo.getInt("numContainers"), - nodeInfo.getLong("usedMemoryMB"), nodeInfo.getLong("availMemoryMB")); + nodeInfo.getLong("usedMemoryMB"), nodeInfo.getLong("availMemoryMB"), + nodeInfo.getString("version")); } public void verifyNodeInfoGeneric(MockNM nm, String state, String rack, String id, String nodeHostName, String nodeHTTPAddress, long lastHealthUpdate, String healthReport, - int numContainers, long usedMemoryMB, long availMemoryMB) + int numContainers, long usedMemoryMB, long availMemoryMB, String version) throws JSONException, Exception { 
RMNode node = rm.getRMContext().getRMNodes().get(nm.getNodeId()); @@ -695,6 +697,8 @@ public class TestRMWebServicesNodes extends JerseyTest { + nm.getHttpPort(); WebServicesTestUtils.checkStringMatch("nodeHTTPAddress", expectedHttpAddress, nodeHTTPAddress); + WebServicesTestUtils.checkStringMatch("version", + node.getNodeManagerVersion(), version); long expectedHealthUpdate = node.getLastHealthReportTime(); assertEquals("lastHealthUpdate doesn't match, got: " + lastHealthUpdate From fb3f338c652be737436accd076d2aacfd70c276c Mon Sep 17 00:00:00 2001 From: Bikas Saha Date: Thu, 3 Oct 2013 21:14:35 +0000 Subject: [PATCH 036/133] MAPREDUCE-5489. MR jobs hangs as it does not use the node-blacklisting feature in RM requests (Zhijie Shen via bikas) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529005 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 + .../v2/app/rm/RMContainerRequestor.java | 27 ++++++- .../v2/app/TestRMContainerAllocator.java | 74 +++++++++++++++---- 3 files changed, 87 insertions(+), 17 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index af76c67ea89..24cd4fbeee0 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -272,6 +272,9 @@ Release 2.1.2 - UNRELEASED tests jar is breaking tests for downstream components (Robert Kanter via Sandy Ryza) + MAPREDUCE-5489. MR jobs hangs as it does not use the node-blacklisting + feature in RM requests (Zhijie Shen via bikas) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerRequestor.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerRequestor.java index 67dd30e1641..a9b5ce58479 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerRequestor.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerRequestor.java @@ -41,6 +41,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; @@ -86,6 +87,10 @@ public abstract class RMContainerRequestor extends RMCommunicator { private final Map nodeFailures = new HashMap(); private final Set blacklistedNodes = Collections .newSetFromMap(new ConcurrentHashMap()); + private final Set blacklistAdditions = Collections + .newSetFromMap(new ConcurrentHashMap()); + private final Set blacklistRemovals = Collections + .newSetFromMap(new ConcurrentHashMap()); public RMContainerRequestor(ClientService clientService, AppContext context) { super(clientService, context); @@ -145,10 +150,13 @@ public abstract class RMContainerRequestor extends RMCommunicator { } protected AllocateResponse makeRemoteRequest() throws IOException { + ResourceBlacklistRequest blacklistRequest = + ResourceBlacklistRequest.newInstance(new 
ArrayList(blacklistAdditions), + new ArrayList(blacklistRemovals)); AllocateRequest allocateRequest = AllocateRequest.newInstance(lastResponseID, super.getApplicationProgress(), new ArrayList(ask), - new ArrayList(release), null); + new ArrayList(release), blacklistRequest); AllocateResponse allocateResponse; try { allocateResponse = scheduler.allocate(allocateRequest); @@ -172,6 +180,14 @@ public abstract class RMContainerRequestor extends RMCommunicator { ask.clear(); release.clear(); + + if (blacklistAdditions.size() > 0 || blacklistRemovals.size() > 0) { + LOG.info("Update the blacklist for " + applicationId + + ": blacklistAdditions=" + blacklistAdditions.size() + + " blacklistRemovals=" + blacklistRemovals.size()); + } + blacklistAdditions.clear(); + blacklistRemovals.clear(); return allocateResponse; } @@ -195,11 +211,17 @@ public abstract class RMContainerRequestor extends RMCommunicator { if (ignoreBlacklisting.compareAndSet(false, true)) { LOG.info("Ignore blacklisting set to true. Known: " + clusterNmCount + ", Blacklisted: " + blacklistedNodeCount + ", " + val + "%"); + // notify RM to ignore all the blacklisted nodes + blacklistAdditions.clear(); + blacklistRemovals.addAll(blacklistedNodes); } } else { if (ignoreBlacklisting.compareAndSet(true, false)) { LOG.info("Ignore blacklisting set to false. Known: " + clusterNmCount + ", Blacklisted: " + blacklistedNodeCount + ", " + val + "%"); + // notify RM of all the blacklisted nodes + blacklistAdditions.addAll(blacklistedNodes); + blacklistRemovals.clear(); } } } @@ -221,6 +243,9 @@ public abstract class RMContainerRequestor extends RMCommunicator { LOG.info(failures + " failures on node " + hostName); if (failures >= maxTaskFailuresPerNode) { blacklistedNodes.add(hostName); + if (!ignoreBlacklisting.get()) { + blacklistAdditions.add(hostName); + } //Even if blacklisting is ignored, continue to remove the host from // the request table. The RM may have additional nodes it can allocate on. 
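[Editor's aside, not part of the patch] A minimal sketch of the hand-off that the MAPREDUCE-5489 change above performs on each heartbeat. It assumes only the two factory methods the patch itself uses, ResourceBlacklistRequest.newInstance and AllocateRequest.newInstance; the class and method names below (BlacklistSketch, toResourceManager) are hypothetical.

    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.List;
    import java.util.Set;
    import java.util.concurrent.ConcurrentHashMap;

    import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
    import org.apache.hadoop.yarn.api.records.ContainerId;
    import org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest;
    import org.apache.hadoop.yarn.api.records.ResourceRequest;

    /** Hypothetical helper mirroring what makeRemoteRequest() does after this patch. */
    class BlacklistSketch {
      private final Set<String> blacklistAdditions =
          Collections.newSetFromMap(new ConcurrentHashMap<String, Boolean>());
      private final Set<String> blacklistRemovals =
          Collections.newSetFromMap(new ConcurrentHashMap<String, Boolean>());

      AllocateRequest toResourceManager(int lastResponseId, float progress,
          List<ResourceRequest> ask, List<ContainerId> release) {
        // Package the pending blacklist deltas into the heartbeat request ...
        ResourceBlacklistRequest blacklist = ResourceBlacklistRequest.newInstance(
            new ArrayList<String>(blacklistAdditions),
            new ArrayList<String>(blacklistRemovals));
        AllocateRequest request = AllocateRequest.newInstance(lastResponseId,
            progress, ask, release, blacklist);
        // ... then clear them so each delta is reported to the RM exactly once.
        blacklistAdditions.clear();
        blacklistRemovals.clear();
        return request;
      }
    }

Note that when ignore-blacklisting trips, the patch clears blacklistAdditions and moves every known bad node into blacklistRemovals so the RM stops filtering them; when the condition clears, those nodes are put back into blacklistAdditions.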
LOG.info("Blacklisted host " + hostName); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java index 8e0f7f8960c..ee0544a0537 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java @@ -880,8 +880,10 @@ public class TestRMContainerAllocator { dispatcher.await(); assigned = allocator.schedule(); + Assert.assertEquals("No of assignments must be 0", 0, assigned.size()); dispatcher.await(); - Assert.assertEquals("No of assignments must be 0", 0, assigned.size()); + Assert.assertEquals("No of assignments must be 0", 0, assigned.size()); + assertBlacklistAdditionsAndRemovals(2, 0, rm); // mark h1/h2 as bad nodes nodeManager1.nodeHeartbeat(false); @@ -890,12 +892,14 @@ public class TestRMContainerAllocator { assigned = allocator.schedule(); dispatcher.await(); + assertBlacklistAdditionsAndRemovals(0, 0, rm); Assert.assertEquals("No of assignments must be 0", 0, assigned.size()); nodeManager3.nodeHeartbeat(true); // Node heartbeat dispatcher.await(); - assigned = allocator.schedule(); + assigned = allocator.schedule(); dispatcher.await(); + assertBlacklistAdditionsAndRemovals(0, 0, rm); Assert.assertTrue("No of assignments must be 3", assigned.size() == 3); @@ -948,7 +952,7 @@ public class TestRMContainerAllocator { // Known=1, blacklisted=0, ignore should be false - assign first container assigned = getContainerOnHost(jobId, 1, 1024, new String[] { "h1" }, - nodeManagers[0], dispatcher, allocator); + nodeManagers[0], dispatcher, allocator, 0, 0, 0, 0, rm); Assert.assertEquals("No of assignments must be 1", 1, assigned.size()); LOG.info("Failing container _1 on H1 (Node should be blacklisted and" @@ -958,44 +962,52 @@ public class TestRMContainerAllocator { allocator.sendFailure(f1); // Test single node. + // Known=1, blacklisted=1, ignore should be true - assign 0 + // Because makeRemoteRequest will not be aware of it until next call + // The current call will send blacklisted node "h1" to RM + assigned = + getContainerOnHost(jobId, 2, 1024, new String[] { "h1" }, + nodeManagers[0], dispatcher, allocator, 1, 0, 0, 1, rm); + Assert.assertEquals("No of assignments must be 0", 0, assigned.size()); + // Known=1, blacklisted=1, ignore should be true - assign 1 assigned = getContainerOnHost(jobId, 2, 1024, new String[] { "h1" }, - nodeManagers[0], dispatcher, allocator); + nodeManagers[0], dispatcher, allocator, 0, 0, 0, 0, rm); Assert.assertEquals("No of assignments must be 1", 1, assigned.size()); nodeManagers[nmNum] = registerNodeManager(nmNum++, rm, dispatcher); // Known=2, blacklisted=1, ignore should be true - assign 1 anyway. assigned = getContainerOnHost(jobId, 3, 1024, new String[] { "h2" }, - nodeManagers[1], dispatcher, allocator); + nodeManagers[1], dispatcher, allocator, 0, 0, 0, 0, rm); Assert.assertEquals("No of assignments must be 1", 1, assigned.size()); nodeManagers[nmNum] = registerNodeManager(nmNum++, rm, dispatcher); // Known=3, blacklisted=1, ignore should be true - assign 1 anyway. 
assigned = getContainerOnHost(jobId, 4, 1024, new String[] { "h3" }, - nodeManagers[2], dispatcher, allocator); + nodeManagers[2], dispatcher, allocator, 0, 0, 0, 0, rm); Assert.assertEquals("No of assignments must be 1", 1, assigned.size()); // Known=3, blacklisted=1, ignore should be true - assign 1 assigned = getContainerOnHost(jobId, 5, 1024, new String[] { "h1" }, - nodeManagers[0], dispatcher, allocator); + nodeManagers[0], dispatcher, allocator, 0, 0, 0, 0, rm); Assert.assertEquals("No of assignments must be 1", 1, assigned.size()); nodeManagers[nmNum] = registerNodeManager(nmNum++, rm, dispatcher); // Known=4, blacklisted=1, ignore should be false - assign 1 anyway assigned = getContainerOnHost(jobId, 6, 1024, new String[] { "h4" }, - nodeManagers[3], dispatcher, allocator); + nodeManagers[3], dispatcher, allocator, 0, 0, 1, 0, rm); Assert.assertEquals("No of assignments must be 1", 1, assigned.size()); // Test blacklisting re-enabled. // Known=4, blacklisted=1, ignore should be false - no assignment on h1 assigned = getContainerOnHost(jobId, 7, 1024, new String[] { "h1" }, - nodeManagers[0], dispatcher, allocator); + nodeManagers[0], dispatcher, allocator, 0, 0, 0, 0, rm); Assert.assertEquals("No of assignments must be 0", 0, assigned.size()); // RMContainerRequestor would have created a replacement request. @@ -1004,17 +1016,24 @@ public class TestRMContainerAllocator { allocator.sendFailure(f2); // Test ignore blacklisting re-enabled + // Known=4, blacklisted=2, ignore should be true. Should assign 0 + // container for the same reason above. + assigned = + getContainerOnHost(jobId, 8, 1024, new String[] { "h1" }, + nodeManagers[0], dispatcher, allocator, 1, 0, 0, 2, rm); + Assert.assertEquals("No of assignments must be 0", 0, assigned.size()); + // Known=4, blacklisted=2, ignore should be true. Should assign 2 // containers. assigned = getContainerOnHost(jobId, 8, 1024, new String[] { "h1" }, - nodeManagers[0], dispatcher, allocator); + nodeManagers[0], dispatcher, allocator, 0, 0, 0, 0, rm); Assert.assertEquals("No of assignments must be 2", 2, assigned.size()); // Known=4, blacklisted=2, ignore should be true. assigned = getContainerOnHost(jobId, 9, 1024, new String[] { "h2" }, - nodeManagers[1], dispatcher, allocator); + nodeManagers[1], dispatcher, allocator, 0, 0, 0, 0, rm); Assert.assertEquals("No of assignments must be 1", 1, assigned.size()); // Test blacklist while ignore blacklisting enabled @@ -1025,7 +1044,7 @@ public class TestRMContainerAllocator { // Known=5, blacklisted=3, ignore should be true. assigned = getContainerOnHost(jobId, 10, 1024, new String[] { "h3" }, - nodeManagers[2], dispatcher, allocator); + nodeManagers[2], dispatcher, allocator, 0, 0, 0, 0, rm); Assert.assertEquals("No of assignments must be 1", 1, assigned.size()); // Assign on 5 more nodes - to re-enable blacklisting @@ -1034,14 +1053,14 @@ public class TestRMContainerAllocator { assigned = getContainerOnHost(jobId, 11 + i, 1024, new String[] { String.valueOf(5 + i) }, nodeManagers[4 + i], - dispatcher, allocator); + dispatcher, allocator, 0, 0, (i == 4 ? 3 : 0), 0, rm); Assert.assertEquals("No of assignments must be 1", 1, assigned.size()); } // Test h3 (blacklisted while ignoring blacklisting) is blacklisted. 
assigned = getContainerOnHost(jobId, 20, 1024, new String[] { "h3" }, - nodeManagers[2], dispatcher, allocator); + nodeManagers[2], dispatcher, allocator, 0, 0, 0, 0, rm); Assert.assertEquals("No of assignments must be 0", 0, assigned.size()); } @@ -1055,7 +1074,9 @@ public class TestRMContainerAllocator { private List getContainerOnHost(JobId jobId, int taskAttemptId, int memory, String[] hosts, MockNM mockNM, - DrainDispatcher dispatcher, MyContainerAllocator allocator) + DrainDispatcher dispatcher, MyContainerAllocator allocator, + int expectedAdditions1, int expectedRemovals1, + int expectedAdditions2, int expectedRemovals2, MyResourceManager rm) throws Exception { ContainerRequestEvent reqEvent = createReq(jobId, taskAttemptId, memory, hosts); @@ -1064,6 +1085,8 @@ public class TestRMContainerAllocator { // Send the request to the RM List assigned = allocator.schedule(); dispatcher.await(); + assertBlacklistAdditionsAndRemovals( + expectedAdditions1, expectedRemovals1, rm); Assert.assertEquals("No of assignments must be 0", 0, assigned.size()); // Heartbeat from the required nodeManager @@ -1072,6 +1095,8 @@ public class TestRMContainerAllocator { assigned = allocator.schedule(); dispatcher.await(); + assertBlacklistAdditionsAndRemovals( + expectedAdditions2, expectedRemovals2, rm); return assigned; } @@ -1137,6 +1162,7 @@ public class TestRMContainerAllocator { LOG.info("RM Heartbeat (To process the scheduled containers)"); assigned = allocator.schedule(); dispatcher.await(); + assertBlacklistAdditionsAndRemovals(0, 0, rm); Assert.assertEquals("No of assignments must be 1", 1, assigned.size()); LOG.info("Failing container _1 on H1 (should blacklist the node)"); @@ -1153,6 +1179,7 @@ public class TestRMContainerAllocator { //Update the Scheduler with the new requests. assigned = allocator.schedule(); dispatcher.await(); + assertBlacklistAdditionsAndRemovals(1, 0, rm); Assert.assertEquals("No of assignments must be 0", 0, assigned.size()); // send another request with different resource and priority @@ -1171,6 +1198,7 @@ public class TestRMContainerAllocator { LOG.info("RM Heartbeat (To process the scheduled containers)"); assigned = allocator.schedule(); dispatcher.await(); + assertBlacklistAdditionsAndRemovals(0, 0, rm); Assert.assertEquals("No of assignments must be 0", 0, assigned.size()); //RMContainerAllocator gets assigned a p:5 on a blacklisted node. @@ -1179,6 +1207,7 @@ public class TestRMContainerAllocator { LOG.info("RM Heartbeat (To process the re-scheduled containers)"); assigned = allocator.schedule(); dispatcher.await(); + assertBlacklistAdditionsAndRemovals(0, 0, rm); Assert.assertEquals("No of assignments must be 0", 0, assigned.size()); //Hearbeat from H3 to schedule on this host. 
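[Editor's note, not part of the patch] In the extended getContainerOnHost helper above, the first expectedAdditions/expectedRemovals pair is asserted right after the request is sent on the allocator's own heartbeat, and the second pair after the node-manager heartbeat and the follow-up schedule. For instance, the earlier call in this test reads as follows (values taken from the patch, inline annotations added here):

    assigned = getContainerOnHost(jobId, 2, 1024, new String[] { "h1" },
        nodeManagers[0], dispatcher, allocator,
        1, 0,   // first heartbeat: "h1" is reported to the RM as a blacklist addition
        0, 1,   // second heartbeat: ignore-blacklisting has tripped, so "h1" is sent as a removal
        rm);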
@@ -1188,6 +1217,7 @@ public class TestRMContainerAllocator { LOG.info("RM Heartbeat (To process the re-scheduled containers for H3)"); assigned = allocator.schedule(); + assertBlacklistAdditionsAndRemovals(0, 0, rm); dispatcher.await(); // For debugging @@ -1205,7 +1235,15 @@ public class TestRMContainerAllocator { + " host not correct", "h3", assig.getContainer().getNodeId().getHost()); } } - + + private static void assertBlacklistAdditionsAndRemovals( + int expectedAdditions, int expectedRemovals, MyResourceManager rm) { + Assert.assertEquals(expectedAdditions, + rm.getMyFifoScheduler().lastBlacklistAdditions.size()); + Assert.assertEquals(expectedRemovals, + rm.getMyFifoScheduler().lastBlacklistRemovals.size()); + } + private static class MyFifoScheduler extends FifoScheduler { public MyFifoScheduler(RMContext rmContext) { @@ -1220,6 +1258,8 @@ public class TestRMContainerAllocator { } List lastAsk = null; + List lastBlacklistAdditions; + List lastBlacklistRemovals; // override this to copy the objects otherwise FifoScheduler updates the // numContainers in same objects as kept by RMContainerAllocator @@ -1236,6 +1276,8 @@ public class TestRMContainerAllocator { askCopy.add(reqCopy); } lastAsk = ask; + lastBlacklistAdditions = blacklistAdditions; + lastBlacklistRemovals = blacklistRemovals; return super.allocate( applicationAttemptId, askCopy, release, blacklistAdditions, blacklistRemovals); From 79a11ce09df3662b3ce83db684a6761f4f1638a4 Mon Sep 17 00:00:00 2001 From: Arun Murthy Date: Thu, 3 Oct 2013 21:54:35 +0000 Subject: [PATCH 037/133] YARN-890. Ensure CapacityScheduler doesn't round-up metric for available resources. Contributed by Xuan Gong & Hitesh Shah. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529015 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 +++ .../scheduler/capacity/CSQueueUtils.java | 8 ++------ .../scheduler/capacity/TestLeafQueue.java | 18 ++++++++++++------ 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 1db9894223e..c0eca9c3506 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -134,6 +134,9 @@ Release 2.1.2 - UNRELEASED YARN-876. Node resource is added twice when node comes back from unhealthy to healthy. (Peng Zhang via Sandy Ryza) + YARN-890. Ensure CapacityScheduler doesn't round-up metric for available + resources. 
(Xuan Gong & Hitesh Shah via acmurthy) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueueUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueueUtils.java index 595b3a883b8..1dd55862b9c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueueUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueueUtils.java @@ -99,15 +99,11 @@ class CSQueueUtils { Resources.divide(calculator, clusterResource, usedResources, queueLimit); } - + childQueue.setUsedCapacity(usedCapacity); childQueue.setAbsoluteUsedCapacity(absoluteUsedCapacity); - Resource available = - Resources.roundUp( - calculator, - Resources.subtract(queueLimit, usedResources), - minimumAllocation); + Resource available = Resources.subtract(queueLimit, usedResources); childQueue.getMetrics().setAvailableResourcesToQueue( Resources.max( calculator, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java index f6e13a23540..4f4bf2f4708 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java @@ -283,8 +283,9 @@ public class TestLeafQueue { // Setup some nodes String host_0 = "127.0.0.1"; - FiCaSchedulerNode node_0 = TestUtils.getMockNode(host_0, DEFAULT_RACK, 0, 8*GB); - + FiCaSchedulerNode node_0 = TestUtils.getMockNode(host_0, DEFAULT_RACK, 0, + 8*GB); + final int numNodes = 1; Resource clusterResource = Resources.createResource(numNodes * (8*GB), numNodes * 16); @@ -300,7 +301,9 @@ public class TestLeafQueue { // Only 1 container a.assignContainers(clusterResource, node_0); - assertEquals(6*GB, a.getMetrics().getAvailableMB()); + assertEquals( + (int)(node_0.getTotalResource().getMemory() * a.getCapacity()) - (1*GB), + a.getMetrics().getAvailableMB()); } @Test @@ -405,8 +408,9 @@ public class TestLeafQueue { // Setup some nodes String host_0 = "127.0.0.1"; - FiCaSchedulerNode node_0 = TestUtils.getMockNode(host_0, DEFAULT_RACK, 0, 8*GB); - + FiCaSchedulerNode node_0 = TestUtils.getMockNode(host_0, DEFAULT_RACK, 0, + 8*GB); + final int numNodes = 1; Resource clusterResource = Resources.createResource(numNodes * (8*GB), numNodes * 16); @@ -493,12 +497,14 @@ public class TestLeafQueue { a.completedContainer(clusterResource, app_1, node_0, rmContainer, null, RMContainerEventType.KILL, null); } + assertEquals(0*GB, a.getUsedResources().getMemory()); assertEquals(0*GB, app_0.getCurrentConsumption().getMemory()); 
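    // [Editor's aside, not part of the patch] After YARN-890 the queue metric is the
    // plain difference queueLimit - used and is no longer rounded up to a multiple of
    // the minimum allocation. As a purely hypothetical worked example: with a single
    // 8192 MB node and a leaf queue at 50% capacity holding one 1024 MB container,
    // getAvailableMB() now reports (int) (0.5 * 8192) - 1024 = 3072 MB, the same shape
    // as the (int)(node_0.getTotalResource().getMemory() * a.getCapacity()) - (1*GB)
    // expression asserted earlier in this test.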
assertEquals(0*GB, app_1.getCurrentConsumption().getMemory()); assertEquals(0*GB, a.getMetrics().getReservedMB()); assertEquals(0*GB, a.getMetrics().getAllocatedMB()); - assertEquals(1*GB, a.getMetrics().getAvailableMB()); + assertEquals((int)(a.getCapacity() * node_0.getTotalResource().getMemory()), + a.getMetrics().getAvailableMB()); } @Test From 1608d8b5275dc6f6328f16d6949eca65bec27d05 Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Thu, 3 Oct 2013 23:21:32 +0000 Subject: [PATCH 038/133] YARN-621. Changed YARN web app to not add paths that can cause duplicate additions of authenticated filters there by causing kerberos replay errors. Contributed by Omkar Vinit Joshi. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529030 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 4 ++++ .../main/java/org/apache/hadoop/yarn/webapp/WebApps.java | 9 ++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index c0eca9c3506..e604d2c91b2 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -137,6 +137,10 @@ Release 2.1.2 - UNRELEASED YARN-890. Ensure CapacityScheduler doesn't round-up metric for available resources. (Xuan Gong & Hitesh Shah via acmurthy) + YARN-621. Changed YARN web app to not add paths that can cause duplicate + additions of authenticated filters there by causing kerberos replay errors. + (Omkar Vinit Joshi via vinodkv) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/WebApps.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/WebApps.java index a56d2a4beda..d2ce2f22bbc 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/WebApps.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/WebApps.java @@ -23,8 +23,10 @@ import static com.google.common.base.Preconditions.checkNotNull; import java.io.IOException; import java.net.ConnectException; import java.net.URL; +import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; +import java.util.List; import java.util.Map; import javax.servlet.http.HttpServlet; @@ -167,18 +169,23 @@ public class WebApps { webapp.setWebServices(wsName); String basePath = "/" + name; webapp.setRedirectPath(basePath); + List pathList = new ArrayList(); if (basePath.equals("/")) { webapp.addServePathSpec("/*"); + pathList.add("/*"); } else { webapp.addServePathSpec(basePath); webapp.addServePathSpec(basePath + "/*"); + pathList.add(basePath + "/*"); } if (wsName != null && !wsName.equals(basePath)) { if (wsName.equals("/")) { webapp.addServePathSpec("/*"); + pathList.add("/*"); } else { webapp.addServePathSpec("/" + wsName); webapp.addServePathSpec("/" + wsName + "/*"); + pathList.add("/" + wsName + "/*"); } } if (conf == null) { @@ -212,7 +219,7 @@ public class WebApps { HttpServer server = new HttpServer(name, bindAddress, port, findPort, conf, new AdminACLsManager(conf).getAdminAcl(), null, - webapp.getServePathSpecs()) { + pathList.toArray(new String[0])) { { if (UserGroupInformation.isSecurityEnabled()) { From ac2cdb5f6586cc3358ea4fc818f477959d2daa38 Mon Sep 17 00:00:00 2001 From: Sanford Ryza Date: Thu, 3 Oct 2013 23:29:39 +0000 Subject: [PATCH 039/133] YARN-1236. 
FairScheduler setting queue name in RMApp is not working. (Sandy Ryza) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529034 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 +++ .../scheduler/fair/FairScheduler.java | 5 ++++- .../scheduler/fair/TestFairScheduler.java | 12 +++++++++++- 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index e604d2c91b2..3559edaeaec 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -141,6 +141,9 @@ Release 2.1.2 - UNRELEASED additions of authenticated filters there by causing kerberos replay errors. (Omkar Vinit Joshi via vinodkv) + YARN-1236. FairScheduler setting queue name in RMApp is not working. + (Sandy Ryza) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index f7897e0f2f6..f2ac6a699c8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -634,7 +634,8 @@ public class FairScheduler implements ResourceScheduler { return; } - RMApp rmApp = rmContext.getRMApps().get(applicationAttemptId); + RMApp rmApp = rmContext.getRMApps().get( + applicationAttemptId.getApplicationId()); FSLeafQueue queue = assignToQueue(rmApp, queueName, user); FSSchedulerApp schedulerApp = @@ -686,6 +687,8 @@ public class FairScheduler implements ResourceScheduler { if (rmApp != null) { rmApp.setQueue(queue.getName()); + } else { + LOG.warn("Couldn't find RM app to set queue name on"); } return queue; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index 7cc800260e7..f09d2c4a04f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -67,6 +67,7 @@ import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.server.resourcemanager.ApplicationMasterService; import org.apache.hadoop.yarn.server.resourcemanager.MockNodes; +import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.MockRMApp; import 
org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; @@ -603,13 +604,22 @@ public class TestFairScheduler { Configuration conf = createConfiguration(); conf.set(FairSchedulerConfiguration.USER_AS_DEFAULT_QUEUE, "true"); scheduler.reinitialize(conf, resourceManager.getRMContext()); + RMContext rmContext = resourceManager.getRMContext(); + Map appsMap = rmContext.getRMApps(); + ApplicationAttemptId appAttemptId = createAppAttemptId(1, 1); + RMApp rmApp = new RMAppImpl(appAttemptId.getApplicationId(), rmContext, conf, + null, null, null, ApplicationSubmissionContext.newInstance(null, null, + null, null, null, false, false, 0, null, null), null, null, 0, null); + appsMap.put(appAttemptId.getApplicationId(), rmApp); + AppAddedSchedulerEvent appAddedEvent = new AppAddedSchedulerEvent( - createAppAttemptId(1, 1), "default", "user1"); + appAttemptId, "default", "user1"); scheduler.handle(appAddedEvent); assertEquals(1, scheduler.getQueueManager().getLeafQueue("user1", true) .getAppSchedulables().size()); assertEquals(0, scheduler.getQueueManager().getLeafQueue("default", true) .getAppSchedulables().size()); + assertEquals("root.user1", rmApp.getQueue()); conf.set(FairSchedulerConfiguration.USER_AS_DEFAULT_QUEUE, "false"); scheduler.reinitialize(conf, resourceManager.getRMContext()); From 8ebf37f3691dee523f7d800bc82c7423c3e262e9 Mon Sep 17 00:00:00 2001 From: Bikas Saha Date: Fri, 4 Oct 2013 00:23:35 +0000 Subject: [PATCH 040/133] YARN-1256. NM silently ignores non-existent service in StartContainerRequest (Xuan Gong via bikas) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529039 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 + .../yarn/util/AuxiliaryServiceHelper.java | 7 +- .../containermanager/AuxServices.java | 88 ++++++++++++------- .../ContainerManagerImpl.java | 13 +++ .../TestContainerManagerWithLCE.java | 11 +++ .../TestContainerManager.java | 46 ++++++++++ 6 files changed, 134 insertions(+), 34 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 3559edaeaec..6840dde667f 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -144,6 +144,9 @@ Release 2.1.2 - UNRELEASED YARN-1236. FairScheduler setting queue name in RMApp is not working. (Sandy Ryza) + YARN-1256. 
NM silently ignores non-existent service in + StartContainerRequest (Xuan Gong via bikas) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/AuxiliaryServiceHelper.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/AuxiliaryServiceHelper.java index 23fc50fcec8..cb118f56da9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/AuxiliaryServiceHelper.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/AuxiliaryServiceHelper.java @@ -30,8 +30,11 @@ public class AuxiliaryServiceHelper { public static ByteBuffer getServiceDataFromEnv(String serviceName, Map env) { - byte[] metaData = - Base64.decodeBase64(env.get(getPrefixServiceName(serviceName))); + String meta = env.get(getPrefixServiceName(serviceName)); + if (null == meta) { + return null; + } + byte[] metaData = Base64.decodeBase64(meta); return ByteBuffer.wrap(metaData); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServices.java index 0e0e7668f48..5fe5b141bc0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServices.java @@ -175,39 +175,56 @@ public class AuxServices extends AbstractService LOG.info("Got event " + event.getType() + " for appId " + event.getApplicationID()); switch (event.getType()) { - case APPLICATION_INIT: - LOG.info("Got APPLICATION_INIT for service " + event.getServiceID()); - AuxiliaryService service = serviceMap.get(event.getServiceID()); - if (null == service) { - LOG.info("service is null"); - // TODO kill all containers waiting on Application - return; - } - service.initializeApplication(new ApplicationInitializationContext(event - .getUser(), event.getApplicationID(), event.getServiceData())); - break; - case APPLICATION_STOP: - for (AuxiliaryService serv : serviceMap.values()) { - serv.stopApplication(new ApplicationTerminationContext(event - .getApplicationID())); - } - break; - case CONTAINER_INIT: - for (AuxiliaryService serv : serviceMap.values()) { - serv.initializeContainer(new ContainerInitializationContext( - event.getUser(), event.getContainer().getContainerId(), - event.getContainer().getResource())); - } - break; - case CONTAINER_STOP: - for (AuxiliaryService serv : serviceMap.values()) { - serv.stopContainer(new ContainerTerminationContext( - event.getUser(), event.getContainer().getContainerId(), - event.getContainer().getResource())); - } - break; + case APPLICATION_INIT: + LOG.info("Got APPLICATION_INIT for service " + event.getServiceID()); + AuxiliaryService service = null; + try { + service = serviceMap.get(event.getServiceID()); + service + .initializeApplication(new ApplicationInitializationContext(event + .getUser(), event.getApplicationID(), event.getServiceData())); + } catch (Throwable th) { + 
logWarningWhenAuxServiceThrowExceptions(service, + AuxServicesEventType.APPLICATION_INIT, th); + } + break; + case APPLICATION_STOP: + for (AuxiliaryService serv : serviceMap.values()) { + try { + serv.stopApplication(new ApplicationTerminationContext(event + .getApplicationID())); + } catch (Throwable th) { + logWarningWhenAuxServiceThrowExceptions(serv, + AuxServicesEventType.APPLICATION_STOP, th); + } + } + break; + case CONTAINER_INIT: + for (AuxiliaryService serv : serviceMap.values()) { + try { + serv.initializeContainer(new ContainerInitializationContext( + event.getUser(), event.getContainer().getContainerId(), + event.getContainer().getResource())); + } catch (Throwable th) { + logWarningWhenAuxServiceThrowExceptions(serv, + AuxServicesEventType.CONTAINER_INIT, th); + } + } + break; + case CONTAINER_STOP: + for (AuxiliaryService serv : serviceMap.values()) { + try { + serv.stopContainer(new ContainerTerminationContext( + event.getUser(), event.getContainer().getContainerId(), + event.getContainer().getResource())); + } catch (Throwable th) { + logWarningWhenAuxServiceThrowExceptions(serv, + AuxServicesEventType.CONTAINER_STOP, th); + } + } + break; default: - throw new RuntimeException("Unknown type: " + event.getType()); + throw new RuntimeException("Unknown type: " + event.getType()); } } @@ -217,4 +234,11 @@ public class AuxServices extends AbstractService } return p.matcher(name).matches(); } + + private void logWarningWhenAuxServiceThrowExceptions(AuxiliaryService service, + AuxServicesEventType eventType, Throwable th) { + LOG.warn((null == service ? "The auxService is null" + : "The auxService name is " + service.getName()) + + " and it got an error at event: " + eventType, th); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java index 0af4332cef8..f24a5544dfd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java @@ -67,6 +67,7 @@ import org.apache.hadoop.yarn.api.records.SerializedException; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.AsyncDispatcher; import org.apache.hadoop.yarn.event.EventHandler; +import org.apache.hadoop.yarn.exceptions.InvalidAuxServiceException; import org.apache.hadoop.yarn.exceptions.InvalidContainerException; import org.apache.hadoop.yarn.exceptions.NMNotYetReadyException; import org.apache.hadoop.yarn.exceptions.YarnException; @@ -451,6 +452,18 @@ public class ContainerManagerImpl extends CompositeService implements ContainerLaunchContext launchContext = request.getContainerLaunchContext(); + Map serviceData = getAuxServiceMetaData(); + if (launchContext.getServiceData()!=null && + !launchContext.getServiceData().isEmpty()) { + for (Map.Entry meta : launchContext.getServiceData() + .entrySet()) { + if (null == serviceData.get(meta.getKey())) { + throw new InvalidAuxServiceException("The auxService:" + meta.getKey() + + " does not exist"); + } + } + } + 
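[Editor's aside, not part of the patch] Taken together, the two YARN-1256 changes stop missing auxiliary services from being silently ignored: AuxiliaryServiceHelper.getServiceDataFromEnv now returns null instead of failing while wrapping a null byte array, and ContainerManagerImpl rejects a StartContainerRequest whose serviceData names an unknown service with an InvalidAuxServiceException. A minimal hypothetical caller of the helper, assuming only the getServiceDataFromEnv signature shown in this patch ("mapreduce_shuffle" is just an illustrative service name):

    import java.nio.ByteBuffer;
    import java.util.Map;

    import org.apache.hadoop.yarn.util.AuxiliaryServiceHelper;

    class ShuffleMetaSketch {
      static ByteBuffer shuffleMetaOrNull(Map<String, String> env) {
        // With the null guard above, a service that was never registered on this
        // NodeManager simply yields null here.
        return AuxiliaryServiceHelper.getServiceDataFromEnv("mapreduce_shuffle", env);
      }
    }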
Credentials credentials = parseCredentials(launchContext); Container container = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java index cc9b7d9ce0c..a47e7f78e19 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java @@ -178,6 +178,17 @@ public class TestContainerManagerWithLCE extends TestContainerManager { super.testMultipleContainersStopAndGetStatus(); } + @Override + public void testStartContainerFailureWithUnknownAuxService() throws Exception { + // Don't run the test if the binary is not available. + if (!shouldRunTest()) { + LOG.info("LCE binary path is not passed. Not running the test"); + return; + } + LOG.info("Running testContainerLaunchFromPreviousRM"); + super.testStartContainerFailureWithUnknownAuxService(); + } + private boolean shouldRunTest() { return System .getProperty(YarnConfiguration.NM_LINUX_CONTAINER_EXECUTOR_PATH) != null; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java index e5b318ee4cd..90cd16e16eb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java @@ -24,6 +24,7 @@ import java.io.FileReader; import java.io.IOException; import java.io.PrintWriter; import java.net.InetAddress; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -37,6 +38,7 @@ import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.UnsupportedFileSystemException; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.service.Service; import org.apache.hadoop.util.Shell; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse; @@ -59,6 +61,7 @@ import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.SerializedException; import org.apache.hadoop.yarn.api.records.Token; import org.apache.hadoop.yarn.api.records.URL; +import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.InvalidContainerException; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; @@ -68,6 +71,7 @@ import org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedAppsEvent; import 
org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode; import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor; import org.apache.hadoop.yarn.server.nodemanager.DeletionService; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.TestAuxServices.ServiceA; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationState; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer; @@ -746,6 +750,48 @@ public class TestContainerManager extends BaseContainerManagerTest { } } + @Test + public void testStartContainerFailureWithUnknownAuxService() throws Exception { + conf.setStrings(YarnConfiguration.NM_AUX_SERVICES, + new String[] { "existService" }); + conf.setClass( + String.format(YarnConfiguration.NM_AUX_SERVICE_FMT, "existService"), + ServiceA.class, Service.class); + containerManager.start(); + + List startRequest = + new ArrayList(); + + ContainerLaunchContext containerLaunchContext = + recordFactory.newRecordInstance(ContainerLaunchContext.class); + Map serviceData = new HashMap(); + String serviceName = "non_exist_auxService"; + serviceData.put(serviceName, ByteBuffer.wrap(serviceName.getBytes())); + containerLaunchContext.setServiceData(serviceData); + + ContainerId cId = createContainerId(0); + String user = "start_container_fail"; + Token containerToken = + createContainerToken(cId, DUMMY_RM_IDENTIFIER, context.getNodeId(), + user, context.getContainerTokenSecretManager()); + StartContainerRequest request = + StartContainerRequest.newInstance(containerLaunchContext, + containerToken); + + // start containers + startRequest.add(request); + StartContainersRequest requestList = + StartContainersRequest.newInstance(startRequest); + + StartContainersResponse response = + containerManager.startContainers(requestList); + Assert.assertTrue(response.getFailedRequests().size() == 1); + Assert.assertTrue(response.getSuccessfullyStartedContainers().size() == 0); + Assert.assertTrue(response.getFailedRequests().containsKey(cId)); + Assert.assertTrue(response.getFailedRequests().get(cId).getMessage() + .contains("The auxService:" + serviceName + " does not exist")); + } + public static Token createContainerToken(ContainerId cId, long rmIdentifier, NodeId nodeId, String user, NMContainerTokenSecretManager containerTokenSecretManager) From d3841bd4997a77855ab2abd9cc294eae7d795a5b Mon Sep 17 00:00:00 2001 From: Hitesh Shah Date: Fri, 4 Oct 2013 00:43:26 +0000 Subject: [PATCH 041/133] YARN-1149. NM throws InvalidStateTransitonException: Invalid event: APPLICATION_LOG_HANDLING_FINISHED at RUNNING. Contributed by Xuan Gong. 
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529043 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 + .../yarn/client/api/impl/TestAMRMClient.java | 1 + .../nodemanager/CMgrCompletedAppsEvent.java | 20 ++- .../CMgrCompletedContainersEvent.java | 14 +- .../yarn/server/nodemanager/NodeManager.java | 78 +------- .../nodemanager/NodeStatusUpdaterImpl.java | 10 +- .../ContainerManagerImpl.java | 167 ++++++++++++++---- .../application/ApplicationImpl.java | 24 ++- .../nodemanager/TestNodeManagerReboot.java | 1 + .../nodemanager/TestNodeManagerResync.java | 1 + .../nodemanager/TestNodeManagerShutdown.java | 1 + .../nodemanager/TestNodeStatusUpdater.java | 46 +++-- .../BaseContainerManagerTest.java | 1 + .../TestContainerManager.java | 2 +- .../application/TestApplication.java | 4 +- .../TestLogAggregationService.java | 2 +- 16 files changed, 230 insertions(+), 145 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 6840dde667f..cb6219cc122 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -147,6 +147,9 @@ Release 2.1.2 - UNRELEASED YARN-1256. NM silently ignores non-existent service in StartContainerRequest (Xuan Gong via bikas) + YARN-1149. NM throws InvalidStateTransitonException: Invalid event: + APPLICATION_LOG_HANDLING_FINISHED at RUNNING (Xuan Gong via hitesh) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java index f24a2cd88fb..58ef215f24d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java @@ -107,6 +107,7 @@ public class TestAMRMClient { // start minicluster conf = new YarnConfiguration(); conf.setInt(YarnConfiguration.RM_NM_HEARTBEAT_INTERVAL_MS, 100); + conf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 1); yarnCluster = new MiniYARNCluster(TestAMRMClient.class.getName(), nodeCount, 1, 1); yarnCluster.init(conf); yarnCluster.start(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrCompletedAppsEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrCompletedAppsEvent.java index 19be39fccb5..de1b7f4d279 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrCompletedAppsEvent.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrCompletedAppsEvent.java @@ -27,13 +27,31 @@ import org.apache.hadoop.yarn.server.nodemanager.ContainerManagerEventType; public class CMgrCompletedAppsEvent extends ContainerManagerEvent { private final List appsToCleanup; + private final Reason reason; - public CMgrCompletedAppsEvent(List appsToCleanup) { + public CMgrCompletedAppsEvent(List appsToCleanup, Reason reason) { super(ContainerManagerEventType.FINISH_APPS); this.appsToCleanup = appsToCleanup; + 
this.reason = reason; } public List getAppsToCleanup() { return this.appsToCleanup; } + + public Reason getReason() { + return reason; + } + + public static enum Reason { + /** + * Application is killed as NodeManager is shut down + */ + ON_SHUTDOWN, + + /** + * Application is killed by ResourceManager + */ + BY_RESOURCEMANAGER + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrCompletedContainersEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrCompletedContainersEvent.java index 675b6051211..e5e55374af9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrCompletedContainersEvent.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrCompletedContainersEvent.java @@ -24,24 +24,14 @@ import org.apache.hadoop.yarn.api.records.ContainerId; public class CMgrCompletedContainersEvent extends ContainerManagerEvent { - private List containerToCleanup; - private Reason reason; + private final List containerToCleanup; - public CMgrCompletedContainersEvent(List containersToCleanup, Reason reason) { + public CMgrCompletedContainersEvent(List containersToCleanup) { super(ContainerManagerEventType.FINISH_CONTAINERS); this.containerToCleanup = containersToCleanup; - this.reason = reason; } public List getContainersToCleanup() { return this.containerToCleanup; } - - public Reason getReason() { - return reason; - } - - public static enum Reason { - ON_SHUTDOWN, BY_RESOURCEMANAGER - } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java index e287adde1d2..79b9d7a83ff 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java @@ -19,9 +19,6 @@ package org.apache.hadoop.yarn.server.nodemanager; import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ConcurrentSkipListMap; @@ -67,11 +64,6 @@ public class NodeManager extends CompositeService * Priority of the NodeManager shutdown hook. */ public static final int SHUTDOWN_HOOK_PRIORITY = 30; - - /** - * Extra duration to wait for containers to be killed on shutdown. 
- */ - private static final int SHUTDOWN_CLEANUP_SLOP_MS = 1000; private static final Log LOG = LogFactory.getLog(NodeManager.class); protected final NodeManagerMetrics metrics = NodeManagerMetrics.create(); @@ -84,8 +76,6 @@ public class NodeManager extends CompositeService private NodeStatusUpdater nodeStatusUpdater; private static CompositeServiceShutdownHook nodeManagerShutdownHook; - private long waitForContainersOnShutdownMillis; - private AtomicBoolean isStopping = new AtomicBoolean(false); public NodeManager() { @@ -193,13 +183,6 @@ public class NodeManager extends CompositeService // so that we make sure everything is up before registering with RM. addService(nodeStatusUpdater); - waitForContainersOnShutdownMillis = - conf.getLong(YarnConfiguration.NM_SLEEP_DELAY_BEFORE_SIGKILL_MS, - YarnConfiguration.DEFAULT_NM_SLEEP_DELAY_BEFORE_SIGKILL_MS) + - conf.getLong(YarnConfiguration.NM_PROCESS_KILL_WAIT_MS, - YarnConfiguration.DEFAULT_NM_PROCESS_KILL_WAIT_MS) + - SHUTDOWN_CLEANUP_SLOP_MS; - super.serviceInit(conf); // TODO add local dirs to del } @@ -219,9 +202,6 @@ public class NodeManager extends CompositeService if (isStopping.getAndSet(true)) { return; } - if (context != null) { - cleanupContainers(NodeManagerEventType.SHUTDOWN); - } super.serviceStop(); DefaultMetricsSystem.shutdown(); } @@ -246,68 +226,12 @@ public class NodeManager extends CompositeService public void run() { LOG.info("Notifying ContainerManager to block new container-requests"); containerManager.setBlockNewContainerRequests(true); - cleanupContainers(NodeManagerEventType.RESYNC); + containerManager.cleanUpApplications(NodeManagerEventType.RESYNC); ((NodeStatusUpdaterImpl) nodeStatusUpdater ).rebootNodeStatusUpdater(); } }.start(); } - @SuppressWarnings("unchecked") - protected void cleanupContainers(NodeManagerEventType eventType) { - Map containers = context.getContainers(); - if (containers.isEmpty()) { - return; - } - LOG.info("Containers still running on " + eventType + " : " - + containers.keySet()); - - List containerIds = - new ArrayList(containers.keySet()); - dispatcher.getEventHandler().handle( - new CMgrCompletedContainersEvent(containerIds, - CMgrCompletedContainersEvent.Reason.ON_SHUTDOWN)); - - LOG.info("Waiting for containers to be killed"); - - switch (eventType) { - case SHUTDOWN: - long waitStartTime = System.currentTimeMillis(); - while (!containers.isEmpty() - && System.currentTimeMillis() - waitStartTime < waitForContainersOnShutdownMillis) { - try { - //To remove done containers in NM context - nodeStatusUpdater.getNodeStatusAndUpdateContainersInContext(); - Thread.sleep(1000); - } catch (InterruptedException ex) { - LOG.warn("Interrupted while sleeping on container kill on shutdown", - ex); - } - } - break; - case RESYNC: - while (!containers.isEmpty()) { - try { - Thread.sleep(1000); - nodeStatusUpdater.getNodeStatusAndUpdateContainersInContext(); - } catch (InterruptedException ex) { - LOG.warn("Interrupted while sleeping on container kill on resync", - ex); - } - } - break; - default: - LOG.warn("Invalid eventType: " + eventType); - } - - // All containers killed - if (containers.isEmpty()) { - LOG.info("All containers in DONE state"); - } else { - LOG.info("Done waiting for containers to be killed. 
Still alive: " + - containers.keySet()); - } - } - public static class NMContext implements Context { private NodeId nodeId = null; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java index ba0065b6d02..b52f9d16500 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java @@ -499,18 +499,18 @@ public class NodeStatusUpdaterImpl extends AbstractService implements lastHeartBeatID = response.getResponseId(); List containersToCleanup = response .getContainersToCleanup(); - if (containersToCleanup.size() != 0) { + if (!containersToCleanup.isEmpty()) { dispatcher.getEventHandler().handle( - new CMgrCompletedContainersEvent(containersToCleanup, - CMgrCompletedContainersEvent.Reason.BY_RESOURCEMANAGER)); + new CMgrCompletedContainersEvent(containersToCleanup)); } List appsToCleanup = response.getApplicationsToCleanup(); //Only start tracking for keepAlive on FINISH_APP trackAppsForKeepAlive(appsToCleanup); - if (appsToCleanup.size() != 0) { + if (!appsToCleanup.isEmpty()) { dispatcher.getEventHandler().handle( - new CMgrCompletedAppsEvent(appsToCleanup)); + new CMgrCompletedAppsEvent(appsToCleanup, + CMgrCompletedAppsEvent.Reason.BY_RESOURCEMANAGER)); } } catch (ConnectException e) { //catch and throw the exception if tried MAX wait time to connect RM diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java index f24a5544dfd..d158b43f2d0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java @@ -30,6 +30,9 @@ import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock; +import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -71,6 +74,7 @@ import org.apache.hadoop.yarn.exceptions.InvalidAuxServiceException; import org.apache.hadoop.yarn.exceptions.InvalidContainerException; import org.apache.hadoop.yarn.exceptions.NMNotYetReadyException; import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.ipc.RPCUtil; import org.apache.hadoop.yarn.ipc.YarnRPC; import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; @@ -83,6 
+87,7 @@ import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.DeletionService; import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger; +import org.apache.hadoop.yarn.server.nodemanager.NodeManagerEventType; import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger.AuditConstants; import org.apache.hadoop.yarn.server.nodemanager.NodeManager; import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater; @@ -120,6 +125,11 @@ public class ContainerManagerImpl extends CompositeService implements ServiceStateChangeListener, ContainerManagementProtocol, EventHandler { + /** + * Extra duration to wait for applications to be killed on shutdown. + */ + private static final int SHUTDOWN_CLEANUP_SLOP_MS = 1000; + private static final Log LOG = LogFactory.getLog(ContainerManagerImpl.class); final Context context; @@ -138,6 +148,11 @@ public class ContainerManagerImpl extends CompositeService implements private final DeletionService deletionService; private AtomicBoolean blockNewContainerRequests = new AtomicBoolean(false); + private boolean serviceStopped = false; + private final ReadLock readLock; + private final WriteLock writeLock; + + private long waitForContainersOnShutdownMillis; public ContainerManagerImpl(Context context, ContainerExecutor exec, DeletionService deletionContext, NodeStatusUpdater nodeStatusUpdater, @@ -181,6 +196,10 @@ public class ContainerManagerImpl extends CompositeService implements dispatcher.register(ContainersLauncherEventType.class, containersLauncher); addService(dispatcher); + + ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); + this.readLock = lock.readLock(); + this.writeLock = lock.writeLock(); } @Override @@ -190,6 +209,13 @@ public class ContainerManagerImpl extends CompositeService implements addIfService(logHandler); dispatcher.register(LogHandlerEventType.class, logHandler); + waitForContainersOnShutdownMillis = + conf.getLong(YarnConfiguration.NM_SLEEP_DELAY_BEFORE_SIGKILL_MS, + YarnConfiguration.DEFAULT_NM_SLEEP_DELAY_BEFORE_SIGKILL_MS) + + conf.getLong(YarnConfiguration.NM_PROCESS_KILL_WAIT_MS, + YarnConfiguration.DEFAULT_NM_PROCESS_KILL_WAIT_MS) + + SHUTDOWN_CLEANUP_SLOP_MS; + super.serviceInit(conf); } @@ -275,6 +301,16 @@ public class ContainerManagerImpl extends CompositeService implements @Override public void serviceStop() throws Exception { + setBlockNewContainerRequests(true); + this.writeLock.lock(); + try { + serviceStopped = true; + if (context != null) { + cleanUpApplications(NodeManagerEventType.SHUTDOWN); + } + } finally { + this.writeLock.unlock(); + } if (auxiliaryServices.getServiceState() == STARTED) { auxiliaryServices.unregisterServiceListener(this); } @@ -284,6 +320,60 @@ public class ContainerManagerImpl extends CompositeService implements super.serviceStop(); } + public void cleanUpApplications(NodeManagerEventType eventType) { + Map applications = + this.context.getApplications(); + if (applications.isEmpty()) { + return; + } + LOG.info("Applications still running : " + applications.keySet()); + + List appIds = + new ArrayList(applications.keySet()); + this.handle( + new CMgrCompletedAppsEvent(appIds, + CMgrCompletedAppsEvent.Reason.ON_SHUTDOWN)); + + LOG.info("Waiting for Applications to be Finished"); + + switch (eventType) { + case SHUTDOWN: + long waitStartTime = System.currentTimeMillis(); + while (!applications.isEmpty() + && System.currentTimeMillis() - waitStartTime + < 
waitForContainersOnShutdownMillis) { + try { + Thread.sleep(1000); + } catch (InterruptedException ex) { + LOG.warn("Interrupted while sleeping on applications finish on shutdown", + ex); + } + } + break; + case RESYNC: + while (!applications.isEmpty()) { + try { + Thread.sleep(1000); + } catch (InterruptedException ex) { + LOG.warn("Interrupted while sleeping on applications finish on resync", + ex); + } + } + break; + default: + throw new YarnRuntimeException("Get an unknown NodeManagerEventType: " + + eventType); + } + + // All applications Finished + if (applications.isEmpty()) { + LOG.info("All applications in FINISHED state"); + } else { + LOG.info("Done waiting for Applications to be Finished. Still alive: " + + applications.keySet()); + } + } + // Get the remoteUGI corresponding to the api call. protected UserGroupInformation getRemoteUgi() throws YarnException { @@ -479,29 +569,40 @@ public class ContainerManagerImpl extends CompositeService implements + " already is running on this node!!"); } - // Create the application - Application application = - new ApplicationImpl(dispatcher, user, applicationID, credentials, context); - if (null == context.getApplications().putIfAbsent(applicationID, - application)) { - LOG.info("Creating a new application reference for app " + applicationID); + this.readLock.lock(); + try { + if (!serviceStopped) { + // Create the application + Application application = + new ApplicationImpl(dispatcher, user, applicationID, credentials, context); + if (null == context.getApplications().putIfAbsent(applicationID, + application)) { + LOG.info("Creating a new application reference for app " + applicationID); - dispatcher.getEventHandler().handle( - new ApplicationInitEvent(applicationID, container.getLaunchContext() - .getApplicationACLs())); + dispatcher.getEventHandler().handle( + new ApplicationInitEvent(applicationID, container.getLaunchContext() + .getApplicationACLs())); + } + + dispatcher.getEventHandler().handle( + new ApplicationContainerInitEvent(container)); + + this.context.getContainerTokenSecretManager().startContainerSuccessful( + containerTokenIdentifier); + NMAuditLogger.logSuccess(user, AuditConstants.START_CONTAINER, + "ContainerManageImpl", applicationID, containerId); + // TODO launchedContainer misplaced -> doesn't necessarily mean a container + // launch. A finished Application will not launch containers. + metrics.launchedContainer(); + metrics.allocateContainer(containerTokenIdentifier.getResource()); + } else { + throw new YarnException( + "Container start failed as the NodeManager is " + + "in the process of shutting down"); + } + } finally { + this.readLock.unlock(); } - - dispatcher.getEventHandler().handle( - new ApplicationContainerInitEvent(container)); - - this.context.getContainerTokenSecretManager().startContainerSuccessful( - containerTokenIdentifier); - NMAuditLogger.logSuccess(user, AuditConstants.START_CONTAINER, - "ContainerManageImpl", applicationID, containerId); - // TODO launchedContainer misplaced -> doesn't necessarily mean a container - // launch. A finished Application will not launch containers. 
- metrics.launchedContainer(); - metrics.allocateContainer(containerTokenIdentifier.getResource()); } protected ContainerTokenIdentifier verifyAndGetContainerTokenIdentifier( @@ -726,9 +827,15 @@ public class ContainerManagerImpl extends CompositeService implements CMgrCompletedAppsEvent appsFinishedEvent = (CMgrCompletedAppsEvent) event; for (ApplicationId appID : appsFinishedEvent.getAppsToCleanup()) { + String diagnostic = ""; + if (appsFinishedEvent.getReason() == CMgrCompletedAppsEvent.Reason.ON_SHUTDOWN) { + diagnostic = "Application killed on shutdown"; + } else if (appsFinishedEvent.getReason() == CMgrCompletedAppsEvent.Reason.BY_RESOURCEMANAGER) { + diagnostic = "Application killed by ResourceManager"; + } this.dispatcher.getEventHandler().handle( new ApplicationFinishEvent(appID, - "Application Killed by ResourceManager")); + diagnostic)); } break; case FINISH_CONTAINERS: @@ -736,20 +843,14 @@ public class ContainerManagerImpl extends CompositeService implements (CMgrCompletedContainersEvent) event; for (ContainerId container : containersFinishedEvent .getContainersToCleanup()) { - String diagnostic = ""; - if (containersFinishedEvent.getReason() == - CMgrCompletedContainersEvent.Reason.ON_SHUTDOWN) { - diagnostic = "Container Killed on Shutdown"; - } else if (containersFinishedEvent.getReason() == - CMgrCompletedContainersEvent.Reason.BY_RESOURCEMANAGER) { - diagnostic = "Container Killed by ResourceManager"; - } - this.dispatcher.getEventHandler().handle( - new ContainerKillEvent(container, diagnostic)); + this.dispatcher.getEventHandler().handle( + new ContainerKillEvent(container, + "Container Killed by ResourceManager")); } break; default: - LOG.warn("Invalid event " + event.getType() + ". Ignoring."); + throw new YarnRuntimeException( + "Get an unknown ContainerManagerEvent type: " + event.getType()); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java index 43a2fcc0055..21d2f9174f4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java @@ -177,6 +177,13 @@ public class ApplicationImpl implements Application { ApplicationState.APPLICATION_RESOURCES_CLEANINGUP), ApplicationEventType.APPLICATION_CONTAINER_FINISHED, new AppFinishTransition()) + .addTransition(ApplicationState.FINISHING_CONTAINERS_WAIT, + ApplicationState.FINISHING_CONTAINERS_WAIT, + EnumSet.of( + ApplicationEventType.APPLICATION_LOG_HANDLING_INITED, + ApplicationEventType.APPLICATION_LOG_HANDLING_FAILED, + ApplicationEventType.APPLICATION_INITED, + ApplicationEventType.FINISH_APPLICATION)) // Transitions from APPLICATION_RESOURCES_CLEANINGUP state .addTransition(ApplicationState.APPLICATION_RESOURCES_CLEANINGUP, @@ -186,12 +193,25 @@ public class ApplicationImpl implements Application { ApplicationState.FINISHED, ApplicationEventType.APPLICATION_RESOURCES_CLEANEDUP, new AppCompletelyDoneTransition()) + 
.addTransition(ApplicationState.APPLICATION_RESOURCES_CLEANINGUP, + ApplicationState.APPLICATION_RESOURCES_CLEANINGUP, + EnumSet.of( + ApplicationEventType.APPLICATION_LOG_HANDLING_INITED, + ApplicationEventType.APPLICATION_LOG_HANDLING_FAILED, + ApplicationEventType.APPLICATION_LOG_HANDLING_FINISHED, + ApplicationEventType.APPLICATION_INITED, + ApplicationEventType.FINISH_APPLICATION)) // Transitions from FINISHED state .addTransition(ApplicationState.FINISHED, ApplicationState.FINISHED, ApplicationEventType.APPLICATION_LOG_HANDLING_FINISHED, new AppLogsAggregatedTransition()) + .addTransition(ApplicationState.FINISHED, ApplicationState.FINISHED, + EnumSet.of( + ApplicationEventType.APPLICATION_LOG_HANDLING_INITED, + ApplicationEventType.APPLICATION_LOG_HANDLING_FAILED, + ApplicationEventType.FINISH_APPLICATION)) // create the topology tables .installTopology(); @@ -343,7 +363,7 @@ public class ApplicationImpl implements Application { @Override public ApplicationState transition(ApplicationImpl app, ApplicationEvent event) { - + ApplicationFinishEvent appEvent = (ApplicationFinishEvent)event; if (app.containers.isEmpty()) { // No container to cleanup. Cleanup app level resources. app.handleAppFinishWithContainersCleanedup(); @@ -355,7 +375,7 @@ public class ApplicationImpl implements Application { for (ContainerId containerID : app.containers.keySet()) { app.dispatcher.getEventHandler().handle( new ContainerKillEvent(containerID, - "Container killed on application-finish event from RM.")); + "Container killed on application-finish event: " + appEvent.getDiagnostic())); } return ApplicationState.FINISHING_CONTAINERS_WAIT; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerReboot.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerReboot.java index fab9e019a47..e69170e4a5a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerReboot.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerReboot.java @@ -288,6 +288,7 @@ public class TestNodeManagerReboot { conf.set(YarnConfiguration.NM_LOCALIZER_ADDRESS, "127.0.0.1:12346"); conf.set(YarnConfiguration.NM_LOG_DIRS, logsDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOCAL_DIRS, nmLocalDir.getAbsolutePath()); + conf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 1); return conf; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java index a05e34143b1..3e0846b0422 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java @@ -143,6 +143,7 @@ public class TestNodeManagerResync { conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, 
remoteLogsDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOCAL_DIRS, nmLocalDir.getAbsolutePath()); + conf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 1); return conf; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java index 4c9559d660a..a13e7c1801e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java @@ -242,6 +242,7 @@ public class TestNodeManagerShutdown { conf.set(YarnConfiguration.NM_LOG_DIRS, logsDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, remoteLogsDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOCAL_DIRS, nmLocalDir.getAbsolutePath()); + conf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 1); return conf; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java index 2b54751cf82..8372aff851e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java @@ -454,13 +454,13 @@ public class TestNodeStatusUpdater { @Override protected void serviceStop() throws Exception { + System.out.println("Called stooppppp"); super.serviceStop(); isStopped = true; - ConcurrentMap containers = - getNMContext().getContainers(); - // ensure that containers are empty - if(!containers.isEmpty()) { + ConcurrentMap applications = + getNMContext().getApplications(); + // ensure that applications are empty + if(!applications.isEmpty()) { assertionFailedInThread.set(true); } syncBarrier.await(10000, TimeUnit.MILLISECONDS); @@ -859,9 +859,20 @@ public class TestNodeStatusUpdater { } @Override - protected void cleanupContainers(NodeManagerEventType eventType) { - super.cleanupContainers(NodeManagerEventType.SHUTDOWN); - numCleanups.incrementAndGet(); + protected ContainerManagerImpl createContainerManager(Context context, + ContainerExecutor exec, DeletionService del, + NodeStatusUpdater nodeStatusUpdater, + ApplicationACLsManager aclsManager, + LocalDirsHandlerService dirsHandler) { + return new ContainerManagerImpl(context, exec, del, nodeStatusUpdater, + metrics, aclsManager, dirsHandler) { + + @Override + public void cleanUpApplications(NodeManagerEventType eventType) { + super.cleanUpApplications(NodeManagerEventType.SHUTDOWN); + numCleanups.incrementAndGet(); + } + }; } }; @@ -1161,6 +1172,7 @@ public class TestNodeStatusUpdater { .RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS, connectionRetryIntervalMs); conf.setLong(YarnConfiguration.NM_SLEEP_DELAY_BEFORE_SIGKILL_MS, 5000); + 
conf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 1); CyclicBarrier syncBarrier = new CyclicBarrier(2); nm = new MyNodeManager2(syncBarrier, conf); nm.init(conf); @@ -1201,9 +1213,20 @@ public class TestNodeStatusUpdater { } @Override - protected void cleanupContainers(NodeManagerEventType eventType) { - super.cleanupContainers(NodeManagerEventType.SHUTDOWN); - numCleanups.incrementAndGet(); + protected ContainerManagerImpl createContainerManager(Context context, + ContainerExecutor exec, DeletionService del, + NodeStatusUpdater nodeStatusUpdater, + ApplicationACLsManager aclsManager, + LocalDirsHandlerService dirsHandler) { + return new ContainerManagerImpl(context, exec, del, nodeStatusUpdater, + metrics, aclsManager, dirsHandler) { + + @Override + public void cleanUpApplications(NodeManagerEventType eventType) { + super.cleanUpApplications(NodeManagerEventType.SHUTDOWN); + numCleanups.incrementAndGet(); + } + }; } }; @@ -1345,6 +1368,7 @@ public class TestNodeStatusUpdater { conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, remoteLogsDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOCAL_DIRS, nmLocalDir.getAbsolutePath()); + conf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 1); return conf; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java index b02054cef67..4f23427fe2d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java @@ -166,6 +166,7 @@ public abstract class BaseContainerManagerTest { conf.set(YarnConfiguration.NM_LOG_DIRS, localLogDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, remoteLogDir.getAbsolutePath()); + conf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 1); // Default delSrvc delSrvc = createDeletionService(); delSrvc.init(conf); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java index 90cd16e16eb..f62cd50e7ab 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java @@ -542,7 +542,7 @@ public class TestContainerManager extends BaseContainerManagerTest { // Simulate RM sending an AppFinish event. 
containerManager.handle(new CMgrCompletedAppsEvent(Arrays - .asList(new ApplicationId[] { appId }))); + .asList(new ApplicationId[] { appId }), CMgrCompletedAppsEvent.Reason.ON_SHUTDOWN)); BaseContainerManagerTest.waitForApplicationState(containerManager, cId.getApplicationAttemptId().getApplicationId(), diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/TestApplication.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/TestApplication.java index 3b2878c3709..356029e6907 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/TestApplication.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/TestApplication.java @@ -586,8 +586,8 @@ public class TestApplication { } public void appFinished() { - app.handle(new ApplicationEvent(appId, - ApplicationEventType.FINISH_APPLICATION)); + app.handle(new ApplicationFinishEvent(appId, + "Finish Application")); drainDispatcherEvents(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java index 5179f3f965f..bfb0e873465 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java @@ -826,7 +826,7 @@ public class TestLogAggregationService extends BaseContainerManagerTest { cId, ContainerState.COMPLETE); this.containerManager.handle(new CMgrCompletedAppsEvent(Arrays - .asList(appId))); + .asList(appId), CMgrCompletedAppsEvent.Reason.ON_SHUTDOWN)); this.containerManager.stop(); } From cc76f70fe31f40a710440a9bb42187ac7f3fd105 Mon Sep 17 00:00:00 2001 From: Bikas Saha Date: Fri, 4 Oct 2013 00:57:18 +0000 Subject: [PATCH 042/133] Addendum for missing file YARN-1256. 
NM silently ignores non-existent service in StartContainerRequest (Xuan Gong via bikas) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529048 13f79535-47bb-0310-9956-ffa450edef68 --- .../InvalidAuxServiceException.java | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/InvalidAuxServiceException.java diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/InvalidAuxServiceException.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/InvalidAuxServiceException.java new file mode 100644 index 00000000000..b447981e5e4 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/InvalidAuxServiceException.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.exceptions; + +import org.apache.hadoop.yarn.api.ContainerManagementProtocol; +import org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest; + +/** + * This exception is thrown by a NodeManager that is rejecting start-container + * requests via + * {@link ContainerManagementProtocol#startContainers(StartContainersRequest)} + * when the requested auxiliary service does not exist. + */ +public class InvalidAuxServiceException extends YarnException { + + private static final long serialVersionUID = 1L; + + public InvalidAuxServiceException(String msg) { + super(msg); + } +} From d8ea364d07f782f8693b1ac0c340d4bd8d471c32 Mon Sep 17 00:00:00 2001 From: Sanford Ryza Date: Fri, 4 Oct 2013 01:30:19 +0000 Subject: [PATCH 043/133] YARN-1271. Text file busy errors launching containers again (Sandy Ryza) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529058 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 +++ .../hadoop/yarn/server/nodemanager/ContainerExecutor.java | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index cb6219cc122..aeff5bf8e14 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -150,6 +150,9 @@ Release 2.1.2 - UNRELEASED YARN-1149. NM throws InvalidStateTransitonException: Invalid event: APPLICATION_LOG_HANDLING_FINISHED at RUNNING (Xuan Gong via hitesh) + YARN-1271. 
"Text file busy" errors launching containers again + (Sandy Ryza) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java index 3d3aefd04a9..ee72fbc6647 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java @@ -218,7 +218,7 @@ public abstract class ContainerExecutor implements Configurable { retCommand.addAll(Arrays.asList("nice", "-n", Integer.toString(containerSchedPriorityAdjustment))); } - retCommand.addAll(Arrays.asList("bash", "-c", command)); + retCommand.addAll(Arrays.asList("bash", command)); return retCommand.toArray(new String[retCommand.size()]); } From 6ff600d9e3496008d81361c17ea427a8675cd0d4 Mon Sep 17 00:00:00 2001 From: Hitesh Shah Date: Fri, 4 Oct 2013 02:36:17 +0000 Subject: [PATCH 044/133] YARN-1131. logs command should return an appropriate error message if YARN application is still running. Contributed by Siddharth Seth. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529068 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/hadoop/mapreduce/tools/CLI.java | 4 +- hadoop-yarn-project/CHANGES.txt | 3 + hadoop-yarn-project/hadoop-yarn/bin/yarn | 2 +- .../hadoop/yarn/client/cli/LogsCLI.java} | 190 ++++++------------ .../hadoop/yarn/client/cli/TestLogsCLI.java | 172 ++++++++++++++++ .../yarn/logaggregation/LogCLIHelpers.java | 162 +++++++++++++++ .../yarn/logaggregation/TestLogDumper.java | 89 -------- 7 files changed, 405 insertions(+), 217 deletions(-) rename hadoop-yarn-project/hadoop-yarn/{hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/LogDumper.java => hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/LogsCLI.java} (53%) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestLogsCLI.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/LogCLIHelpers.java delete mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/logaggregation/TestLogDumper.java diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/tools/CLI.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/tools/CLI.java index 4a1398b0c33..0d74d9fa3a2 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/tools/CLI.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/tools/CLI.java @@ -54,7 +54,7 @@ import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.util.ExitUtil; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; -import org.apache.hadoop.yarn.logaggregation.LogDumper; +import 
org.apache.hadoop.yarn.logaggregation.LogCLIHelpers; import com.google.common.base.Charsets; @@ -359,7 +359,7 @@ public class CLI extends Configured implements Tool { JobID jobID = JobID.forName(jobid); TaskAttemptID taskAttemptID = TaskAttemptID.forName(taskid); LogParams logParams = cluster.getLogParams(jobID, taskAttemptID); - LogDumper logDumper = new LogDumper(); + LogCLIHelpers logDumper = new LogCLIHelpers(); logDumper.setConf(getConf()); exitCode = logDumper.dumpAContainersLogs(logParams.getApplicationId(), logParams.getContainerId(), logParams.getNodeId(), diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index aeff5bf8e14..1ac823a55cd 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -153,6 +153,9 @@ Release 2.1.2 - UNRELEASED YARN-1271. "Text file busy" errors launching containers again (Sandy Ryza) + YARN-1131. $yarn logs command should return an appropriate error message if + YARN application is still running. (Siddharth Seth via hitesh) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/bin/yarn b/hadoop-yarn-project/hadoop-yarn/bin/yarn index 8d369e8ad7f..97c2afe992b 100644 --- a/hadoop-yarn-project/hadoop-yarn/bin/yarn +++ b/hadoop-yarn-project/hadoop-yarn/bin/yarn @@ -210,7 +210,7 @@ elif [ "$COMMAND" = "jar" ] ; then CLASS=org.apache.hadoop.util.RunJar YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS" elif [ "$COMMAND" = "logs" ] ; then - CLASS=org.apache.hadoop.yarn.logaggregation.LogDumper + CLASS=org.apache.hadoop.yarn.client.cli.LogsCLI YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS" elif [ "$COMMAND" = "daemonlog" ] ; then CLASS=org.apache.hadoop.log.LogLevel diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/LogDumper.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/LogsCLI.java similarity index 53% rename from hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/LogDumper.java rename to hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/LogsCLI.java index 1e7ed44e2d3..eb6169cf368 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/LogDumper.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/LogsCLI.java @@ -16,45 +16,39 @@ * limitations under the License. 
*/ -package org.apache.hadoop.yarn.logaggregation; +package org.apache.hadoop.yarn.client.cli; -import java.io.DataInputStream; -import java.io.EOFException; -import java.io.FileNotFoundException; import java.io.IOException; -import java.io.PrintStream; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.GnuParser; import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; -import org.apache.commons.lang.StringUtils; -import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability.Evolving; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.fs.FileContext; -import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.Tool; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ApplicationReport; +import org.apache.hadoop.yarn.client.api.YarnClient; import org.apache.hadoop.yarn.conf.YarnConfiguration; -import org.apache.hadoop.yarn.factories.RecordFactory; -import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; -import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogKey; -import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogReader; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat; +import org.apache.hadoop.yarn.logaggregation.LogAggregationUtils; +import org.apache.hadoop.yarn.logaggregation.LogCLIHelpers; import org.apache.hadoop.yarn.util.ConverterUtils; import com.google.common.annotations.VisibleForTesting; @Public @Evolving -public class LogDumper extends Configured implements Tool { +public class LogsCLI extends Configured implements Tool { private static final String CONTAINER_ID_OPTION = "containerId"; private static final String APPLICATION_ID_OPTION = "applicationId"; @@ -65,7 +59,9 @@ public class LogDumper extends Configured implements Tool { public int run(String[] args) throws Exception { Options opts = new Options(); - opts.addOption(APPLICATION_ID_OPTION, true, "ApplicationId (required)"); + Option appIdOpt = new Option(APPLICATION_ID_OPTION, true, "ApplicationId (required)"); + appIdOpt.setRequired(true); + opts.addOption(appIdOpt); opts.addOption(CONTAINER_ID_OPTION, true, "ContainerId (must be specified if node address is specified)"); opts.addOption(NODE_ADDRESS_OPTION, true, "NodeAddress in the format " @@ -99,28 +95,46 @@ public class LogDumper extends Configured implements Tool { nodeAddress = commandLine.getOptionValue(NODE_ADDRESS_OPTION); appOwner = commandLine.getOptionValue(APP_OWNER_OPTION); } catch (ParseException e) { - System.out.println("options parsing failed: " + e.getMessage()); + System.err.println("options parsing failed: " + e.getMessage()); printHelpMessage(printOpts); return -1; } if (appIdStr == null) { - System.out.println("ApplicationId cannot be null!"); + System.err.println("ApplicationId cannot be null!"); printHelpMessage(printOpts); return -1; } - RecordFactory recordFactory = - RecordFactoryProvider.getRecordFactory(getConf()); - ApplicationId appId = - 
ConverterUtils.toApplicationId(recordFactory, appIdStr); + ApplicationId appId = null; + try { + appId = ConverterUtils.toApplicationId(appIdStr); + } catch (Exception e) { + System.err.println("Invalid ApplicationId specified"); + return -1; + } + + try { + int resultCode = verifyApplicationState(appId); + if (resultCode != 0) { + System.out.println("Application has not completed." + + " Logs are only available after an application completes"); + return resultCode; + } + } catch (Exception e) { + System.err.println("Unable to get ApplicationState." + + " Attempting to fetch logs directly from the filesystem."); + } + LogCLIHelpers logCliHelper = new LogCLIHelpers(); + logCliHelper.setConf(getConf()); + if (appOwner == null || appOwner.isEmpty()) { appOwner = UserGroupInformation.getCurrentUser().getShortUserName(); } int resultCode = 0; if (containerIdStr == null && nodeAddress == null) { - resultCode = dumpAllContainersLogs(appId, appOwner, System.out); + resultCode = logCliHelper.dumpAllContainersLogs(appId, appOwner, System.out); } else if ((containerIdStr == null && nodeAddress != null) || (containerIdStr != null && nodeAddress == null)) { System.out.println("ContainerId or NodeAddress cannot be null!"); @@ -138,123 +152,49 @@ public class LogDumper extends Configured implements Tool { appOwner, ConverterUtils.toNodeId(nodeAddress), LogAggregationUtils.getRemoteNodeLogDirSuffix(getConf()))); - resultCode = dumpAContainerLogs(containerIdStr, reader, System.out); + resultCode = logCliHelper.dumpAContainerLogs(containerIdStr, reader, System.out); } return resultCode; } - @Private - @VisibleForTesting - public int dumpAContainersLogs(String appId, String containerId, - String nodeId, String jobOwner) throws IOException { - Path remoteRootLogDir = - new Path(getConf().get(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, - YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR)); - String suffix = LogAggregationUtils.getRemoteNodeLogDirSuffix(getConf()); - Path logPath = LogAggregationUtils.getRemoteNodeLogFileForApp( - remoteRootLogDir, ConverterUtils.toApplicationId(appId), jobOwner, - ConverterUtils.toNodeId(nodeId), suffix); - AggregatedLogFormat.LogReader reader; + private int verifyApplicationState(ApplicationId appId) throws IOException, + YarnException { + YarnClient yarnClient = createYarnClient(); + try { - reader = new AggregatedLogFormat.LogReader(getConf(), logPath); - } catch (FileNotFoundException fnfe) { - System.out.println("Logs not available at " + logPath.toString()); - System.out.println( - "Log aggregation has not completed or is not enabled."); - return -1; - } - return dumpAContainerLogs(containerId, reader, System.out); - } - - private int dumpAContainerLogs(String containerIdStr, - AggregatedLogFormat.LogReader reader, PrintStream out) - throws IOException { - DataInputStream valueStream; - LogKey key = new LogKey(); - valueStream = reader.next(key); - - while (valueStream != null && !key.toString().equals(containerIdStr)) { - // Next container - key = new LogKey(); - valueStream = reader.next(key); - } - - if (valueStream == null) { - System.out.println("Logs for container " + containerIdStr - + " are not present in this log-file."); - return -1; - } - - while (true) { - try { - LogReader.readAContainerLogsForALogType(valueStream, out); - } catch (EOFException eof) { + ApplicationReport appReport = yarnClient.getApplicationReport(appId); + switch (appReport.getYarnApplicationState()) { + case NEW: + case NEW_SAVING: + case ACCEPTED: + case SUBMITTED: + case RUNNING: + return -1; 
+ case FAILED: + case FINISHED: + case KILLED: + default: break; + } + } finally { + yarnClient.close(); } return 0; } - - private int dumpAllContainersLogs(ApplicationId appId, String appOwner, - PrintStream out) throws IOException { - Path remoteRootLogDir = - new Path(getConf().get(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, - YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR)); - String user = appOwner; - String logDirSuffix = - LogAggregationUtils.getRemoteNodeLogDirSuffix(getConf()); - //TODO Change this to get a list of files from the LAS. - Path remoteAppLogDir = - LogAggregationUtils.getRemoteAppLogDir(remoteRootLogDir, appId, user, - logDirSuffix); - RemoteIterator nodeFiles; - try { - nodeFiles = FileContext.getFileContext().listStatus(remoteAppLogDir); - } catch (FileNotFoundException fnf) { - System.out.println("Logs not available at " - + remoteAppLogDir.toString()); - System.out.println( - "Log aggregation has not completed or is not enabled."); - return -1; - } - while (nodeFiles.hasNext()) { - FileStatus thisNodeFile = nodeFiles.next(); - AggregatedLogFormat.LogReader reader = - new AggregatedLogFormat.LogReader(getConf(), - new Path(remoteAppLogDir, thisNodeFile.getPath().getName())); - try { - - DataInputStream valueStream; - LogKey key = new LogKey(); - valueStream = reader.next(key); - - while (valueStream != null) { - String containerString = "\n\nContainer: " + key + " on " + thisNodeFile.getPath().getName(); - out.println(containerString); - out.println(StringUtils.repeat("=", containerString.length())); - while (true) { - try { - LogReader.readAContainerLogsForALogType(valueStream, out); - } catch (EOFException eof) { - break; - } - } - - // Next container - key = new LogKey(); - valueStream = reader.next(key); - } - } finally { - reader.close(); - } - } - return 0; + + @VisibleForTesting + protected YarnClient createYarnClient() { + YarnClient yarnClient = YarnClient.createYarnClient(); + yarnClient.init(getConf()); + yarnClient.start(); + return yarnClient; } public static void main(String[] args) throws Exception { Configuration conf = new YarnConfiguration(); - LogDumper logDumper = new LogDumper(); + LogsCLI logDumper = new LogsCLI(); logDumper.setConf(conf); int exitCode = logDumper.run(args); System.exit(exitCode); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestLogsCLI.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestLogsCLI.java new file mode 100644 index 00000000000..45e112bc9c1 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestLogsCLI.java @@ -0,0 +1,172 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +package org.apache.hadoop.yarn.client.cli; + +import static org.junit.Assert.assertTrue; +import static org.mockito.Matchers.any; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.mock; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.io.PrintWriter; + +import junit.framework.Assert; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ApplicationReport; +import org.apache.hadoop.yarn.api.records.YarnApplicationState; +import org.apache.hadoop.yarn.client.api.YarnClient; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.logaggregation.LogCLIHelpers; +import org.junit.Before; +import org.junit.Test; + +public class TestLogsCLI { + ByteArrayOutputStream sysOutStream; + private PrintStream sysOut; + + ByteArrayOutputStream sysErrStream; + private PrintStream sysErr; + + @Before + public void setUp() { + sysOutStream = new ByteArrayOutputStream(); + sysOut = new PrintStream(sysOutStream); + System.setOut(sysOut); + + sysErrStream = new ByteArrayOutputStream(); + sysErr = new PrintStream(sysErrStream); + System.setErr(sysErr); + } + + @Test(timeout = 5000l) + public void testFailResultCodes() throws Exception { + Configuration conf = new YarnConfiguration(); + conf.setClass("fs.file.impl", LocalFileSystem.class, FileSystem.class); + LogCLIHelpers cliHelper = new LogCLIHelpers(); + cliHelper.setConf(conf); + YarnClient mockYarnClient = createMockYarnClient(YarnApplicationState.FINISHED); + LogsCLI dumper = new LogsCLIForTest(mockYarnClient); + dumper.setConf(conf); + + // verify dumping a non-existent application's logs returns a failure code + int exitCode = dumper.run( new String[] { + "-applicationId", "application_0_0" } ); + assertTrue("Should return an error code", exitCode != 0); + + // verify dumping a non-existent container log is a failure code + exitCode = cliHelper.dumpAContainersLogs("application_0_0", "container_0_0", + "nonexistentnode:1234", "nobody"); + assertTrue("Should return an error code", exitCode != 0); + } + + @Test(timeout = 5000l) + public void testInvalidApplicationId() throws Exception { + Configuration conf = new YarnConfiguration(); + YarnClient mockYarnClient = createMockYarnClient(YarnApplicationState.FINISHED); + LogsCLI cli = new LogsCLIForTest(mockYarnClient); + cli.setConf(conf); + + int exitCode = cli.run( new String[] { "-applicationId", "not_an_app_id"}); + assertTrue(exitCode == -1); + assertTrue(sysErrStream.toString().startsWith("Invalid ApplicationId specified")); + } + + @Test(timeout = 5000l) + public void testUnknownApplicationId() throws Exception { + Configuration conf = new YarnConfiguration(); + YarnClient mockYarnClient = createMockYarnClientUnknownApp(); + LogsCLI cli = new LogsCLIForTest(mockYarnClient); + cli.setConf(conf); + + int exitCode = cli.run(new String[] { "-applicationId", + ApplicationId.newInstance(1, 1).toString() }); + + // Error since no logs present for the app. 
+ assertTrue(exitCode != 0); + assertTrue(sysErrStream.toString().startsWith( + "Unable to get ApplicationState")); + } + + @Test(timeout = 5000l) + public void testHelpMessage() throws Exception { + Configuration conf = new YarnConfiguration(); + YarnClient mockYarnClient = createMockYarnClient(YarnApplicationState.FINISHED); + LogsCLI dumper = new LogsCLIForTest(mockYarnClient); + dumper.setConf(conf); + + int exitCode = dumper.run(new String[]{}); + assertTrue(exitCode == -1); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + PrintWriter pw = new PrintWriter(baos); + pw.println("Retrieve logs for completed YARN applications."); + pw.println("usage: yarn logs -applicationId [OPTIONS]"); + pw.println(); + pw.println("general options are:"); + pw.println(" -appOwner AppOwner (assumed to be current user if"); + pw.println(" not specified)"); + pw.println(" -containerId ContainerId (must be specified if node"); + pw.println(" address is specified)"); + pw.println(" -nodeAddress NodeAddress in the format nodename:port"); + pw.println(" (must be specified if container id is"); + pw.println(" specified)"); + pw.close(); + String appReportStr = baos.toString("UTF-8"); + Assert.assertEquals(appReportStr, sysOutStream.toString()); + } + + private YarnClient createMockYarnClient(YarnApplicationState appState) + throws YarnException, IOException { + YarnClient mockClient = mock(YarnClient.class); + ApplicationReport mockAppReport = mock(ApplicationReport.class); + doReturn(appState).when(mockAppReport).getYarnApplicationState(); + doReturn(mockAppReport).when(mockClient).getApplicationReport( + any(ApplicationId.class)); + return mockClient; + } + + private YarnClient createMockYarnClientUnknownApp() throws YarnException, + IOException { + YarnClient mockClient = mock(YarnClient.class); + doThrow(new YarnException("Unknown AppId")).when(mockClient) + .getApplicationReport(any(ApplicationId.class)); + return mockClient; + } + + private static class LogsCLIForTest extends LogsCLI { + + private YarnClient yarnClient; + + public LogsCLIForTest(YarnClient yarnClient) { + super(); + this.yarnClient = yarnClient; + } + + protected YarnClient createYarnClient() { + return yarnClient; + } + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/LogCLIHelpers.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/LogCLIHelpers.java new file mode 100644 index 00000000000..908a0fd8c30 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/LogCLIHelpers.java @@ -0,0 +1,162 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.logaggregation; + +import java.io.DataInputStream; +import java.io.EOFException; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.PrintStream; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileContext; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogKey; +import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogReader; +import org.apache.hadoop.yarn.util.ConverterUtils; + +import com.google.common.annotations.VisibleForTesting; + +public class LogCLIHelpers implements Configurable { + + private Configuration conf; + + @Private + @VisibleForTesting + public int dumpAContainersLogs(String appId, String containerId, + String nodeId, String jobOwner) throws IOException { + Path remoteRootLogDir = new Path(getConf().get( + YarnConfiguration.NM_REMOTE_APP_LOG_DIR, + YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR)); + String suffix = LogAggregationUtils.getRemoteNodeLogDirSuffix(getConf()); + Path logPath = LogAggregationUtils.getRemoteNodeLogFileForApp( + remoteRootLogDir, ConverterUtils.toApplicationId(appId), jobOwner, + ConverterUtils.toNodeId(nodeId), suffix); + AggregatedLogFormat.LogReader reader; + try { + reader = new AggregatedLogFormat.LogReader(getConf(), logPath); + } catch (FileNotFoundException fnfe) { + System.out.println("Logs not available at " + logPath.toString()); + System.out + .println("Log aggregation has not completed or is not enabled."); + return -1; + } + return dumpAContainerLogs(containerId, reader, System.out); + } + + @Private + public int dumpAContainerLogs(String containerIdStr, + AggregatedLogFormat.LogReader reader, PrintStream out) throws IOException { + DataInputStream valueStream; + LogKey key = new LogKey(); + valueStream = reader.next(key); + + while (valueStream != null && !key.toString().equals(containerIdStr)) { + // Next container + key = new LogKey(); + valueStream = reader.next(key); + } + + if (valueStream == null) { + System.out.println("Logs for container " + containerIdStr + + " are not present in this log-file."); + return -1; + } + + while (true) { + try { + LogReader.readAContainerLogsForALogType(valueStream, out); + } catch (EOFException eof) { + break; + } + } + return 0; + } + + @Private + public int dumpAllContainersLogs(ApplicationId appId, String appOwner, + PrintStream out) throws IOException { + Path remoteRootLogDir = new Path(getConf().get( + YarnConfiguration.NM_REMOTE_APP_LOG_DIR, + YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR)); + String user = appOwner; + String logDirSuffix = LogAggregationUtils.getRemoteNodeLogDirSuffix(getConf()); + // TODO Change this to get a list of files from the LAS. 
+ Path remoteAppLogDir = LogAggregationUtils.getRemoteAppLogDir( + remoteRootLogDir, appId, user, logDirSuffix); + RemoteIterator nodeFiles; + try { + nodeFiles = FileContext.getFileContext().listStatus(remoteAppLogDir); + } catch (FileNotFoundException fnf) { + System.out.println("Logs not available at " + remoteAppLogDir.toString()); + System.out + .println("Log aggregation has not completed or is not enabled."); + return -1; + } + while (nodeFiles.hasNext()) { + FileStatus thisNodeFile = nodeFiles.next(); + AggregatedLogFormat.LogReader reader = new AggregatedLogFormat.LogReader( + getConf(), new Path(remoteAppLogDir, thisNodeFile.getPath().getName())); + try { + + DataInputStream valueStream; + LogKey key = new LogKey(); + valueStream = reader.next(key); + + while (valueStream != null) { + String containerString = "\n\nContainer: " + key + " on " + + thisNodeFile.getPath().getName(); + out.println(containerString); + out.println(StringUtils.repeat("=", containerString.length())); + while (true) { + try { + LogReader.readAContainerLogsForALogType(valueStream, out); + } catch (EOFException eof) { + break; + } + } + + // Next container + key = new LogKey(); + valueStream = reader.next(key); + } + } finally { + reader.close(); + } + } + return 0; + } + + @Override + public void setConf(Configuration conf) { + this.conf = conf; + } + + @Override + public Configuration getConf() { + return this.conf; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/logaggregation/TestLogDumper.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/logaggregation/TestLogDumper.java deleted file mode 100644 index 683b80c2c5f..00000000000 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/logaggregation/TestLogDumper.java +++ /dev/null @@ -1,89 +0,0 @@ -/** -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -package org.apache.hadoop.yarn.logaggregation; - -import static org.junit.Assert.assertTrue; -import java.io.ByteArrayOutputStream; -import java.io.PrintStream; -import java.io.PrintWriter; - -import junit.framework.Assert; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocalFileSystem; -import org.apache.hadoop.yarn.conf.YarnConfiguration; -import org.junit.Before; -import org.junit.Test; - -public class TestLogDumper { - ByteArrayOutputStream sysOutStream; - private PrintStream sysOut; - - @Before - public void setUp() { - sysOutStream = new ByteArrayOutputStream(); - sysOut = new PrintStream(sysOutStream); - System.setOut(sysOut); - } - - @Test - public void testFailResultCodes() throws Exception { - Configuration conf = new YarnConfiguration(); - conf.setClass("fs.file.impl", LocalFileSystem.class, FileSystem.class); - LogDumper dumper = new LogDumper(); - dumper.setConf(conf); - - // verify dumping a non-existent application's logs returns a failure code - int exitCode = dumper.run( new String[] { - "-applicationId", "application_0_0" } ); - assertTrue("Should return an error code", exitCode != 0); - - // verify dumping a non-existent container log is a failure code - exitCode = dumper.dumpAContainersLogs("application_0_0", "container_0_0", - "nonexistentnode:1234", "nobody"); - assertTrue("Should return an error code", exitCode != 0); - } - - @Test - public void testHelpMessage() throws Exception { - Configuration conf = new YarnConfiguration(); - LogDumper dumper = new LogDumper(); - dumper.setConf(conf); - - int exitCode = dumper.run(new String[]{}); - assertTrue(exitCode == -1); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - PrintWriter pw = new PrintWriter(baos); - pw.println("Retrieve logs for completed YARN applications."); - pw.println("usage: yarn logs -applicationId [OPTIONS]"); - pw.println(); - pw.println("general options are:"); - pw.println(" -appOwner AppOwner (assumed to be current user if"); - pw.println(" not specified)"); - pw.println(" -containerId ContainerId (must be specified if node"); - pw.println(" address is specified)"); - pw.println(" -nodeAddress NodeAddress in the format nodename:port"); - pw.println(" (must be specified if container id is"); - pw.println(" specified)"); - pw.close(); - String appReportStr = baos.toString("UTF-8"); - Assert.assertEquals(appReportStr, sysOutStream.toString()); - } -} From 65cd7bf6b120722ee6054393520c349eeacd4969 Mon Sep 17 00:00:00 2001 From: Chris Nauroth Date: Fri, 4 Oct 2013 05:37:54 +0000 Subject: [PATCH 045/133] MAPREDUCE-5442. $HADOOP_MAPRED_HOME/$HADOOP_CONF_DIR setting not working on Windows. Contributed by Yingda Chen. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529077 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 +++ .../org/apache/hadoop/mapreduce/v2/util/MRApps.java | 4 +++- .../apache/hadoop/mapreduce/v2/util/TestMRApps.java | 4 +++- .../org/apache/hadoop/mapreduce/MRJobConfig.java | 11 +++++++---- .../src/main/resources/mapred-default.xml | 13 +++++++++++-- 5 files changed, 27 insertions(+), 8 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 24cd4fbeee0..7ad5577396a 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -275,6 +275,9 @@ Release 2.1.2 - UNRELEASED MAPREDUCE-5489. 
MR jobs hangs as it does not use the node-blacklisting feature in RM requests (Zhijie Shen via bikas) + MAPREDUCE-5442. $HADOOP_MAPRED_HOME/$HADOOP_CONF_DIR setting not working on + Windows. (Yingda Chen via cnauroth) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java index 57c4eaa5669..650bdd499eb 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java @@ -49,6 +49,7 @@ import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptState; import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.api.records.TaskType; +import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.ContainerLogAppender; import org.apache.hadoop.yarn.api.ApplicationConstants; import org.apache.hadoop.yarn.api.ApplicationConstants.Environment; @@ -181,7 +182,8 @@ public class MRApps extends Apps { boolean foundFrameworkInClasspath = (frameworkName == null); for (String c : conf.getStrings( MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH, - MRJobConfig.DEFAULT_MAPREDUCE_APPLICATION_CLASSPATH)) { + StringUtils.getStrings( + MRJobConfig.DEFAULT_MAPREDUCE_APPLICATION_CLASSPATH))){ Apps.addToEnvironment(environment, Environment.CLASSPATH.name(), c .trim()); if (!foundFrameworkInClasspath) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java index e1d32f7aa9c..5cf2397c4d4 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java @@ -191,7 +191,9 @@ public class TestMRApps { } assertTrue(environment.get("CLASSPATH").contains(yarnAppClasspath)); String mrAppClasspath = - job.getConfiguration().get(MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH); + job.getConfiguration().get( + MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH, + MRJobConfig.DEFAULT_MAPREDUCE_APPLICATION_CLASSPATH); if (mrAppClasspath != null) { mrAppClasspath = mrAppClasspath.replaceAll(",\\s*", File.pathSeparator) .trim(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java index b8a40d178eb..738cd6edab1 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java +++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java @@ -19,6 +19,7 @@ package org.apache.hadoop.mapreduce; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.util.Shell; @InterfaceAudience.Private @InterfaceStability.Evolving @@ -658,10 +659,12 @@ public interface MRJobConfig { /** * Default CLASSPATH for all YARN MapReduce applications. */ - public static final String[] DEFAULT_MAPREDUCE_APPLICATION_CLASSPATH = { - "$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*", - "$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*", - }; + public final String + DEFAULT_MAPREDUCE_APPLICATION_CLASSPATH = Shell.WINDOWS ? + "%HADOOP_MAPRED_HOME%\\share\\hadoop\\mapreduce\\*," + + "%HADOOP_MAPRED_HOME%\\share\\hadoop\\mapreduce\\lib\\*" : + "$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*," + + "$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*"; public static final String WORKFLOW_ID = "mapreduce.workflow.id"; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml index 657805daea0..0e1b21ec0d1 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml @@ -1026,9 +1026,18 @@ CLASSPATH for MR applications. A comma-separated list of CLASSPATH entries. If mapreduce.application.framework is set then this must specify the appropriate classpath for that archive, and the name of - the archive must be present in the classpath. + the archive must be present in the classpath. + When this value is empty, the following default CLASSPATH for MR + applications would be used. + For Linux: + $HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*, + $HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*. + For Windows: + %HADOOP_MAPRED_HOME%/share/hadoop/mapreduce/*, + %HADOOP_MAPRED_HOME%/share/hadoop/mapreduce/lib/*. + mapreduce.application.classpath - $HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*,$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/* + From 6be30a7799fadb75bfe58ebbfba1ecffd0c95462 Mon Sep 17 00:00:00 2001 From: Chris Nauroth Date: Fri, 4 Oct 2013 05:59:07 +0000 Subject: [PATCH 046/133] YARN-1219. FSDownload changes file suffix making FileUtil.unTar() throw exception. Contributed by Shanyu Zhao. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529084 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 + .../apache/hadoop/yarn/util/FSDownload.java | 2 +- .../hadoop/yarn/util/TestFSDownload.java | 191 +++++++----------- 3 files changed, 78 insertions(+), 118 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 1ac823a55cd..9b8007b9fb0 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -156,6 +156,9 @@ Release 2.1.2 - UNRELEASED YARN-1131. $yarn logs command should return an appropriate error message if YARN application is still running. (Siddharth Seth via hitesh) + YARN-1219. FSDownload changes file suffix making FileUtil.unTar() throw + exception. 
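For context on the fix below: the temporary copy's name matters because archive unpacking decides whether to gunzip from the file name's extension, so a trailing ".tmp" hid the ".tar.gz"/".tgz" suffix while a "tmp_" prefix does not. A minimal illustrative sketch of that effect, using a hypothetical helper and made-up file names rather than code from this patch:

public class TmpNameSketch {
  // Rough stand-in for the extension check that extension-based unpacking relies on.
  static boolean looksGzipped(String fileName) {
    return fileName.toLowerCase().endsWith("gz"); // ".tar.gz" or ".tgz"
  }
  public static void main(String[] args) {
    String resource = "app.tar.gz";                       // hypothetical local resource name
    System.out.println(looksGzipped(resource + ".tmp"));  // false: the old suffix hid the extension
    System.out.println(looksGzipped("tmp_" + resource));  // true: the new prefix keeps it visible
  }
}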
(Shanyu Zhao via cnauroth) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/FSDownload.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/FSDownload.java index 028276b7fe0..36dfc6bc8e3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/FSDownload.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/FSDownload.java @@ -170,7 +170,7 @@ public class FSDownload implements Callable { private Path copy(Path sCopy, Path dstdir) throws IOException { FileSystem sourceFs = sCopy.getFileSystem(conf); - Path dCopy = new Path(dstdir, sCopy.getName() + ".tmp"); + Path dCopy = new Path(dstdir, "tmp_"+sCopy.getName()); FileStatus sStat = sourceFs.getFileStatus(sCopy); if (sStat.getModificationTime() != resource.getTimestamp()) { throw new IOException("Resource " + sCopy + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestFSDownload.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestFSDownload.java index f3f9f7d0c34..86909e7322d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestFSDownload.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestFSDownload.java @@ -41,6 +41,7 @@ import java.util.jar.JarOutputStream; import java.util.jar.Manifest; import java.util.zip.ZipEntry; import java.util.zip.ZipOutputStream; +import java.util.zip.GZIPOutputStream; import junit.framework.Assert; @@ -72,6 +73,9 @@ public class TestFSDownload { private static final Log LOG = LogFactory.getLog(TestFSDownload.class); private static AtomicLong uniqueNumberGenerator = new AtomicLong(System.currentTimeMillis()); + private enum TEST_FILE_TYPE { + TAR, JAR, ZIP, TGZ + }; @AfterClass public static void deleteTestDir() throws IOException { @@ -121,7 +125,7 @@ public class TestFSDownload { ret.setPattern("classes/.*"); return ret; } - + static LocalResource createTarFile(FileContext files, Path p, int len, Random r, LocalResourceVisibility vis) throws IOException, URISyntaxException { @@ -149,7 +153,35 @@ public class TestFSDownload { .getModificationTime()); return ret; } - + + static LocalResource createTgzFile(FileContext files, Path p, int len, + Random r, LocalResourceVisibility vis) throws IOException, + URISyntaxException { + byte[] bytes = new byte[len]; + r.nextBytes(bytes); + + File gzipFile = new File(p.toUri().getPath() + ".tar.gz"); + gzipFile.createNewFile(); + TarArchiveOutputStream out = new TarArchiveOutputStream( + new GZIPOutputStream(new FileOutputStream(gzipFile))); + TarArchiveEntry entry = new TarArchiveEntry(p.getName()); + entry.setSize(bytes.length); + out.putArchiveEntry(entry); + out.write(bytes); + out.closeArchiveEntry(); + out.close(); + + LocalResource ret = recordFactory.newRecordInstance(LocalResource.class); + ret.setResource(ConverterUtils.getYarnUrlFromPath(new Path(p.toString() + + ".tar.gz"))); + ret.setSize(len); + ret.setType(LocalResourceType.ARCHIVE); + ret.setVisibility(vis); + ret.setTimestamp(files.getFileStatus(new Path(p.toString() + ".tar.gz")) + .getModificationTime()); + return ret; + } + static LocalResource createJarFile(FileContext files, Path p, int len, Random r, LocalResourceVisibility vis) 
throws IOException, URISyntaxException { @@ -175,7 +207,7 @@ public class TestFSDownload { .getModificationTime()); return ret; } - + static LocalResource createZipFile(FileContext files, Path p, int len, Random r, LocalResourceVisibility vis) throws IOException, URISyntaxException { @@ -201,7 +233,7 @@ public class TestFSDownload { .getModificationTime()); return ret; } - + @Test (timeout=10000) public void testDownloadBadPublic() throws IOException, URISyntaxException, InterruptedException { @@ -252,7 +284,7 @@ public class TestFSDownload { Assert.assertTrue(e.getCause() instanceof IOException); } } - + @Test (timeout=10000) public void testDownload() throws IOException, URISyntaxException, InterruptedException { @@ -326,10 +358,9 @@ public class TestFSDownload { throw new IOException("Failed exec", e); } } - - @Test (timeout=10000) - public void testDownloadArchive() throws IOException, URISyntaxException, - InterruptedException { + + private void downloadWithFileType(TEST_FILE_TYPE fileType) throws IOException, + URISyntaxException, InterruptedException{ Configuration conf = new Configuration(); conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "077"); FileContext files = FileContext.getLocalFSFileContext(conf); @@ -352,7 +383,22 @@ public class TestFSDownload { LocalResourceVisibility vis = LocalResourceVisibility.PRIVATE; Path p = new Path(basedir, "" + 1); - LocalResource rsrc = createTarFile(files, p, size, rand, vis); + LocalResource rsrc = null; + switch (fileType) { + case TAR: + rsrc = createTarFile(files, p, size, rand, vis); + break; + case JAR: + rsrc = createJarFile(files, p, size, rand, vis); + rsrc.setType(LocalResourceType.PATTERN); + break; + case ZIP: + rsrc = createZipFile(files, p, size, rand, vis); + break; + case TGZ: + rsrc = createTgzFile(files, p, size, rand, vis); + break; + } Path destPath = dirs.getLocalPathForWrite(basedir.toString(), size, conf); destPath = new Path (destPath, Long.toString(uniqueNumberGenerator.incrementAndGet())); @@ -371,7 +417,7 @@ public class TestFSDownload { FileStatus[] childFiles = files.getDefaultFileSystem().listStatus( filestatus.getPath()); for (FileStatus childfile : childFiles) { - if (childfile.getPath().getName().equalsIgnoreCase("1.tar.tmp")) { + if (childfile.getPath().getName().startsWith("tmp")) { Assert.fail("Tmp File should not have been there " + childfile.getPath()); } @@ -384,118 +430,29 @@ public class TestFSDownload { } @Test (timeout=10000) + public void testDownloadArchive() throws IOException, URISyntaxException, + InterruptedException { + downloadWithFileType(TEST_FILE_TYPE.TAR); + } + + @Test (timeout=10000) public void testDownloadPatternJar() throws IOException, URISyntaxException, InterruptedException { - Configuration conf = new Configuration(); - conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "077"); - FileContext files = FileContext.getLocalFSFileContext(conf); - final Path basedir = files.makeQualified(new Path("target", - TestFSDownload.class.getSimpleName())); - files.mkdir(basedir, null, true); - conf.setStrings(TestFSDownload.class.getName(), basedir.toString()); - - Random rand = new Random(); - long sharedSeed = rand.nextLong(); - rand.setSeed(sharedSeed); - System.out.println("SEED: " + sharedSeed); - - Map> pending = new HashMap>(); - ExecutorService exec = Executors.newSingleThreadExecutor(); - LocalDirAllocator dirs = new LocalDirAllocator( - TestFSDownload.class.getName()); - - int size = rand.nextInt(512) + 512; - LocalResourceVisibility vis = 
LocalResourceVisibility.PRIVATE; - - Path p = new Path(basedir, "" + 1); - LocalResource rsrcjar = createJarFile(files, p, size, rand, vis); - rsrcjar.setType(LocalResourceType.PATTERN); - Path destPathjar = dirs.getLocalPathForWrite(basedir.toString(), size, conf); - destPathjar = new Path (destPathjar, - Long.toString(uniqueNumberGenerator.incrementAndGet())); - FSDownload fsdjar = new FSDownload(files, - UserGroupInformation.getCurrentUser(), conf, destPathjar, rsrcjar); - pending.put(rsrcjar, exec.submit(fsdjar)); - exec.shutdown(); - while (!exec.awaitTermination(1000, TimeUnit.MILLISECONDS)); - Assert.assertTrue(pending.get(rsrcjar).isDone()); - - try { - FileStatus[] filesstatus = files.getDefaultFileSystem().listStatus( - basedir); - for (FileStatus filestatus : filesstatus) { - if (filestatus.isDirectory()) { - FileStatus[] childFiles = files.getDefaultFileSystem().listStatus( - filestatus.getPath()); - for (FileStatus childfile : childFiles) { - if (childfile.getPath().getName().equalsIgnoreCase("1.jar.tmp")) { - Assert.fail("Tmp File should not have been there " - + childfile.getPath()); - } - } - } - } - }catch (Exception e) { - throw new IOException("Failed exec", e); - } + downloadWithFileType(TEST_FILE_TYPE.JAR); } - + @Test (timeout=10000) public void testDownloadArchiveZip() throws IOException, URISyntaxException, InterruptedException { - Configuration conf = new Configuration(); - conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "077"); - FileContext files = FileContext.getLocalFSFileContext(conf); - final Path basedir = files.makeQualified(new Path("target", - TestFSDownload.class.getSimpleName())); - files.mkdir(basedir, null, true); - conf.setStrings(TestFSDownload.class.getName(), basedir.toString()); - - Random rand = new Random(); - long sharedSeed = rand.nextLong(); - rand.setSeed(sharedSeed); - System.out.println("SEED: " + sharedSeed); - - Map> pending = new HashMap>(); - ExecutorService exec = Executors.newSingleThreadExecutor(); - LocalDirAllocator dirs = new LocalDirAllocator( - TestFSDownload.class.getName()); - - int size = rand.nextInt(512) + 512; - LocalResourceVisibility vis = LocalResourceVisibility.PRIVATE; - - Path p = new Path(basedir, "" + 1); - LocalResource rsrczip = createZipFile(files, p, size, rand, vis); - Path destPathjar = dirs.getLocalPathForWrite(basedir.toString(), size, conf); - destPathjar = new Path (destPathjar, - Long.toString(uniqueNumberGenerator.incrementAndGet())); - FSDownload fsdzip = new FSDownload(files, - UserGroupInformation.getCurrentUser(), conf, destPathjar, rsrczip); - pending.put(rsrczip, exec.submit(fsdzip)); - exec.shutdown(); - while (!exec.awaitTermination(1000, TimeUnit.MILLISECONDS)); - Assert.assertTrue(pending.get(rsrczip).isDone()); - - try { - FileStatus[] filesstatus = files.getDefaultFileSystem().listStatus( - basedir); - for (FileStatus filestatus : filesstatus) { - if (filestatus.isDirectory()) { - FileStatus[] childFiles = files.getDefaultFileSystem().listStatus( - filestatus.getPath()); - for (FileStatus childfile : childFiles) { - if (childfile.getPath().getName().equalsIgnoreCase("1.gz.tmp")) { - Assert.fail("Tmp File should not have been there " - + childfile.getPath()); - } - } - } - } - }catch (Exception e) { - throw new IOException("Failed exec", e); - } + downloadWithFileType(TEST_FILE_TYPE.ZIP); } - + + @Test (timeout=10000) + public void testDownloadArchiveTgz() throws IOException, URISyntaxException, + InterruptedException { + downloadWithFileType(TEST_FILE_TYPE.TGZ); + } + private 
void verifyPermsRecursively(FileSystem fs, FileContext files, Path p, LocalResourceVisibility vis) throws IOException { @@ -527,7 +484,7 @@ public class TestFSDownload { } } } - + @Test (timeout=10000) public void testDirDownload() throws IOException, InterruptedException { Configuration conf = new Configuration(); From d4324eef14782d3cde6570ee910c45d8fdfce6ba Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Fri, 4 Oct 2013 17:26:56 +0000 Subject: [PATCH 047/133] MAPREDUCE-5533. Fixed MR speculation code to track any TaskAttempts that aren't heart-beating for a while, so that we can aggressively speculate instead of waiting for task-timeout. Contributed by Xuan Gong. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529229 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 4 + .../v2/app/speculate/DefaultSpeculator.java | 83 +++++++ .../apache/hadoop/mapreduce/v2/app/MRApp.java | 20 +- .../v2/TestSpeculativeExecutionWithMRApp.java | 220 ++++++++++++++++++ 4 files changed, 320 insertions(+), 7 deletions(-) create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestSpeculativeExecutionWithMRApp.java diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 7ad5577396a..8c10325e387 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -278,6 +278,10 @@ Release 2.1.2 - UNRELEASED MAPREDUCE-5442. $HADOOP_MAPRED_HOME/$HADOOP_CONF_DIR setting not working on Windows. (Yingda Chen via cnauroth) + MAPREDUCE-5533. Fixed MR speculation code to track any TaskAttempts that + aren't heart-beating for a while, so that we can aggressively speculate + instead of waiting for task-timeout (Xuan Gong via vinodkv) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/speculate/DefaultSpeculator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/speculate/DefaultSpeculator.java index 532a9a2ee73..80e38334730 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/speculate/DefaultSpeculator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/speculate/DefaultSpeculator.java @@ -78,6 +78,16 @@ public class DefaultSpeculator extends AbstractService implements private final Map pendingSpeculations = new ConcurrentHashMap(); + // Used to track any TaskAttempts that aren't heart-beating for a while, so + // that we can aggressively speculate instead of waiting for task-timeout. + private final ConcurrentMap + runningTaskAttemptStatistics = new ConcurrentHashMap(); + // Regular heartbeat from tasks is every 3 secs. So if we don't get a + // heartbeat in 9 secs (3 heartbeats), we simulate a heartbeat with no change + // in progress. + private static final long MAX_WAITTING_TIME_FOR_HEARTBEAT = 9 * 1000; + // These are the current needs, not the initial needs. 
For each job, these // record the number of attempts that exist and that are actively // waiting for a container [as opposed to running or finished] @@ -329,6 +339,9 @@ public class DefaultSpeculator extends AbstractService implements runningTasks.putIfAbsent(taskID, Boolean.TRUE); } else { runningTasks.remove(taskID, Boolean.TRUE); + if (!stateString.equals(TaskAttemptState.STARTING.name())) { + runningTaskAttemptStatistics.remove(attemptID); + } } } @@ -389,6 +402,33 @@ public class DefaultSpeculator extends AbstractService implements long estimatedReplacementEndTime = now + estimator.estimatedNewAttemptRuntime(taskID); + float progress = taskAttempt.getProgress(); + TaskAttemptHistoryStatistics data = + runningTaskAttemptStatistics.get(runningTaskAttemptID); + if (data == null) { + runningTaskAttemptStatistics.put(runningTaskAttemptID, + new TaskAttemptHistoryStatistics(estimatedRunTime, progress, now)); + } else { + if (estimatedRunTime == data.getEstimatedRunTime() + && progress == data.getProgress()) { + // Previous stats are same as same stats + if (data.notHeartbeatedInAWhile(now)) { + // Stats have stagnated for a while, simulate heart-beat. + TaskAttemptStatus taskAttemptStatus = new TaskAttemptStatus(); + taskAttemptStatus.id = runningTaskAttemptID; + taskAttemptStatus.progress = progress; + taskAttemptStatus.taskState = taskAttempt.getState(); + // Now simulate the heart-beat + handleAttempt(taskAttemptStatus); + } + } else { + // Stats have changed - update our data structure + data.setEstimatedRunTime(estimatedRunTime); + data.setProgress(progress); + data.resetHeartBeatTime(now); + } + } + if (estimatedEndTime < now) { return PROGRESS_IS_GOOD; } @@ -511,4 +551,47 @@ public class DefaultSpeculator extends AbstractService implements // We'll try to issue one map and one reduce speculation per job per run return maybeScheduleAMapSpeculation() + maybeScheduleAReduceSpeculation(); } + + static class TaskAttemptHistoryStatistics { + + private long estimatedRunTime; + private float progress; + private long lastHeartBeatTime; + + public TaskAttemptHistoryStatistics(long estimatedRunTime, float progress, + long nonProgressStartTime) { + this.estimatedRunTime = estimatedRunTime; + this.progress = progress; + resetHeartBeatTime(nonProgressStartTime); + } + + public long getEstimatedRunTime() { + return this.estimatedRunTime; + } + + public float getProgress() { + return this.progress; + } + + public void setEstimatedRunTime(long estimatedRunTime) { + this.estimatedRunTime = estimatedRunTime; + } + + public void setProgress(float progress) { + this.progress = progress; + } + + public boolean notHeartbeatedInAWhile(long now) { + if (now - lastHeartBeatTime <= MAX_WAITTING_TIME_FOR_HEARTBEAT) { + return false; + } else { + resetHeartBeatTime(now); + return true; + } + } + + public void resetHeartBeatTime(long lastHeartBeatTime) { + this.lastHeartBeatTime = lastHeartBeatTime; + } + } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRApp.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRApp.java index 2a009955e3d..3a7e865c7bf 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRApp.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRApp.java @@ 
-263,16 +263,22 @@ public class MRApp extends MRAppMaster { } public Job submit(Configuration conf) throws Exception { + //TODO: fix the bug where the speculator gets events with + //not-fully-constructed objects. For now, disable speculative exec + return submit(conf, false, false); + } + + public Job submit(Configuration conf, boolean mapSpeculative, + boolean reduceSpeculative) throws Exception { String user = conf.get(MRJobConfig.USER_NAME, UserGroupInformation - .getCurrentUser().getShortUserName()); + .getCurrentUser().getShortUserName()); conf.set(MRJobConfig.USER_NAME, user); conf.set(MRJobConfig.MR_AM_STAGING_DIR, testAbsPath.toString()); conf.setBoolean(MRJobConfig.MR_AM_CREATE_JH_INTERMEDIATE_BASE_DIR, true); - //TODO: fix the bug where the speculator gets events with - //not-fully-constructed objects. For now, disable speculative exec - LOG.info("****DISABLING SPECULATIVE EXECUTION*****"); - conf.setBoolean(MRJobConfig.MAP_SPECULATIVE, false); - conf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, false); + // TODO: fix the bug where the speculator gets events with + // not-fully-constructed objects. For now, disable speculative exec + conf.setBoolean(MRJobConfig.MAP_SPECULATIVE, mapSpeculative); + conf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, reduceSpeculative); init(conf); start(); @@ -281,7 +287,7 @@ public class MRApp extends MRAppMaster { // Write job.xml String jobFile = MRApps.getJobFile(conf, user, - TypeConverter.fromYarn(job.getID())); + TypeConverter.fromYarn(job.getID())); LOG.info("Writing job conf to " + jobFile); new File(jobFile).getParentFile().mkdirs(); conf.writeXml(new FileOutputStream(jobFile)); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestSpeculativeExecutionWithMRApp.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestSpeculativeExecutionWithMRApp.java new file mode 100644 index 00000000000..37d09e0da38 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestSpeculativeExecutionWithMRApp.java @@ -0,0 +1,220 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.mapreduce.v2; + +import java.util.Iterator; +import java.util.Map; +import java.util.Random; + +import junit.framework.Assert; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.v2.api.records.JobState; +import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; +import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptState; +import org.apache.hadoop.mapreduce.v2.api.records.TaskId; +import org.apache.hadoop.mapreduce.v2.api.records.TaskState; +import org.apache.hadoop.mapreduce.v2.app.ControlledClock; +import org.apache.hadoop.mapreduce.v2.app.MRApp; +import org.apache.hadoop.mapreduce.v2.app.job.Job; +import org.apache.hadoop.mapreduce.v2.app.job.Task; +import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt; +import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent; +import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType; +import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent; +import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent.TaskAttemptStatus; +import org.apache.hadoop.yarn.event.EventHandler; +import org.apache.hadoop.yarn.util.Clock; +import org.apache.hadoop.yarn.util.SystemClock; +import org.junit.Test; + +@SuppressWarnings({ "unchecked", "rawtypes" }) +public class TestSpeculativeExecutionWithMRApp { + + private static final int NUM_MAPPERS = 5; + private static final int NUM_REDUCERS = 0; + + @Test(timeout = 60000) + public void testSpeculateSuccessfulWithoutUpdateEvents() throws Exception { + + Clock actualClock = new SystemClock(); + ControlledClock clock = new ControlledClock(actualClock); + clock.setTime(System.currentTimeMillis()); + + MRApp app = + new MRApp(NUM_MAPPERS, NUM_REDUCERS, false, "test", true, clock); + Job job = app.submit(new Configuration(), true, true); + app.waitForState(job, JobState.RUNNING); + + Map tasks = job.getTasks(); + Assert.assertEquals("Num tasks is not correct", NUM_MAPPERS + NUM_REDUCERS, + tasks.size()); + Iterator taskIter = tasks.values().iterator(); + while (taskIter.hasNext()) { + app.waitForState(taskIter.next(), TaskState.RUNNING); + } + + // Process the update events + clock.setTime(System.currentTimeMillis() + 2000); + EventHandler appEventHandler = app.getContext().getEventHandler(); + for (Map.Entry mapTask : tasks.entrySet()) { + for (Map.Entry taskAttempt : mapTask + .getValue().getAttempts().entrySet()) { + TaskAttemptStatus status = + createTaskAttemptStatus(taskAttempt.getKey(), (float) 0.8, + TaskAttemptState.RUNNING); + TaskAttemptStatusUpdateEvent event = + new TaskAttemptStatusUpdateEvent(taskAttempt.getKey(), status); + appEventHandler.handle(event); + } + } + + Random generator = new Random(); + Object[] taskValues = tasks.values().toArray(); + Task taskToBeSpeculated = + (Task) taskValues[generator.nextInt(taskValues.length)]; + + // Other than one random task, finish every other task. 
+ for (Map.Entry mapTask : tasks.entrySet()) { + for (Map.Entry taskAttempt : mapTask + .getValue().getAttempts().entrySet()) { + if (mapTask.getKey() != taskToBeSpeculated.getID()) { + appEventHandler.handle(new TaskAttemptEvent(taskAttempt.getKey(), + TaskAttemptEventType.TA_DONE)); + appEventHandler.handle(new TaskAttemptEvent(taskAttempt.getKey(), + TaskAttemptEventType.TA_CONTAINER_CLEANED)); + app.waitForState(taskAttempt.getValue(), TaskAttemptState.SUCCEEDED); + } + } + } + + int maxTimeWait = 10; + boolean successfullySpeculated = false; + while (maxTimeWait > 0 && !successfullySpeculated) { + if (taskToBeSpeculated.getAttempts().size() != 2) { + Thread.sleep(1000); + clock.setTime(System.currentTimeMillis() + 20000); + } else { + successfullySpeculated = true; + } + maxTimeWait--; + } + Assert + .assertTrue("Couldn't speculate successfully", successfullySpeculated); + } + + @Test(timeout = 60000) + public void testSepculateSuccessfulWithUpdateEvents() throws Exception { + + Clock actualClock = new SystemClock(); + ControlledClock clock = new ControlledClock(actualClock); + clock.setTime(System.currentTimeMillis()); + + MRApp app = + new MRApp(NUM_MAPPERS, NUM_REDUCERS, false, "test", true, clock); + Job job = app.submit(new Configuration(), true, true); + app.waitForState(job, JobState.RUNNING); + + Map tasks = job.getTasks(); + Assert.assertEquals("Num tasks is not correct", NUM_MAPPERS + NUM_REDUCERS, + tasks.size()); + Iterator taskIter = tasks.values().iterator(); + while (taskIter.hasNext()) { + app.waitForState(taskIter.next(), TaskState.RUNNING); + } + + // Process the update events + clock.setTime(System.currentTimeMillis() + 1000); + EventHandler appEventHandler = app.getContext().getEventHandler(); + for (Map.Entry mapTask : tasks.entrySet()) { + for (Map.Entry taskAttempt : mapTask + .getValue().getAttempts().entrySet()) { + TaskAttemptStatus status = + createTaskAttemptStatus(taskAttempt.getKey(), (float) 0.5, + TaskAttemptState.RUNNING); + TaskAttemptStatusUpdateEvent event = + new TaskAttemptStatusUpdateEvent(taskAttempt.getKey(), status); + appEventHandler.handle(event); + } + } + + Task speculatedTask = null; + int numTasksToFinish = NUM_MAPPERS + NUM_REDUCERS - 1; + clock.setTime(System.currentTimeMillis() + 1000); + for (Map.Entry task : tasks.entrySet()) { + for (Map.Entry taskAttempt : task.getValue() + .getAttempts().entrySet()) { + if (numTasksToFinish > 0) { + appEventHandler.handle(new TaskAttemptEvent(taskAttempt.getKey(), + TaskAttemptEventType.TA_DONE)); + appEventHandler.handle(new TaskAttemptEvent(taskAttempt.getKey(), + TaskAttemptEventType.TA_CONTAINER_CLEANED)); + numTasksToFinish--; + app.waitForState(taskAttempt.getValue(), TaskAttemptState.SUCCEEDED); + } else { + // The last task is chosen for speculation + TaskAttemptStatus status = + createTaskAttemptStatus(taskAttempt.getKey(), (float) 0.75, + TaskAttemptState.RUNNING); + speculatedTask = task.getValue(); + TaskAttemptStatusUpdateEvent event = + new TaskAttemptStatusUpdateEvent(taskAttempt.getKey(), status); + appEventHandler.handle(event); + } + } + } + + clock.setTime(System.currentTimeMillis() + 15000); + for (Map.Entry task : tasks.entrySet()) { + for (Map.Entry taskAttempt : task.getValue() + .getAttempts().entrySet()) { + if (taskAttempt.getValue().getState() != TaskAttemptState.SUCCEEDED) { + TaskAttemptStatus status = + createTaskAttemptStatus(taskAttempt.getKey(), (float) 0.75, + TaskAttemptState.RUNNING); + TaskAttemptStatusUpdateEvent event = + new 
TaskAttemptStatusUpdateEvent(taskAttempt.getKey(), status); + appEventHandler.handle(event); + } + } + } + + int maxTimeWait = 5; + boolean successfullySpeculated = false; + while (maxTimeWait > 0 && !successfullySpeculated) { + if (speculatedTask.getAttempts().size() != 2) { + Thread.sleep(1000); + } else { + successfullySpeculated = true; + } + maxTimeWait--; + } + Assert + .assertTrue("Couldn't speculate successfully", successfullySpeculated); + } + + private TaskAttemptStatus createTaskAttemptStatus(TaskAttemptId id, + float progress, TaskAttemptState state) { + TaskAttemptStatus status = new TaskAttemptStatus(); + status.id = id; + status.progress = progress; + status.taskState = state; + return status; + } +} From cbab04727bf24ec77f34677d0060cfa54263faf5 Mon Sep 17 00:00:00 2001 From: Bikas Saha Date: Fri, 4 Oct 2013 18:40:18 +0000 Subject: [PATCH 048/133] YARN-1232. Configuration to support multiple RMs (Karthik Kambatla via bikas) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529251 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 + .../org/apache/hadoop/yarn/conf/HAUtil.java | 121 ++++++++++++++++++ .../hadoop/yarn/conf/YarnConfiguration.java | 27 +++- .../hadoop/yarn/client/ClientRMProxy.java | 7 +- .../src/main/resources/yarn-default.xml | 27 +++- .../apache/hadoop/yarn/conf/TestHAUtil.java | 79 ++++++++++++ .../hadoop/yarn/server/api/ServerRMProxy.java | 8 +- .../resourcemanager/RMHAProtocolService.java | 18 ++- .../resourcemanager/ResourceManager.java | 4 + .../yarn/server/resourcemanager/TestRMHA.java | 10 ++ 10 files changed, 286 insertions(+), 18 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/HAUtil.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/conf/TestHAUtil.java diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 9b8007b9fb0..e249e2c13e2 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -48,6 +48,9 @@ Release 2.3.0 - UNRELEASED YARN-1199. Make NM/RM Versions Available (Mit Desai via jeagles) + YARN-1232. Configuration to support multiple RMs (Karthik Kambatla via + bikas) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/HAUtil.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/HAUtil.java new file mode 100644 index 00000000000..18f98961db6 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/HAUtil.java @@ -0,0 +1,121 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.conf; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; + +@InterfaceAudience.Private +public class HAUtil { + private static Log LOG = LogFactory.getLog(HAUtil.class); + + public static final List<String> RPC_ADDRESS_CONF_KEYS = + Collections.unmodifiableList(Arrays.asList( + YarnConfiguration.RM_ADDRESS, + YarnConfiguration.RM_SCHEDULER_ADDRESS, + YarnConfiguration.RM_ADMIN_ADDRESS, + YarnConfiguration.RM_RESOURCE_TRACKER_ADDRESS, + YarnConfiguration.RM_WEBAPP_ADDRESS)); + + private HAUtil() { /* Hidden constructor */ } + + private static void throwBadConfigurationException(String msg) { + throw new YarnRuntimeException("Invalid configuration! " + msg); + } + + /** + * Returns true if Resource Manager HA is configured. + * + * @param conf Configuration + * @return true if HA is configured in the configuration; else false. + */ + public static boolean isHAEnabled(Configuration conf) { + return conf.getBoolean(YarnConfiguration.RM_HA_ENABLED, + YarnConfiguration.DEFAULT_RM_HA_ENABLED); + } + + public static Collection<String> getRMHAIds(Configuration conf) { + return conf.getTrimmedStringCollection(YarnConfiguration.RM_HA_IDS); + } + + /** + * @param conf Configuration + * @return RM Id on success + * @throws YarnRuntimeException for configurations without a node id + */ + @VisibleForTesting + public static String getRMHAId(Configuration conf) { + String rmId = conf.get(YarnConfiguration.RM_HA_ID); + if (rmId == null) { + throwBadConfigurationException(YarnConfiguration.RM_HA_ID + + " needs to be set in a HA configuration"); + } + return rmId; + } + + private static String getConfValueForRMInstance(String prefix, + Configuration conf) { + String confKey = addSuffix(prefix, getRMHAId(conf)); + String retVal = conf.get(confKey); + if (LOG.isTraceEnabled()) { + LOG.trace("getConfValueForRMInstance: prefix = " + prefix + + "; confKey being looked up = " + confKey + + "; value being set to = " + retVal); + } + return retVal; + } + + static String getConfValueForRMInstance(String prefix, String defaultValue, + Configuration conf) { + String value = getConfValueForRMInstance(prefix, conf); + return (value == null) ? defaultValue : value; + } + + private static void setConfValue(String prefix, Configuration conf) { + conf.set(prefix, getConfValueForRMInstance(prefix, conf)); + } + + public static void setAllRpcAddresses(Configuration conf) { + for (String confKey : RPC_ADDRESS_CONF_KEYS) { + setConfValue(confKey, conf); + } + } + + /** Add non empty and non null suffix to a key */ + @VisibleForTesting + public static String addSuffix(String key, String suffix) { + if (suffix == null || suffix.isEmpty()) { + return key; + } + if (suffix.startsWith(".")) { + throw new IllegalArgumentException("suffix '" + suffix + "' should not " + + "already have '.' prepended."); + } + return key + "." 
+ suffix; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index dc5baa1a166..e1327dee5a6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -18,15 +18,12 @@ package org.apache.hadoop.yarn.conf; -import java.net.InetAddress; import java.net.InetSocketAddress; -import java.net.UnknownHostException; import java.util.Arrays; import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability.Evolving; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.http.HttpConfig; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.api.ApplicationConstants; @@ -84,7 +81,7 @@ public class YarnConfiguration extends Configuration { // Resource Manager Configs //////////////////////////////// public static final String RM_PREFIX = "yarn.resourcemanager."; - + /** The address of the applications manager interface in the RM.*/ public static final String RM_ADDRESS = RM_PREFIX + "address"; @@ -281,6 +278,8 @@ public class YarnConfiguration extends Configuration { public static final String RM_HA_ENABLED = RM_HA_PREFIX + "enabled"; public static final boolean DEFAULT_RM_HA_ENABLED = false; + public static final String RM_HA_IDS = RM_HA_PREFIX + "rm-ids"; + public static final String RM_HA_ID = RM_HA_PREFIX + "id"; //////////////////////////////// // RM state store configs @@ -854,4 +853,24 @@ public class YarnConfiguration extends Configuration { this.reloadConfiguration(); } } + + /** + * Get the socket address for name property as a + * InetSocketAddress. + * @param name property name. + * @param defaultAddress the default value + * @param defaultPort the default port + * @return InetSocketAddress + */ + @Override + public InetSocketAddress getSocketAddr( + String name, String defaultAddress, int defaultPort) { + String address; + if (HAUtil.isHAEnabled(this)) { + address = HAUtil.getConfValueForRMInstance(name, defaultAddress, this); + } else { + address = get(name, defaultAddress); + } + return NetUtils.createSocketAddr(address, defaultPort, name); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/ClientRMProxy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/ClientRMProxy.java index 37474b89adf..049f4cc8266 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/ClientRMProxy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/ClientRMProxy.java @@ -38,8 +38,11 @@ public class ClientRMProxy extends RMProxy { private static final Log LOG = LogFactory.getLog(ClientRMProxy.class); - public static T createRMProxy(final Configuration conf, + public static T createRMProxy(final Configuration configuration, final Class protocol) throws IOException { + YarnConfiguration conf = (configuration instanceof YarnConfiguration) + ? 
(YarnConfiguration) configuration + : new YarnConfiguration(configuration); InetSocketAddress rmAddress = getRMAddress(conf, protocol); return createRMProxy(conf, protocol, rmAddress); } @@ -60,7 +63,7 @@ public class ClientRMProxy extends RMProxy { } } - private static InetSocketAddress getRMAddress(Configuration conf, + private static InetSocketAddress getRMAddress(YarnConfiguration conf, Class protocol) throws IOException { if (protocol == ApplicationClientProtocol.class) { return conf.getSocketAddr(YarnConfiguration.RM_ADDRESS, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index 171b118b7bf..6dfeb6662e0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -320,13 +320,34 @@ - Enable RM high-availability. When enabled, the RM starts - in the Standby mode by default, and transitions to the Active mode when - prompted to. + Enable RM high-availability. When enabled, + (1) The RM starts in the Standby mode by default, and transitions to + the Active mode when prompted to. + (2) The nodes in the RM ensemble are listed in + yarn.resourcemanager.ha.rm-ids + (3) The id of each RM comes from yarn.resourcemanager.ha.id + (4) The actual physical addresses come from the configs of the pattern + - {rpc-config}.{id} yarn.resourcemanager.ha.enabled false + + The list of RM nodes in the cluster when HA is + enabled. See description of yarn.resourcemanager.ha + .enabled for full details on how this is used. + yarn.resourcemanager.ha.rm-ids + + + + + The id (string) of the current RM. When HA is enabled, this + is a required config. See description of yarn.resourcemanager.ha.enabled + for full details on how this is used. + yarn.resourcemanager.ha.id + + + The maximum number of completed applications RM keeps. yarn.resourcemanager.max-completed-applications diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/conf/TestHAUtil.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/conf/TestHAUtil.java new file mode 100644 index 00000000000..e0e46c4dc14 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/conf/TestHAUtil.java @@ -0,0 +1,79 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.conf; + +import org.apache.hadoop.conf.Configuration; + +import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; +import org.junit.Before; +import org.junit.Test; + +import java.util.Collection; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + +public class TestHAUtil { + private Configuration conf; + + private static final String RM1_ADDRESS = "1.2.3.4:8021"; + private static final String RM2_ADDRESS = "localhost:8022"; + private static final String RM1_NODE_ID = "rm1"; + private static final String RM2_NODE_ID = "rm2"; + + @Before + public void setUp() { + conf = new Configuration(); + conf.set(YarnConfiguration.RM_HA_IDS, RM1_NODE_ID + "," + RM2_NODE_ID); + conf.set(YarnConfiguration.RM_HA_ID, RM1_NODE_ID); + + for (String confKey : HAUtil.RPC_ADDRESS_CONF_KEYS) { + conf.set(HAUtil.addSuffix(confKey, RM1_NODE_ID), RM1_ADDRESS); + conf.set(HAUtil.addSuffix(confKey, RM2_NODE_ID), RM2_ADDRESS); + } + } + + @Test + public void testGetRMServiceId() throws Exception { + Collection<String> rmhaIds = HAUtil.getRMHAIds(conf); + assertEquals(2, rmhaIds.size()); + } + + @Test + public void testGetRMId() throws Exception { + assertEquals("Does not honor " + YarnConfiguration.RM_HA_ID, + RM1_NODE_ID, HAUtil.getRMHAId(conf)); + conf = new YarnConfiguration(); + try { + HAUtil.getRMHAId(conf); + fail("getRMHAId() fails to throw an exception when RM_HA_ID is not set"); + } catch (YarnRuntimeException yre) { + // do nothing + } + } + + @Test + public void testSetGetRpcAddresses() throws Exception { + HAUtil.setAllRpcAddresses(conf); + for (String confKey : HAUtil.RPC_ADDRESS_CONF_KEYS) { + assertEquals("RPC address not set for " + confKey, + RM1_ADDRESS, conf.get(confKey)); + } + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/ServerRMProxy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/ServerRMProxy.java index 0f90310d998..c25c5977b80 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/ServerRMProxy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/ServerRMProxy.java @@ -31,13 +31,17 @@ public class ServerRMProxy<T> extends RMProxy<T> { private static final Log LOG = LogFactory.getLog(ServerRMProxy.class); - public static <T> T createRMProxy(final Configuration conf, + public static <T> T createRMProxy(final Configuration configuration, final Class<T> protocol) throws IOException { + YarnConfiguration conf = (configuration instanceof YarnConfiguration) + ? 
(YarnConfiguration) configuration + : new YarnConfiguration(configuration); InetSocketAddress rmAddress = getRMAddress(conf, protocol); return createRMProxy(conf, protocol, rmAddress); } - private static InetSocketAddress getRMAddress(Configuration conf, Class protocol) { + private static InetSocketAddress getRMAddress(YarnConfiguration conf, + Class protocol) { if (protocol == ResourceTracker.class) { return conf.getSocketAddr( YarnConfiguration.RM_RESOURCE_TRACKER_ADDRESS, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMHAProtocolService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMHAProtocolService.java index b9aca3cbe43..8fb92facd73 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMHAProtocolService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMHAProtocolService.java @@ -29,8 +29,8 @@ import org.apache.hadoop.ha.HAServiceProtocol; import org.apache.hadoop.ha.HAServiceStatus; import org.apache.hadoop.ha.HealthCheckFailedException; import org.apache.hadoop.service.AbstractService; -import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; +import org.apache.hadoop.yarn.conf.HAUtil; import java.io.IOException; @@ -44,6 +44,7 @@ public class RMHAProtocolService extends AbstractService implements private ResourceManager rm; @VisibleForTesting protected HAServiceState haState = HAServiceState.INITIALIZING; + private boolean haEnabled; public RMHAProtocolService(ResourceManager resourceManager) { super("RMHAProtocolService"); @@ -51,17 +52,20 @@ public class RMHAProtocolService extends AbstractService implements } @Override - public synchronized void serviceInit(Configuration conf) throws Exception { + protected synchronized void serviceInit(Configuration conf) throws + Exception { this.conf = conf; + haEnabled = HAUtil.isHAEnabled(this.conf); + if (haEnabled) { + HAUtil.setAllRpcAddresses(this.conf); + rm.setConf(this.conf); + } rm.createAndInitActiveServices(); super.serviceInit(this.conf); } @Override - public synchronized void serviceStart() throws Exception { - boolean haEnabled = this.conf.getBoolean(YarnConfiguration.RM_HA_ENABLED, - YarnConfiguration.DEFAULT_RM_HA_ENABLED); - + protected synchronized void serviceStart() throws Exception { if (haEnabled) { transitionToStandby(true); } else { @@ -72,7 +76,7 @@ public class RMHAProtocolService extends AbstractService implements } @Override - public synchronized void serviceStop() throws Exception { + protected synchronized void serviceStop() throws Exception { transitionToStandby(false); haState = HAServiceState.STOPPING; super.serviceStop(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java index 72d38084b3d..3a059217759 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java @@ -731,6 +731,10 @@ public class ResourceManager extends CompositeService implements Recoverable { webApp = builder.start(new RMWebApp(this)); } + void setConf(Configuration configuration) { + conf = configuration; + } + /** * Helper method to create and init {@link #activeServices}. This creates an * instance of {@link RMActiveServices} and initializes it. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMHA.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMHA.java index 7415791f094..869526e97cd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMHA.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMHA.java @@ -26,6 +26,7 @@ import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo; import org.apache.hadoop.ha.HealthCheckFailedException; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.conf.HAUtil; import org.junit.Before; import org.junit.Test; @@ -42,10 +43,19 @@ public class TestRMHA { private static final String STATE_ERR = "ResourceManager is in wrong HA state"; + private static final String RM1_ADDRESS = "0.0.0.0:0"; + private static final String RM1_NODE_ID = "rm1"; + @Before public void setUp() throws Exception { Configuration conf = new YarnConfiguration(); conf.setBoolean(YarnConfiguration.RM_HA_ENABLED, true); + conf.set(YarnConfiguration.RM_HA_IDS, RM1_NODE_ID); + for (String confKey : HAUtil.RPC_ADDRESS_CONF_KEYS) { + conf.set(HAUtil.addSuffix(confKey, RM1_NODE_ID), RM1_ADDRESS); + } + conf.set(YarnConfiguration.RM_HA_ID, RM1_NODE_ID); + rm = new MockRM(conf); rm.init(conf); } From 3c1f18de4353215acf9b6820774e84010a0be819 Mon Sep 17 00:00:00 2001 From: Jing Zhao Date: Fri, 4 Oct 2013 20:54:06 +0000 Subject: [PATCH 049/133] HDFS-5300. FSNameSystem#deleteSnapshot() should not check owner in case of permissions disabled. Contributed by Vinay. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529294 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 ++ .../hdfs/server/namenode/FSNamesystem.java | 4 +- .../snapshot/TestSnapshotDeletion.java | 39 ++++++++++++++++++- 3 files changed, 44 insertions(+), 2 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index f38e426011c..b4024a93726 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -391,6 +391,9 @@ Release 2.1.2 - UNRELEASED HDFS-5289. Race condition in TestRetryCacheWithHA#testCreateSymlink causes spurious test failure. (atm) + HDFS-5300. FSNameSystem#deleteSnapshot() should not check owner in case of + permissions disabled. 
(Vinay via jing9) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 429716c32f9..5a6d5387c8f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -6796,7 +6796,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats, throw new SafeModeException( "Cannot delete snapshot for " + snapshotRoot, safeMode); } - checkOwner(pc, snapshotRoot); + if (isPermissionEnabled) { + checkOwner(pc, snapshotRoot); + } BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo(); List removedINodes = new ChunkedArrayList(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDeletion.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDeletion.java index e795673fc07..2408b4efb8f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDeletion.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDeletion.java @@ -25,10 +25,12 @@ import static org.junit.Assert.fail; import java.io.FileNotFoundException; import java.io.IOException; +import java.security.PrivilegedAction; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.DistributedFileSystem; @@ -45,7 +47,9 @@ import org.apache.hadoop.hdfs.server.namenode.INodeFile; import org.apache.hadoop.hdfs.server.namenode.Quota; import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectoryWithSnapshot.DirectoryDiffList; import org.apache.hadoop.hdfs.util.ReadOnlyList; +import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.ipc.RemoteException; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.test.GenericTestUtils; import org.junit.After; import org.junit.Before; @@ -777,7 +781,40 @@ public class TestSnapshotDeletion { assertEquals("user1", statusOfS1.getOwner()); assertEquals("group1", statusOfS1.getGroup()); } - + + @Test + public void testDeleteSnapshotWithPermissionsDisabled() throws Exception { + cluster.shutdown(); + Configuration newConf = new Configuration(conf); + newConf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false); + cluster = new MiniDFSCluster.Builder(newConf).numDataNodes(0).build(); + cluster.waitActive(); + hdfs = cluster.getFileSystem(); + + final Path path = new Path("/dir"); + hdfs.mkdirs(path); + hdfs.allowSnapshot(path); + hdfs.mkdirs(new Path(path, "/test")); + hdfs.createSnapshot(path, "s1"); + UserGroupInformation anotherUser = UserGroupInformation + .createRemoteUser("anotheruser"); + anotherUser.doAs(new PrivilegedAction() { + @Override + public Object run() { + DistributedFileSystem anotherUserFS = null; + try { + anotherUserFS = cluster.getFileSystem(); + anotherUserFS.deleteSnapshot(path, "s1"); + } catch (IOException e) { + fail("Failed to delete snapshot : " + e.getLocalizedMessage()); + } 
finally { + IOUtils.closeStream(anotherUserFS); + } + return null; + } + }); + } + /** * A test covering the case where the snapshot diff to be deleted is renamed * to its previous snapshot. From 8549c34917351f73fa4264b7a050edbf01442969 Mon Sep 17 00:00:00 2001 From: Jason Darrell Lowe Date: Fri, 4 Oct 2013 20:56:21 +0000 Subject: [PATCH 050/133] HADOOP-9225. Cover package org.apache.hadoop.compress.Snappy. Contributed by Vadim Bondarev, Andrey Klochkov and Nathan Roberts git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529296 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 3 + .../TestSnappyCompressorDecompressor.java | 342 ++++++++++++++++++ 2 files changed, 345 insertions(+) create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 77b6212c2ed..09002cdf859 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -345,6 +345,9 @@ Release 2.3.0 - UNRELEASED HADOOP-9254. Cover packages org.apache.hadoop.util.bloom, org.apache.hadoop.util.hash (Vadim Bondarev via jlowe) + HADOOP-9225. Cover package org.apache.hadoop.compress.Snappy (Vadim + Bondarev, Andrey Klochkov and Nathan Roberts via jlowe) + OPTIMIZATIONS HADOOP-9748. Reduce blocking on UGI.ensureInitialized (daryn) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java new file mode 100644 index 00000000000..b59ed62bf44 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java @@ -0,0 +1,342 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.io.compress.snappy; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.lang.reflect.Array; +import java.util.Random; + +import org.apache.hadoop.io.DataInputBuffer; +import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.hadoop.io.compress.BlockCompressorStream; +import org.apache.hadoop.io.compress.BlockDecompressorStream; +import org.apache.hadoop.io.compress.CompressionInputStream; +import org.apache.hadoop.io.compress.CompressionOutputStream; +import org.apache.hadoop.io.compress.SnappyCodec; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import static org.junit.Assume.*; + +public class TestSnappyCompressorDecompressor { + + @Before + public void before() { + assumeTrue(SnappyCodec.isNativeCodeLoaded()); + } + + @Test + public void testSnappyCompressorSetInputNullPointerException() { + try { + SnappyCompressor compressor = new SnappyCompressor(); + compressor.setInput(null, 0, 10); + fail("testSnappyCompressorSetInputNullPointerException error !!!"); + } catch (NullPointerException ex) { + // excepted + } catch (Exception ex) { + fail("testSnappyCompressorSetInputNullPointerException ex error !!!"); + } + } + + @Test + public void testSnappyDecompressorSetInputNullPointerException() { + try { + SnappyDecompressor decompressor = new SnappyDecompressor(); + decompressor.setInput(null, 0, 10); + fail("testSnappyDecompressorSetInputNullPointerException error !!!"); + } catch (NullPointerException ex) { + // expected + } catch (Exception e) { + fail("testSnappyDecompressorSetInputNullPointerException ex error !!!"); + } + } + + @Test + public void testSnappyCompressorSetInputAIOBException() { + try { + SnappyCompressor compressor = new SnappyCompressor(); + compressor.setInput(new byte[] {}, -5, 10); + fail("testSnappyCompressorSetInputAIOBException error !!!"); + } catch (ArrayIndexOutOfBoundsException ex) { + // expected + } catch (Exception ex) { + fail("testSnappyCompressorSetInputAIOBException ex error !!!"); + } + } + + @Test + public void testSnappyDecompressorSetInputAIOUBException() { + try { + SnappyDecompressor decompressor = new SnappyDecompressor(); + decompressor.setInput(new byte[] {}, -5, 10); + fail("testSnappyDecompressorSetInputAIOUBException error !!!"); + } catch (ArrayIndexOutOfBoundsException ex) { + // expected + } catch (Exception e) { + fail("testSnappyDecompressorSetInputAIOUBException ex error !!!"); + } + } + + @Test + public void testSnappyCompressorCompressNullPointerException() { + try { + SnappyCompressor compressor = new SnappyCompressor(); + byte[] bytes = BytesGenerator.get(1024 * 6); + compressor.setInput(bytes, 0, bytes.length); + compressor.compress(null, 0, 0); + fail("testSnappyCompressorCompressNullPointerException error !!!"); + } catch (NullPointerException ex) { + // expected + } catch (Exception e) { + fail("testSnappyCompressorCompressNullPointerException ex error !!!"); + } + } + + @Test + public void testSnappyDecompressorCompressNullPointerException() { + try { + SnappyDecompressor decompressor = new SnappyDecompressor(); + byte[] bytes = BytesGenerator.get(1024 * 6); + decompressor.setInput(bytes, 0, bytes.length); + 
decompressor.decompress(null, 0, 0); + fail("testSnappyDecompressorCompressNullPointerException error !!!"); + } catch (NullPointerException ex) { + // expected + } catch (Exception e) { + fail("testSnappyDecompressorCompressNullPointerException ex error !!!"); + } + } + + @Test + public void testSnappyCompressorCompressAIOBException() { + try { + SnappyCompressor compressor = new SnappyCompressor(); + byte[] bytes = BytesGenerator.get(1024 * 6); + compressor.setInput(bytes, 0, bytes.length); + compressor.compress(new byte[] {}, 0, -1); + fail("testSnappyCompressorCompressAIOBException error !!!"); + } catch (ArrayIndexOutOfBoundsException ex) { + // expected + } catch (Exception e) { + fail("testSnappyCompressorCompressAIOBException ex error !!!"); + } + } + + @Test + public void testSnappyDecompressorCompressAIOBException() { + try { + SnappyDecompressor decompressor = new SnappyDecompressor(); + byte[] bytes = BytesGenerator.get(1024 * 6); + decompressor.setInput(bytes, 0, bytes.length); + decompressor.decompress(new byte[] {}, 0, -1); + fail("testSnappyDecompressorCompressAIOBException error !!!"); + } catch (ArrayIndexOutOfBoundsException ex) { + // expected + } catch (Exception e) { + fail("testSnappyDecompressorCompressAIOBException ex error !!!"); + } + } + + @Test + public void testSnappyCompressDecompress() { + int BYTE_SIZE = 1024 * 54; + byte[] bytes = BytesGenerator.get(BYTE_SIZE); + SnappyCompressor compressor = new SnappyCompressor(); + try { + compressor.setInput(bytes, 0, bytes.length); + assertTrue("SnappyCompressDecompress getBytesRead error !!!", + compressor.getBytesRead() > 0); + assertTrue( + "SnappyCompressDecompress getBytesWritten before compress error !!!", + compressor.getBytesWritten() == 0); + + byte[] compressed = new byte[BYTE_SIZE]; + int cSize = compressor.compress(compressed, 0, compressed.length); + assertTrue( + "SnappyCompressDecompress getBytesWritten after compress error !!!", + compressor.getBytesWritten() > 0); + + SnappyDecompressor decompressor = new SnappyDecompressor(BYTE_SIZE); + // set as input for decompressor only compressed data indicated with cSize + decompressor.setInput(compressed, 0, cSize); + byte[] decompressed = new byte[BYTE_SIZE]; + decompressor.decompress(decompressed, 0, decompressed.length); + + assertTrue("testSnappyCompressDecompress finished error !!!", + decompressor.finished()); + Assert.assertArrayEquals(bytes, decompressed); + compressor.reset(); + decompressor.reset(); + assertTrue("decompressor getRemaining error !!!", + decompressor.getRemaining() == 0); + } catch (Exception e) { + fail("testSnappyCompressDecompress ex error!!!"); + } + } + + @Test + public void testCompressorDecompressorEmptyStreamLogic() { + ByteArrayInputStream bytesIn = null; + ByteArrayOutputStream bytesOut = null; + byte[] buf = null; + BlockDecompressorStream blockDecompressorStream = null; + try { + // compress empty stream + bytesOut = new ByteArrayOutputStream(); + BlockCompressorStream blockCompressorStream = new BlockCompressorStream( + bytesOut, new SnappyCompressor(), 1024, 0); + // close without write + blockCompressorStream.close(); + + // check compressed output + buf = bytesOut.toByteArray(); + assertEquals("empty stream compressed output size != 4", 4, buf.length); + + // use compressed output as input for decompression + bytesIn = new ByteArrayInputStream(buf); + + // create decompression stream + blockDecompressorStream = new BlockDecompressorStream(bytesIn, + new SnappyDecompressor(), 1024); + + // no byte is available because 
stream was closed + assertEquals("return value is not -1", -1, blockDecompressorStream.read()); + } catch (Exception e) { + fail("testCompressorDecompressorEmptyStreamLogic ex error !!!" + + e.getMessage()); + } finally { + if (blockDecompressorStream != null) + try { + bytesIn.close(); + bytesOut.close(); + blockDecompressorStream.close(); + } catch (IOException e) { + } + } + } + + @Test + public void testSnappyBlockCompression() { + int BYTE_SIZE = 1024 * 50; + int BLOCK_SIZE = 512; + ByteArrayOutputStream out = new ByteArrayOutputStream(); + byte[] block = new byte[BLOCK_SIZE]; + byte[] bytes = BytesGenerator.get(BYTE_SIZE); + try { + // Use default of 512 as bufferSize and compressionOverhead of + // (1% of bufferSize + 12 bytes) = 18 bytes (zlib algorithm). + SnappyCompressor compressor = new SnappyCompressor(); + int off = 0; + int len = BYTE_SIZE; + int maxSize = BLOCK_SIZE - 18; + if (BYTE_SIZE > maxSize) { + do { + int bufLen = Math.min(len, maxSize); + compressor.setInput(bytes, off, bufLen); + compressor.finish(); + while (!compressor.finished()) { + compressor.compress(block, 0, block.length); + out.write(block); + } + compressor.reset(); + off += bufLen; + len -= bufLen; + } while (len > 0); + } + assertTrue("testSnappyBlockCompression error !!!", + out.toByteArray().length > 0); + } catch (Exception ex) { + fail("testSnappyBlockCompression ex error !!!"); + } + } + + @Test + public void testSnappyCompressorDecopressorLogicWithCompressionStreams() { + int BYTE_SIZE = 1024 * 100; + byte[] bytes = BytesGenerator.get(BYTE_SIZE); + int bufferSize = 262144; + int compressionOverhead = (bufferSize / 6) + 32; + DataOutputStream deflateOut = null; + DataInputStream inflateIn = null; + try { + DataOutputBuffer compressedDataBuffer = new DataOutputBuffer(); + CompressionOutputStream deflateFilter = new BlockCompressorStream( + compressedDataBuffer, new SnappyCompressor(bufferSize), bufferSize, + compressionOverhead); + deflateOut = new DataOutputStream(new BufferedOutputStream(deflateFilter)); + + deflateOut.write(bytes, 0, bytes.length); + deflateOut.flush(); + deflateFilter.finish(); + + DataInputBuffer deCompressedDataBuffer = new DataInputBuffer(); + deCompressedDataBuffer.reset(compressedDataBuffer.getData(), 0, + compressedDataBuffer.getLength()); + + CompressionInputStream inflateFilter = new BlockDecompressorStream( + deCompressedDataBuffer, new SnappyDecompressor(bufferSize), + bufferSize); + + inflateIn = new DataInputStream(new BufferedInputStream(inflateFilter)); + + byte[] result = new byte[BYTE_SIZE]; + inflateIn.read(result); + + Assert.assertArrayEquals( + "original array not equals compress/decompressed array", result, + bytes); + } catch (IOException e) { + fail("testSnappyCompressorDecopressorLogicWithCompressionStreams ex error !!!"); + } finally { + try { + if (deflateOut != null) + deflateOut.close(); + if (inflateIn != null) + inflateIn.close(); + } catch (Exception e) { + } + } + } + + static final class BytesGenerator { + private BytesGenerator() { + } + + private static final byte[] CACHE = new byte[] { 0x0, 0x1, 0x2, 0x3, 0x4, + 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF }; + private static final Random rnd = new Random(12345l); + + public static byte[] get(int size) { + byte[] array = (byte[]) Array.newInstance(byte.class, size); + for (int i = 0; i < size; i++) + array[i] = CACHE[rnd.nextInt(CACHE.length - 1)]; + return array; + } + } +} From 80c5bffc4b6bd2aa54ed8d49a32fd2c2cb77bfcc Mon Sep 17 00:00:00 2001 From: Alejandro Abdelnur Date: Fri, 4 
Oct 2013 21:59:43 +0000 Subject: [PATCH 051/133] YARN-1253. Changes to LinuxContainerExecutor to run containers as a single dedicated user in non-secure mode. (rvs via tucu) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529325 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/site/apt/ClusterSetup.apt.vm | 6 +- hadoop-yarn-project/CHANGES.txt | 3 + .../hadoop/yarn/conf/YarnConfiguration.java | 22 ++++- .../src/main/resources/yarn-default.xml | 15 ++++ .../nodemanager/LinuxContainerExecutor.java | 38 +++++++- .../impl/container-executor.c | 4 +- .../native/container-executor/impl/main.c | 33 ++++--- .../test/test-container-executor.c | 67 ++++++++------ .../TestLinuxContainerExecutor.java | 90 +++++++++++++++++++ .../TestLinuxContainerExecutorWithMocks.java | 37 ++++---- 10 files changed, 256 insertions(+), 59 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/site/apt/ClusterSetup.apt.vm b/hadoop-common-project/hadoop-common/src/site/apt/ClusterSetup.apt.vm index 1e2a2c0b994..01044fdccf6 100644 --- a/hadoop-common-project/hadoop-common/src/site/apt/ClusterSetup.apt.vm +++ b/hadoop-common-project/hadoop-common/src/site/apt/ClusterSetup.apt.vm @@ -854,8 +854,10 @@ KVNO Timestamp Principal | | The container process has the same Unix user as the NodeManager. | *--------------------------------------+--------------------------------------+ | <<>> | | -| | Supported only on GNU/Linux, this executor runs the containers as the | -| | user who submitted the application. It requires all user accounts to be | +| | Supported only on GNU/Linux, this executor runs the containers as either the | +| | YARN user who submitted the application (when full security is enabled) or | +| | as a dedicated user (defaults to nobody) when full security is not enabled. | +| | When full security is enabled, this executor requires all user accounts to be | | | created on the cluster nodes where the containers are launched. It uses | | | a executable that is included in the Hadoop distribution. | | | The NodeManager uses this executable to launch and kill containers. | diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index e249e2c13e2..588a8e7f331 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -29,6 +29,9 @@ Release 2.3.0 - UNRELEASED YARN-1010. FairScheduler: decouple container scheduling from nodemanager heartbeats. (Wei Yan via Sandy Ryza) + YARN-1253. Changes to LinuxContainerExecutor to run containers as a single + dedicated user in non-secure mode. (rvs via tucu) + IMPROVEMENTS YARN-905. Add state filters to nodes CLI (Wei Yan via Sandy Ryza) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index e1327dee5a6..31f344293d5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -635,7 +635,27 @@ public class YarnConfiguration extends Configuration { */ public static final String NM_LINUX_CONTAINER_GROUP = NM_PREFIX + "linux-container-executor.group"; - + + /** + * The UNIX user that containers will run as when Linux-container-executor + * is used in nonsecure mode (a use case for this is using cgroups). 
+ */ + public static final String NM_NONSECURE_MODE_LOCAL_USER_KEY = NM_PREFIX + + "linux-container-executor.nonsecure-mode.local-user"; + + public static final String DEFAULT_NM_NONSECURE_MODE_LOCAL_USER = "nobody"; + + /** + * The allowed pattern for UNIX user names enforced by + * Linux-container-executor when used in nonsecure mode (use case for this + * is using cgroups). The default value is taken from /usr/sbin/adduser + */ + public static final String NM_NONSECURE_MODE_USER_PATTERN_KEY = NM_PREFIX + + "linux-container-executor.nonsecure-mode.user-pattern"; + + public static final String DEFAULT_NM_NONSECURE_MODE_USER_PATTERN = + "^[_.A-Za-z0-9][-@_.A-Za-z0-9]{0,255}?[$]?$"; + /** The type of resource enforcement to use with the * linux container executor. */ diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index 6dfeb6662e0..0127fcc579f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -727,6 +727,21 @@ yarn.nodemanager.linux-container-executor.cgroups.mount-path + + The UNIX user that containers will run as when Linux-container-executor + is used in nonsecure mode (a use case for this is using cgroups). + yarn.nodemanager.linux-container-executor.nonsecure-mode.local-user + nobody + + + + The allowed pattern for UNIX user names enforced by + Linux-container-executor when used in nonsecure mode (use case for this + is using cgroups). The default value is taken from /usr/sbin/adduser + yarn.nodemanager.linux-container-executor.nonsecure-mode.user-pattern + ^[_.A-Za-z0-9][-@_.A-Za-z0-9]{0,255}?[$]?$ + + T-file compression types used to compress aggregated logs. 
yarn.nodemanager.log-aggregation.compression-type diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java index af72d7195c4..ebed090c2ff 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java @@ -24,11 +24,13 @@ import java.net.InetSocketAddress; import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.regex.Pattern; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.Shell.ExitCodeException; import org.apache.hadoop.util.Shell.ShellCommandExecutor; @@ -48,6 +50,8 @@ public class LinuxContainerExecutor extends ContainerExecutor { private static final Log LOG = LogFactory .getLog(LinuxContainerExecutor.class); + private String nonsecureLocalUser; + private Pattern nonsecureLocalUserPattern; private String containerExecutorExe; private LCEResourcesHandler resourcesHandler; private boolean containerSchedPriorityIsSet = false; @@ -70,6 +74,24 @@ public class LinuxContainerExecutor extends ContainerExecutor { .getInt(YarnConfiguration.NM_CONTAINER_EXECUTOR_SCHED_PRIORITY, YarnConfiguration.DEFAULT_NM_CONTAINER_EXECUTOR_SCHED_PRIORITY); } + nonsecureLocalUser = conf.get( + YarnConfiguration.NM_NONSECURE_MODE_LOCAL_USER_KEY, + YarnConfiguration.DEFAULT_NM_NONSECURE_MODE_LOCAL_USER); + nonsecureLocalUserPattern = Pattern.compile( + conf.get(YarnConfiguration.NM_NONSECURE_MODE_USER_PATTERN_KEY, + YarnConfiguration.DEFAULT_NM_NONSECURE_MODE_USER_PATTERN)); + } + + void verifyUsernamePattern(String user) { + if (!UserGroupInformation.isSecurityEnabled() && + !nonsecureLocalUserPattern.matcher(user).matches()) { + throw new IllegalArgumentException("Invalid user name '" + user + "'," + + " it must match '" + nonsecureLocalUserPattern.pattern() + "'"); + } + } + + String getRunAsUser(String user) { + return UserGroupInformation.isSecurityEnabled() ? 
user : nonsecureLocalUser; } /** @@ -162,9 +184,12 @@ public class LinuxContainerExecutor extends ContainerExecutor { List localDirs, List logDirs) throws IOException, InterruptedException { + verifyUsernamePattern(user); + String runAsUser = getRunAsUser(user); List command = new ArrayList(); addSchedPriorityCommand(command); command.addAll(Arrays.asList(containerExecutorExe, + runAsUser, user, Integer.toString(Commands.INITIALIZE_CONTAINER.getValue()), appId, @@ -218,6 +243,9 @@ public class LinuxContainerExecutor extends ContainerExecutor { String user, String appId, Path containerWorkDir, List localDirs, List logDirs) throws IOException { + verifyUsernamePattern(user); + String runAsUser = getRunAsUser(user); + ContainerId containerId = container.getContainerId(); String containerIdStr = ConverterUtils.toString(containerId); @@ -234,7 +262,7 @@ public class LinuxContainerExecutor extends ContainerExecutor { List command = new ArrayList(); addSchedPriorityCommand(command); command.addAll(Arrays.asList( - containerExecutorExe, user, Integer + containerExecutorExe, runAsUser, user, Integer .toString(Commands.LAUNCH_CONTAINER.getValue()), appId, containerIdStr, containerWorkDir.toString(), nmPrivateCotainerScriptPath.toUri().getPath().toString(), @@ -293,8 +321,12 @@ public class LinuxContainerExecutor extends ContainerExecutor { public boolean signalContainer(String user, String pid, Signal signal) throws IOException { + verifyUsernamePattern(user); + String runAsUser = getRunAsUser(user); + String[] command = new String[] { containerExecutorExe, + runAsUser, user, Integer.toString(Commands.SIGNAL_CONTAINER.getValue()), pid, @@ -322,8 +354,12 @@ public class LinuxContainerExecutor extends ContainerExecutor { @Override public void deleteAsUser(String user, Path dir, Path... baseDirs) { + verifyUsernamePattern(user); + String runAsUser = getRunAsUser(user); + List command = new ArrayList( Arrays.asList(containerExecutorExe, + runAsUser, user, Integer.toString(Commands.DELETE_AS_USER.getValue()), dir == null ? 
"" : dir.toUri().getPath())); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c index 307e0fafda7..d5d894d42da 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c @@ -407,7 +407,7 @@ static int create_container_directories(const char* user, const char *app_id, const char *container_id, char* const* local_dir, char* const* log_dir, const char *work_dir) { // create dirs as 0750 const mode_t perms = S_IRWXU | S_IRGRP | S_IXGRP; - if (app_id == NULL || container_id == NULL || user == NULL) { + if (app_id == NULL || container_id == NULL || user == NULL || user_detail == NULL || user_detail->pw_name == NULL) { fprintf(LOGFILE, "Either app_id, container_id or the user passed is null.\n"); return -1; @@ -758,7 +758,7 @@ int initialize_app(const char *user, const char *app_id, const char* nmPrivate_credentials_file, char* const* local_dirs, char* const* log_roots, char* const* args) { - if (app_id == NULL || user == NULL) { + if (app_id == NULL || user == NULL || user_detail == NULL || user_detail->pw_name == NULL) { fprintf(LOGFILE, "Either app_id is null or the user passed is null.\n"); return INVALID_ARGUMENT_NUMBER; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c index f0245d81dc1..9b5e784d520 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c @@ -49,7 +49,7 @@ void display_usage(FILE *stream) { "Usage: container-executor --mount-cgroups "\ "hierarchy controller=path...\n"); fprintf(stream, - "Usage: container-executor user command command-args\n"); + "Usage: container-executor user yarn-user command command-args\n"); fprintf(stream, "Commands:\n"); fprintf(stream, " initialize container: %2d appid tokens " \ "nm-local-dirs nm-log-dirs cmd app...\n", INITIALIZE_CONTAINER); @@ -178,18 +178,29 @@ int main(int argc, char **argv) { if (ret != 0) { return ret; } + + // this string is used for building pathnames, the + // process management is done based on the 'user_detail' + // global, which was set by 'set_user()' above + optind = optind + 1; + char *yarn_user_name = argv[optind]; + if (yarn_user_name == NULL) { + fprintf(ERRORFILE, "Invalid yarn user name.\n"); + return INVALID_USER_NAME; + } optind = optind + 1; command = atoi(argv[optind++]); fprintf(LOGFILE, "main : command provided %d\n",command); fprintf(LOGFILE, "main : user is %s\n", user_detail->pw_name); + fprintf(LOGFILE, "main : requested yarn user is %s\n", yarn_user_name); fflush(LOGFILE); switch (command) { case INITIALIZE_CONTAINER: - if (argc < 8) { - fprintf(ERRORFILE, "Too few arguments (%d vs 8) for initialize container\n", + if (argc < 9) { + fprintf(ERRORFILE, "Too few arguments (%d vs 
9) for initialize container\n", argc); fflush(ERRORFILE); return INVALID_ARGUMENT_NUMBER; @@ -198,13 +209,13 @@ int main(int argc, char **argv) { cred_file = argv[optind++]; local_dirs = argv[optind++];// good local dirs as a comma separated list log_dirs = argv[optind++];// good log dirs as a comma separated list - exit_code = initialize_app(user_detail->pw_name, app_id, cred_file, + exit_code = initialize_app(yarn_user_name, app_id, cred_file, extract_values(local_dirs), extract_values(log_dirs), argv + optind); break; case LAUNCH_CONTAINER: - if (argc != 12) { - fprintf(ERRORFILE, "Wrong number of arguments (%d vs 12) for launch container\n", + if (argc != 13) { + fprintf(ERRORFILE, "Wrong number of arguments (%d vs 13) for launch container\n", argc); fflush(ERRORFILE); return INVALID_ARGUMENT_NUMBER; @@ -230,7 +241,7 @@ int main(int argc, char **argv) { return INVALID_ARGUMENT_NUMBER; } char** resources_values = extract_values(resources_value); - exit_code = launch_container_as_user(user_detail->pw_name, app_id, + exit_code = launch_container_as_user(yarn_user_name, app_id, container_id, current_dir, script_file, cred_file, pid_file, extract_values(local_dirs), extract_values(log_dirs), resources_key, @@ -239,8 +250,8 @@ int main(int argc, char **argv) { free(resources_value); break; case SIGNAL_CONTAINER: - if (argc != 5) { - fprintf(ERRORFILE, "Wrong number of arguments (%d vs 5) for " \ + if (argc != 6) { + fprintf(ERRORFILE, "Wrong number of arguments (%d vs 6) for " \ "signal container\n", argc); fflush(ERRORFILE); return INVALID_ARGUMENT_NUMBER; @@ -260,12 +271,12 @@ int main(int argc, char **argv) { fflush(ERRORFILE); return INVALID_ARGUMENT_NUMBER; } - exit_code = signal_container_as_user(user_detail->pw_name, container_pid, signal); + exit_code = signal_container_as_user(yarn_user_name, container_pid, signal); } break; case DELETE_AS_USER: dir_to_be_deleted = argv[optind++]; - exit_code= delete_as_user(user_detail->pw_name, dir_to_be_deleted, + exit_code= delete_as_user(yarn_user_name, dir_to_be_deleted, argv + optind); break; default: diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c index e995bf24132..e9a47b1cbe1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c @@ -36,6 +36,7 @@ #define ARRAY_SIZE 1000 static char* username = NULL; +static char* yarn_username = NULL; static char** local_dirs = NULL; static char** log_dirs = NULL; @@ -252,15 +253,15 @@ void test_check_configuration_permissions() { } void test_delete_container() { - if (initialize_user(username, local_dirs)) { - printf("FAIL: failed to initialize user %s\n", username); + if (initialize_user(yarn_username, local_dirs)) { + printf("FAIL: failed to initialize user %s\n", yarn_username); exit(1); } - char* app_dir = get_app_directory(TEST_ROOT "/local-2", username, "app_1"); - char* dont_touch = get_app_directory(TEST_ROOT "/local-2", username, + char* app_dir = get_app_directory(TEST_ROOT "/local-2", yarn_username, "app_1"); + char* dont_touch = get_app_directory(TEST_ROOT "/local-2", 
yarn_username, DONT_TOUCH_FILE); char* container_dir = get_container_work_directory(TEST_ROOT "/local-2", - username, "app_1", "container_1"); + yarn_username, "app_1", "container_1"); char buffer[100000]; sprintf(buffer, "mkdir -p %s/who/let/the/dogs/out/who/who", container_dir); run(buffer); @@ -287,7 +288,7 @@ void test_delete_container() { // delete container directory char * dirs[] = {app_dir, 0}; - int ret = delete_as_user(username, "container_1" , dirs); + int ret = delete_as_user(yarn_username, "container_1" , dirs); if (ret != 0) { printf("FAIL: return code from delete_as_user is %d\n", ret); exit(1); @@ -318,11 +319,11 @@ void test_delete_container() { } void test_delete_app() { - char* app_dir = get_app_directory(TEST_ROOT "/local-2", username, "app_2"); - char* dont_touch = get_app_directory(TEST_ROOT "/local-2", username, + char* app_dir = get_app_directory(TEST_ROOT "/local-2", yarn_username, "app_2"); + char* dont_touch = get_app_directory(TEST_ROOT "/local-2", yarn_username, DONT_TOUCH_FILE); char* container_dir = get_container_work_directory(TEST_ROOT "/local-2", - username, "app_2", "container_1"); + yarn_username, "app_2", "container_1"); char buffer[100000]; sprintf(buffer, "mkdir -p %s/who/let/the/dogs/out/who/who", container_dir); run(buffer); @@ -348,7 +349,7 @@ void test_delete_app() { run(buffer); // delete container directory - int ret = delete_as_user(username, app_dir, NULL); + int ret = delete_as_user(yarn_username, app_dir, NULL); if (ret != 0) { printf("FAIL: return code from delete_as_user is %d\n", ret); exit(1); @@ -377,17 +378,17 @@ void test_delete_app() { void test_delete_user() { printf("\nTesting delete_user\n"); - char* app_dir = get_app_directory(TEST_ROOT "/local-1", username, "app_3"); + char* app_dir = get_app_directory(TEST_ROOT "/local-1", yarn_username, "app_3"); if (mkdirs(app_dir, 0700) != 0) { exit(1); } char buffer[100000]; - sprintf(buffer, "%s/local-1/usercache/%s", TEST_ROOT, username); + sprintf(buffer, "%s/local-1/usercache/%s", TEST_ROOT, yarn_username); if (access(buffer, R_OK) != 0) { printf("FAIL: directory missing before test\n"); exit(1); } - if (delete_as_user(username, buffer, NULL) != 0) { + if (delete_as_user(yarn_username, buffer, NULL) != 0) { exit(1); } if (access(buffer, R_OK) == 0) { @@ -446,7 +447,7 @@ void test_signal_container() { exit(0); } else { printf("Child container launched as %d\n", child); - if (signal_container_as_user(username, child, SIGQUIT) != 0) { + if (signal_container_as_user(yarn_username, child, SIGQUIT) != 0) { exit(1); } int status = 0; @@ -486,7 +487,7 @@ void test_signal_container_group() { // there's a race condition for child calling change_user and us // calling signal_container_as_user, hence sleeping sleep(3); - if (signal_container_as_user(username, child, SIGKILL) != 0) { + if (signal_container_as_user(yarn_username, child, SIGKILL) != 0) { exit(1); } int status = 0; @@ -550,7 +551,7 @@ void test_init_app() { exit(1); } else if (child == 0) { char *final_pgm[] = {"touch", "my-touch-file", 0}; - if (initialize_app(username, "app_4", TEST_ROOT "/creds.txt", + if (initialize_app(yarn_username, "app_4", TEST_ROOT "/creds.txt", local_dirs, log_dirs, final_pgm) != 0) { printf("FAIL: failed in child\n"); exit(42); @@ -568,7 +569,7 @@ void test_init_app() { printf("FAIL: failed to create app log directory\n"); exit(1); } - char* app_dir = get_app_directory(TEST_ROOT "/local-1", username, "app_4"); + char* app_dir = get_app_directory(TEST_ROOT "/local-1", yarn_username, "app_4"); if 
(access(app_dir, R_OK) != 0) { printf("FAIL: failed to create app directory %s\n", app_dir); exit(1); @@ -640,7 +641,7 @@ void test_run_container() { fflush(stdout); fflush(stderr); char* container_dir = get_container_work_directory(TEST_ROOT "/local-1", - username, "app_4", "container_1"); + yarn_username, "app_4", "container_1"); const char * pid_file = TEST_ROOT "/pid.txt"; pid_t child = fork(); @@ -649,7 +650,7 @@ void test_run_container() { strerror(errno)); exit(1); } else if (child == 0) { - if (launch_container_as_user(username, "app_4", "container_1", + if (launch_container_as_user(yarn_username, "app_4", "container_1", container_dir, script_name, TEST_ROOT "/creds.txt", pid_file, local_dirs, log_dirs, "cgroups", cgroups_pids) != 0) { @@ -697,10 +698,22 @@ void test_run_container() { check_pid_file(cgroups_pids[1], child); } +// This test is expected to be executed either by a regular +// user or by root. If executed by a regular user it doesn't +// test all the functions that would depend on changing the +// effective user id. If executed by a super-user everything +// gets tested. Here are different ways of execing the test binary: +// 1. regular user assuming user == yarn user +// $ test-container-executor +// 2. regular user with a given yarn user +// $ test-container-executor yarn_user +// 3. super user with a given user and assuming user == yarn user +// # test-container-executor user +// 4. super user with a given user and a given yarn user +// # test-container-executor user yarn_user int main(int argc, char **argv) { LOGFILE = stdout; ERRORFILE = stderr; - int my_username = 0; // clean up any junk from previous run if (system("chmod -R u=rwx " TEST_ROOT "; rm -fr " TEST_ROOT)) { @@ -721,11 +734,15 @@ int main(int argc, char **argv) { create_nm_roots(local_dirs); - if (getuid() == 0 && argc == 2) { + // See the description above of various ways this test + // can be executed in order to understand the following logic + char* current_username = strdup(getpwuid(getuid())->pw_name); + if (getuid() == 0 && (argc == 2 || argc == 3)) { username = argv[1]; + yarn_username = (argc == 3) ? argv[2] : argv[1]; } else { - username = strdup(getpwuid(getuid())->pw_name); - my_username = 1; + username = current_username; + yarn_username = (argc == 2) ? 
argv[1] : current_username; } set_nm_uid(geteuid(), getegid()); @@ -783,9 +800,7 @@ int main(int argc, char **argv) { run("rm -fr " TEST_ROOT); printf("\nFinished tests\n"); - if (my_username) { - free(username); - } + free(current_username); free_configurations(); return 0; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java index f09f5a88e69..74452c92b5c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java @@ -31,14 +31,17 @@ import java.io.IOException; import java.io.PrintWriter; import java.util.HashMap; +import junit.framework.Assert; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.conf.YarnConfiguration; @@ -256,4 +259,91 @@ public class TestLinuxContainerExecutor { assertFalse(t.isAlive()); } + + @Test + public void testLocalUser() throws Exception { + try { + //nonsecure default + Configuration conf = new YarnConfiguration(); + conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, + "simple"); + UserGroupInformation.setConfiguration(conf); + LinuxContainerExecutor lce = new LinuxContainerExecutor(); + lce.setConf(conf); + Assert.assertEquals(YarnConfiguration.DEFAULT_NM_NONSECURE_MODE_LOCAL_USER, + lce.getRunAsUser("foo")); + + //nonsecure custom setting + conf.set(YarnConfiguration.NM_NONSECURE_MODE_LOCAL_USER_KEY, "bar"); + lce = new LinuxContainerExecutor(); + lce.setConf(conf); + Assert.assertEquals("bar", lce.getRunAsUser("foo")); + + //secure + conf = new YarnConfiguration(); + conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, + "kerberos"); + UserGroupInformation.setConfiguration(conf); + lce = new LinuxContainerExecutor(); + lce.setConf(conf); + Assert.assertEquals("foo", lce.getRunAsUser("foo")); + } finally { + Configuration conf = new YarnConfiguration(); + conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, + "simple"); + UserGroupInformation.setConfiguration(conf); + } + } + + @Test + public void testNonsecureUsernamePattern() throws Exception { + try { + //nonsecure default + Configuration conf = new YarnConfiguration(); + conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, + "simple"); + UserGroupInformation.setConfiguration(conf); + LinuxContainerExecutor lce = new LinuxContainerExecutor(); + lce.setConf(conf); + lce.verifyUsernamePattern("foo"); + try { + lce.verifyUsernamePattern("foo/x"); + Assert.fail(); + } catch (IllegalArgumentException ex) 
{ + //NOP + } catch (Throwable ex) { + Assert.fail(ex.toString()); + } + + //nonsecure custom setting + conf.set(YarnConfiguration.NM_NONSECURE_MODE_USER_PATTERN_KEY, "foo"); + lce = new LinuxContainerExecutor(); + lce.setConf(conf); + lce.verifyUsernamePattern("foo"); + try { + lce.verifyUsernamePattern("bar"); + Assert.fail(); + } catch (IllegalArgumentException ex) { + //NOP + } catch (Throwable ex) { + Assert.fail(ex.toString()); + } + + //secure, pattern matching does not kick in. + conf = new YarnConfiguration(); + conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, + "kerberos"); + UserGroupInformation.setConfiguration(conf); + lce = new LinuxContainerExecutor(); + lce.setConf(conf); + lce.verifyUsernamePattern("foo"); + lce.verifyUsernamePattern("foo/w"); + } finally { + Configuration conf = new YarnConfiguration(); + conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, + "simple"); + UserGroupInformation.setConfiguration(conf); + } + } + } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java index 67e90231f9a..096470ce115 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java @@ -128,7 +128,8 @@ public class TestLinuxContainerExecutorWithMocks { appSubmitter, appId, workDir, dirsHandler.getLocalDirs(), dirsHandler.getLogDirs()); assertEquals(0, ret); - assertEquals(Arrays.asList(appSubmitter, cmd, appId, containerId, + assertEquals(Arrays.asList(YarnConfiguration.DEFAULT_NM_NONSECURE_MODE_LOCAL_USER, + appSubmitter, cmd, appId, containerId, workDir.toString(), "/bin/echo", "/dev/null", pidFile.toString(), StringUtils.join(",", dirsHandler.getLocalDirs()), StringUtils.join(",", dirsHandler.getLogDirs()), "cgroups=none"), @@ -180,18 +181,19 @@ public class TestLinuxContainerExecutorWithMocks { try { mockExec.startLocalizer(nmPrivateCTokensPath, address, "test", "application_0", "12345", dirsHandler.getLocalDirs(), dirsHandler.getLogDirs()); List result=readMockParams(); - Assert.assertEquals(result.size(), 16); - Assert.assertEquals(result.get(0), "test"); - Assert.assertEquals(result.get(1), "0" ); - Assert.assertEquals(result.get(2),"application_0" ); - Assert.assertEquals(result.get(3), "/bin/nmPrivateCTokensPath"); - Assert.assertEquals(result.get(7), "-classpath" ); - Assert.assertEquals(result.get(10),"org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer" ); - Assert.assertEquals(result.get(11), "test"); - Assert.assertEquals(result.get(12), "application_0"); - Assert.assertEquals(result.get(13),"12345" ); - Assert.assertEquals(result.get(14),"localhost" ); - Assert.assertEquals(result.get(15),"8040" ); + Assert.assertEquals(result.size(), 17); + Assert.assertEquals(result.get(0), YarnConfiguration.DEFAULT_NM_NONSECURE_MODE_LOCAL_USER); + Assert.assertEquals(result.get(1), "test"); + Assert.assertEquals(result.get(2), "0" ); + Assert.assertEquals(result.get(3),"application_0" 
); + Assert.assertEquals(result.get(4), "/bin/nmPrivateCTokensPath"); + Assert.assertEquals(result.get(8), "-classpath" ); + Assert.assertEquals(result.get(11),"org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer" ); + Assert.assertEquals(result.get(12), "test"); + Assert.assertEquals(result.get(13), "application_0"); + Assert.assertEquals(result.get(14),"12345" ); + Assert.assertEquals(result.get(15),"localhost" ); + Assert.assertEquals(result.get(16),"8040" ); } catch (InterruptedException e) { LOG.error("Error:"+e.getMessage(),e); @@ -246,7 +248,8 @@ public class TestLinuxContainerExecutorWithMocks { appSubmitter, appId, workDir, dirsHandler.getLocalDirs(), dirsHandler.getLogDirs()); Assert.assertNotSame(0, ret); - assertEquals(Arrays.asList(appSubmitter, cmd, appId, containerId, + assertEquals(Arrays.asList(YarnConfiguration.DEFAULT_NM_NONSECURE_MODE_LOCAL_USER, + appSubmitter, cmd, appId, containerId, workDir.toString(), "/bin/echo", "/dev/null", pidFile.toString(), StringUtils.join(",", dirsHandler.getLocalDirs()), StringUtils.join(",", dirsHandler.getLogDirs()), @@ -272,7 +275,8 @@ public class TestLinuxContainerExecutorWithMocks { String sigVal = String.valueOf(signal.getValue()); mockExec.signalContainer(appSubmitter, "1000", signal); - assertEquals(Arrays.asList(appSubmitter, cmd, "1000", sigVal), + assertEquals(Arrays.asList(YarnConfiguration.DEFAULT_NM_NONSECURE_MODE_LOCAL_USER, + appSubmitter, cmd, "1000", sigVal), readMockParams()); } @@ -284,7 +288,8 @@ public class TestLinuxContainerExecutorWithMocks { Path dir = new Path("/tmp/testdir"); mockExec.deleteAsUser(appSubmitter, dir); - assertEquals(Arrays.asList(appSubmitter, cmd, "/tmp/testdir"), + assertEquals(Arrays.asList(YarnConfiguration.DEFAULT_NM_NONSECURE_MODE_LOCAL_USER, + appSubmitter, cmd, "/tmp/testdir"), readMockParams()); } } From f104665f7ed8749e563b7512e5846f910676a324 Mon Sep 17 00:00:00 2001 From: Hitesh Shah Date: Sat, 5 Oct 2013 00:48:03 +0000 Subject: [PATCH 052/133] YARN-1251. TestDistributedShell#TestDSShell failed with timeout. Contributed by Xuan Gong. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529369 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 +++ .../yarn/applications/distributedshell/ApplicationMaster.java | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 588a8e7f331..b052539090d 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -165,6 +165,9 @@ Release 2.1.2 - UNRELEASED YARN-1219. FSDownload changes file suffix making FileUtil.unTar() throw exception. (Shanyu Zhao via cnauroth) + YARN-1251. TestDistributedShell#TestDSShell failed with timeout. 
(Xuan Gong + via hitesh) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java index 740a720ad00..e9a98caf57f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java @@ -834,7 +834,7 @@ public class ApplicationMaster { // files in the distributed file-system. The tokens are otherwise also // useful in cases, for e.g., when one is running a "hadoop dfs" command // inside the distributed shell. - ctx.setTokens(allTokens); + ctx.setTokens(allTokens.duplicate()); containerListener.addContainer(container.getId(), container); nmClientAsync.startContainerAsync(container, ctx); From fc23fd312174f36508eb40f3084e262e23a68e1b Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Sat, 5 Oct 2013 01:43:29 +0000 Subject: [PATCH 053/133] YARN-1167. Fixed Distributed Shell to not incorrectly show empty hostname on RM UI. Contributed by Xuan Gong. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529376 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 ++ .../RegisterApplicationMasterRequest.java | 1 + .../distributedshell/ApplicationMaster.java | 7 ++- .../TestDistributedShell.java | 44 +++++++++++++++++-- .../yarn/client/api/impl/AMRMClientImpl.java | 4 +- 5 files changed, 52 insertions(+), 7 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index b052539090d..204098a52b5 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -168,6 +168,9 @@ Release 2.1.2 - UNRELEASED YARN-1251. TestDistributedShell#TestDSShell failed with timeout. (Xuan Gong via hitesh) + YARN-1167. Fixed Distributed Shell to not incorrectly show empty hostname + on RM UI. (Xuan Gong via vinodkv) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/RegisterApplicationMasterRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/RegisterApplicationMasterRequest.java index 0b485d18fd1..6b0185461fd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/RegisterApplicationMasterRequest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/RegisterApplicationMasterRequest.java @@ -48,6 +48,7 @@ public abstract class RegisterApplicationMasterRequest { *
 *   <li>port: -1</li>
 *   <li>trackingUrl: null</li>
 * </ul>
  • * + * The port is allowed to be any integer larger than or equal to -1. * @return the new instance of RegisterApplicationMasterRequest */ @Public diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java index e9a98caf57f..959ba1c45f0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java @@ -45,6 +45,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; @@ -99,7 +100,8 @@ import org.apache.hadoop.yarn.util.Records; * within the ResourceManager regarding what host:port the * ApplicationMaster is listening on to provide any form of functionality to a * client as well as a tracking url that a client can use to keep track of - * status/job history if needed. + * status/job history if needed. However, in the distributedshell, trackingurl + * and appMasterHost:appMasterRpcPort are not supported. *

 * </p>
 *
 * <p>
    @@ -168,7 +170,7 @@ public class ApplicationMaster { // Hostname of the container private String appMasterHostname = ""; // Port on which the app master listens for status updates from clients - private int appMasterRpcPort = 0; + private int appMasterRpcPort = -1; // Tracking url to which app master publishes info for clients to monitor private String appMasterTrackingUrl = ""; @@ -481,6 +483,7 @@ public class ApplicationMaster { // Register self with ResourceManager // This will start heartbeating to the RM + appMasterHostname = NetUtils.getHostname(); RegisterApplicationMasterResponse response = amRMClient .registerApplicationMaster(appMasterHostname, appMasterRpcPort, appMasterTrackingUrl); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java index 8b05aa1890b..7fbd2a6c9de 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java @@ -24,14 +24,20 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.net.URL; +import java.util.List; +import java.util.concurrent.atomic.AtomicBoolean; import junit.framework.Assert; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.util.JarFinder; import org.apache.hadoop.util.Shell; +import org.apache.hadoop.yarn.api.records.ApplicationReport; +import org.apache.hadoop.yarn.api.records.YarnApplicationState; +import org.apache.hadoop.yarn.client.api.YarnClient; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.MiniYARNCluster; import org.apache.hadoop.yarn.server.nodemanager.NodeManager; @@ -117,14 +123,46 @@ public class TestDistributedShell { }; LOG.info("Initializing DS Client"); - Client client = new Client(new Configuration(yarnCluster.getConfig())); + final Client client = new Client(new Configuration(yarnCluster.getConfig())); boolean initSuccess = client.init(args); Assert.assertTrue(initSuccess); LOG.info("Running DS Client"); - boolean result = client.run(); + final AtomicBoolean result = new AtomicBoolean(false); + Thread t = new Thread() { + public void run() { + try { + result.set(client.run()); + } catch (Exception e) { + throw new RuntimeException(e); + } + }; + }; + t.start(); + YarnClient yarnClient = YarnClient.createYarnClient(); + yarnClient.init(new Configuration(yarnCluster.getConfig())); + yarnClient.start(); + String hostName = NetUtils.getHostname(); + boolean verified = false; + while(!verified) { + List apps = yarnClient.getApplications(); + if (apps.size() == 0 ) { + Thread.sleep(10); + continue; + } + ApplicationReport appReport = apps.get(0); + if (appReport.getHost().startsWith(hostName) + && appReport.getRpcPort() == -1) { + verified = true; + } + if (appReport.getYarnApplicationState() == 
YarnApplicationState.FINISHED) { + break; + } + } + Assert.assertTrue(verified); + t.join(); LOG.info("Client run completed. Result=" + result); - Assert.assertTrue(result); + Assert.assertTrue(result.get()); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java index 99ad5a90b49..39225715c19 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java @@ -188,8 +188,8 @@ public class AMRMClientImpl extends AMRMClient { throws YarnException, IOException { Preconditions.checkArgument(appHostName != null, "The host name should not be null"); - Preconditions.checkArgument(appHostPort >= 0, - "Port number of the host should not be negative"); + Preconditions.checkArgument(appHostPort >= -1, "Port number of the host" + + " should be any integers larger than or equal to -1"); // do this only once ??? RegisterApplicationMasterRequest request = RegisterApplicationMasterRequest.newInstance(appHostName, appHostPort, From 0a887a091023073b91b1a789a52e3a26e9b24bae Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Sat, 5 Oct 2013 04:25:44 +0000 Subject: [PATCH 054/133] YARN-1254. Fixed NodeManager to not pollute container's credentials. Contributed by Omkar Vinit Joshi. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529382 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 +++ .../localizer/ContainerLocalizer.java | 8 +++--- .../ResourceLocalizationService.java | 1 + .../localizer/TestContainerLocalizer.java | 27 ++++++++++++++++--- 4 files changed, 33 insertions(+), 6 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 204098a52b5..986af09f211 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -171,6 +171,9 @@ Release 2.1.2 - UNRELEASED YARN-1167. Fixed Distributed Shell to not incorrectly show empty hostname on RM UI. (Xuan Gong via vinodkv) + YARN-1254. Fixed NodeManager to not pollute container's credentials. 
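
The hunks that follow fix this in two steps: the NodeManager copies the container's credentials before adding its internal localizer token, and the ContainerLocalizer deletes the on-disk token file once it has been read. A minimal sketch of the defensive-copy step; the class, method and token alias names here are invented for illustration:

    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.security.Credentials;
    import org.apache.hadoop.security.token.Token;
    import org.apache.hadoop.security.token.TokenIdentifier;

    class CredentialsIsolationSketch {
      // Add an NM-internal token to a *copy* of the container credentials so the
      // Credentials object the application supplied is never mutated.
      static Credentials withLocalizerToken(Credentials containerCredentials,
          Token<? extends TokenIdentifier> localizerToken) {
        Credentials copy = new Credentials(containerCredentials); // defensive copy
        copy.addToken(new Text("nm-localizer-token"), localizerToken);
        return copy;
      }
    }
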
(Omkar + Vinit Joshi via vinodkv) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ContainerLocalizer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ContainerLocalizer.java index 66f21f6a571..51ce9e2fb19 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ContainerLocalizer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ContainerLocalizer.java @@ -65,7 +65,6 @@ import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerHe import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerStatus; import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.ResourceStatusType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.security.LocalizerTokenIdentifier; -import org.apache.hadoop.yarn.server.utils.YarnServerBuilderUtils; import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.hadoop.yarn.util.FSDownload; @@ -130,9 +129,12 @@ public class ContainerLocalizer { try { // assume credentials in cwd // TODO: Fix - credFile = lfs.open( - new Path(String.format(TOKEN_FILE_NAME_FMT, localizerId))); + Path tokenPath = + new Path(String.format(TOKEN_FILE_NAME_FMT, localizerId)); + credFile = lfs.open(tokenPath); creds.readTokenStorageStream(credFile); + // Explicitly deleting token file. 
+ lfs.delete(tokenPath, false); } finally { if (credFile != null) { credFile.close(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java index 70debe05175..4cbc37d5053 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java @@ -1017,6 +1017,7 @@ public class ResourceLocalizationService extends CompositeService } } if (UserGroupInformation.isSecurityEnabled()) { + credentials = new Credentials(credentials); LocalizerTokenIdentifier id = secretManager.createIdentifier(); Token localizerToken = new Token(id, secretManager); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestContainerLocalizer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestContainerLocalizer.java index 2d80bb90a0b..10da1bee13d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestContainerLocalizer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestContainerLocalizer.java @@ -32,6 +32,7 @@ import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -46,6 +47,8 @@ import java.util.concurrent.CompletionService; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.AbstractFileSystem; import org.apache.hadoop.fs.FSDataInputStream; @@ -77,6 +80,7 @@ import org.mockito.stubbing.Answer; public class TestContainerLocalizer { + static final Log LOG = LogFactory.getLog(TestContainerLocalizer.class); static final Path basedir = new Path("target", TestContainerLocalizer.class.getName()); @@ -94,7 +98,10 @@ public class TestContainerLocalizer { @Test public void testContainerLocalizerMain() throws Exception { - ContainerLocalizer localizer = setupContainerLocalizerForTest(); + FileContext fs = FileContext.getLocalFSFileContext(); + spylfs = spy(fs.getDefaultFileSystem()); + ContainerLocalizer localizer = + setupContainerLocalizerForTest(); // verify created cache List privCacheList = new ArrayList(); @@ -190,11 +197,25 @@ public class TestContainerLocalizer { } })); } + + @Test + @SuppressWarnings("unchecked") + 
public void testLocalizerTokenIsGettingRemoved() throws Exception { + FileContext fs = FileContext.getLocalFSFileContext(); + spylfs = spy(fs.getDefaultFileSystem()); + ContainerLocalizer localizer = setupContainerLocalizerForTest(); + doNothing().when(localizer).localizeFiles(any(LocalizationProtocol.class), + any(CompletionService.class), any(UserGroupInformation.class)); + localizer.runLocalization(nmAddr); + verify(spylfs, times(1)).delete(tokenPath, false); + } @Test @SuppressWarnings("unchecked") // mocked generics public void testContainerLocalizerClosesFilesystems() throws Exception { // verify filesystems are closed when localizer doesn't fail + FileContext fs = FileContext.getLocalFSFileContext(); + spylfs = spy(fs.getDefaultFileSystem()); ContainerLocalizer localizer = setupContainerLocalizerForTest(); doNothing().when(localizer).localizeFiles(any(LocalizationProtocol.class), any(CompletionService.class), any(UserGroupInformation.class)); @@ -203,6 +224,7 @@ public class TestContainerLocalizer { localizer.runLocalization(nmAddr); verify(localizer).closeFileSystems(any(UserGroupInformation.class)); + spylfs = spy(fs.getDefaultFileSystem()); // verify filesystems are closed when localizer fails localizer = setupContainerLocalizerForTest(); doThrow(new YarnRuntimeException("Forced Failure")).when(localizer).localizeFiles( @@ -217,7 +239,6 @@ public class TestContainerLocalizer { @SuppressWarnings("unchecked") // mocked generics private ContainerLocalizer setupContainerLocalizerForTest() throws Exception { - spylfs = spy(FileContext.getLocalFSFileContext().getDefaultFileSystem()); // don't actually create dirs doNothing().when(spylfs).mkdir( isA(Path.class), isA(FsPermission.class), anyBoolean()); @@ -245,10 +266,10 @@ public class TestContainerLocalizer { containerId))); doReturn(new FSDataInputStream(new FakeFSDataInputStream(appTokens)) ).when(spylfs).open(tokenPath); - nmProxy = mock(LocalizationProtocol.class); doReturn(nmProxy).when(localizer).getProxy(nmAddr); doNothing().when(localizer).sleep(anyInt()); + // return result instantly for deterministic test ExecutorService syncExec = mock(ExecutorService.class); From be3edccf0acf55e710b0ec8ab8ce8418da74c615 Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Sat, 5 Oct 2013 06:08:48 +0000 Subject: [PATCH 055/133] YARN-1273. Fixed Distributed-shell to account for containers that failed to start. Contributed by Hitesh Shah. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529389 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 + .../distributedshell/ApplicationMaster.java | 33 +++++--- .../applications/distributedshell/Client.java | 12 ++- .../ContainerLaunchFailAppMaster.java | 83 +++++++++++++++++++ .../TestDistributedShell.java | 33 +++++++- 5 files changed, 149 insertions(+), 15 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/ContainerLaunchFailAppMaster.java diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 986af09f211..360e9841b83 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -174,6 +174,9 @@ Release 2.1.2 - UNRELEASED YARN-1254. Fixed NodeManager to not pollute container's credentials. (Omkar Vinit Joshi via vinodkv) + YARN-1273. Fixed Distributed-shell to account for containers that failed + to start. 
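
The fix below has two parts: the NM callback handler is now produced by an overridable factory method, so the test's ContainerLaunchFailAppMaster can substitute a failure-injecting handler, and a container that fails to start is counted as both completed and failed so the AM's wait loop can still terminate. A small sketch of that accounting, with invented class and field names:

    import java.util.concurrent.atomic.AtomicInteger;

    class CompletionAccountingSketch {
      final AtomicInteger numCompletedContainers = new AtomicInteger();
      final AtomicInteger numFailedContainers = new AtomicInteger();
      final int numTotalContainers = 2;
      volatile boolean done = false;

      void onStartContainerError(Throwable cause) {
        // A container that never started will never report completion, so it
        // must be accounted for here or the loop below waits forever.
        numCompletedContainers.incrementAndGet();
        numFailedContainers.incrementAndGet();
      }

      void waitForCompletion() throws InterruptedException {
        while (!done && numCompletedContainers.get() != numTotalContainers) {
          Thread.sleep(200);
        }
      }
    }
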
(Hitesh Shah via vinodkv) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java index 959ba1c45f0..fa6eb9040d8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java @@ -34,6 +34,7 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.atomic.AtomicInteger; +import com.google.common.annotations.VisibleForTesting; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.GnuParser; import org.apache.commons.cli.HelpFormatter; @@ -281,8 +282,8 @@ public class ApplicationMaster { } } - public ApplicationMaster() throws Exception { - // Set up the configuration and RPC + public ApplicationMaster() { + // Set up the configuration conf = new YarnConfiguration(); } @@ -470,7 +471,7 @@ public class ApplicationMaster { amRMClient.init(conf); amRMClient.start(); - containerListener = new NMCallbackHandler(); + containerListener = createNMCallbackHandler(); nmClientAsync = new NMClientAsyncImpl(containerListener); nmClientAsync.init(conf); nmClientAsync.start(); @@ -500,7 +501,6 @@ public class ApplicationMaster { containerMemory = maxMem; } - // Setup ask for containers from RM // Send request for containers to RM // Until we get our fully allocated quota, we keep on polling RM for @@ -513,7 +513,8 @@ public class ApplicationMaster { } numRequestedContainers.set(numTotalContainers); - while (!done) { + while (!done + && (numCompletedContainers.get() != numTotalContainers)) { try { Thread.sleep(200); } catch (InterruptedException ex) {} @@ -522,7 +523,12 @@ public class ApplicationMaster { return success; } - + + @VisibleForTesting + NMCallbackHandler createNMCallbackHandler() { + return new NMCallbackHandler(this); + } + private void finish() { // Join all launched threads // needed for when we time out @@ -566,7 +572,6 @@ public class ApplicationMaster { LOG.error("Failed to unregister application", e); } - done = true; amRMClient.stop(); } @@ -679,10 +684,17 @@ public class ApplicationMaster { } } - private class NMCallbackHandler implements NMClientAsync.CallbackHandler { + @VisibleForTesting + static class NMCallbackHandler + implements NMClientAsync.CallbackHandler { private ConcurrentMap containers = new ConcurrentHashMap(); + private final ApplicationMaster applicationMaster; + + public NMCallbackHandler(ApplicationMaster applicationMaster) { + this.applicationMaster = applicationMaster; + } public void addContainer(ContainerId containerId, Container container) { containers.putIfAbsent(containerId, container); @@ -713,7 +725,7 @@ public class ApplicationMaster { } Container container = containers.get(containerId); if (container != null) { - nmClientAsync.getContainerStatusAsync(containerId, container.getNodeId()); + applicationMaster.nmClientAsync.getContainerStatusAsync(containerId, 
container.getNodeId()); } } @@ -721,6 +733,8 @@ public class ApplicationMaster { public void onStartContainerError(ContainerId containerId, Throwable t) { LOG.error("Failed to start Container " + containerId); containers.remove(containerId); + applicationMaster.numCompletedContainers.incrementAndGet(); + applicationMaster.numFailedContainers.incrementAndGet(); } @Override @@ -847,7 +861,6 @@ public class ApplicationMaster { /** * Setup the request that will be sent to the RM for the container ask. * - * @param numContainers Containers to ask for from RM * @return the setup ResourceRequest to be sent to RM */ private ContainerRequest setupContainerAskForRM() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java index 7d51a6783f5..01e030a6776 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java @@ -125,8 +125,7 @@ public class Client { // Application master jar file private String appMasterJar = ""; // Main class to invoke application master - private final String appMasterMainClass = - "org.apache.hadoop.yarn.applications.distributedshell.ApplicationMaster"; + private final String appMasterMainClass; // Shell command to be executed private String shellCommand = ""; @@ -193,8 +192,14 @@ public class Client { /** */ public Client(Configuration conf) throws Exception { - + this( + "org.apache.hadoop.yarn.applications.distributedshell.ApplicationMaster", + conf); + } + + Client(String appMasterMainClass, Configuration conf) { this.conf = conf; + this.appMasterMainClass = appMasterMainClass; yarnClient = YarnClient.createYarnClient(); yarnClient.init(conf); opts = new Options(); @@ -214,6 +219,7 @@ public class Client { opts.addOption("log_properties", true, "log4j.properties file"); opts.addOption("debug", false, "Dump out debug information"); opts.addOption("help", false, "Print usage"); + } /** diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/ContainerLaunchFailAppMaster.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/ContainerLaunchFailAppMaster.java new file mode 100644 index 00000000000..2692fff5014 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/ContainerLaunchFailAppMaster.java @@ -0,0 +1,83 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.applications.distributedshell; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.yarn.api.records.ContainerId; + +import java.nio.ByteBuffer; +import java.util.Map; + +public class ContainerLaunchFailAppMaster extends ApplicationMaster { + + private static final Log LOG = + LogFactory.getLog(ContainerLaunchFailAppMaster.class); + + public ContainerLaunchFailAppMaster() { + super(); + } + + @Override + NMCallbackHandler createNMCallbackHandler() { + return new FailContainerLaunchNMCallbackHandler(this); + } + + class FailContainerLaunchNMCallbackHandler + extends ApplicationMaster.NMCallbackHandler { + + public FailContainerLaunchNMCallbackHandler( + ApplicationMaster applicationMaster) { + super(applicationMaster); + } + + @Override + public void onContainerStarted(ContainerId containerId, + Map allServiceResponse) { + super.onStartContainerError(containerId, + new RuntimeException("Inject Container Launch failure")); + } + + } + + public static void main(String[] args) { + boolean result = false; + try { + ContainerLaunchFailAppMaster appMaster = + new ContainerLaunchFailAppMaster(); + LOG.info("Initializing ApplicationMaster"); + boolean doRun = appMaster.init(args); + if (!doRun) { + System.exit(0); + } + result = appMaster.run(); + } catch (Throwable t) { + LOG.fatal("Error running ApplicationMaster", t); + System.exit(1); + } + if (result) { + LOG.info("Application Master completed successfully. exiting"); + System.exit(0); + } else { + LOG.info("Application Master failed. 
exiting"); + System.exit(2); + } + } + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java index 7fbd2a6c9de..f8a41b7395b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java @@ -59,7 +59,7 @@ public class TestDistributedShell { protected static String APPMASTER_JAR = JarFinder.getJar(ApplicationMaster.class); @BeforeClass - public static void setup() throws InterruptedException, Exception { + public static void setup() throws Exception { LOG.info("Starting up YARN cluster"); conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 128); conf.setClass(YarnConfiguration.RM_SCHEDULER, @@ -135,7 +135,7 @@ public class TestDistributedShell { } catch (Exception e) { throw new RuntimeException(e); } - }; + } }; t.start(); @@ -248,5 +248,34 @@ public class TestDistributedShell { Thread.sleep(2000); } } + + @Test(timeout=90000) + public void testContainerLaunchFailureHandling() throws Exception { + String[] args = { + "--jar", + APPMASTER_JAR, + "--num_containers", + "2", + "--shell_command", + Shell.WINDOWS ? "dir" : "ls", + "--master_memory", + "512", + "--container_memory", + "128" + }; + + LOG.info("Initializing DS Client"); + Client client = new Client(ContainerLaunchFailAppMaster.class.getName(), + new Configuration(yarnCluster.getConfig())); + boolean initSuccess = client.init(args); + Assert.assertTrue(initSuccess); + LOG.info("Running DS Client"); + boolean result = client.run(); + + LOG.info("Client run completed. Result=" + result); + Assert.assertFalse(result); + + } + } From cae1ed9f6c438e20acdf385e95041f91ae349c71 Mon Sep 17 00:00:00 2001 From: Jing Zhao Date: Sat, 5 Oct 2013 20:40:48 +0000 Subject: [PATCH 056/133] HDFS-5308. Replace HttpConfig#getSchemePrefix with implicit schemes in HDFS JSP. Contributed by Haohui Mai. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529512 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../server/datanode/DatanodeJspHelper.java | 27 +++++++------------ .../server/namenode/ClusterJspHelper.java | 2 +- .../server/namenode/NamenodeJspHelper.java | 2 +- .../hdfs/server/datanode/TestDatanodeJsp.java | 6 ++--- 5 files changed, 17 insertions(+), 23 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index b4024a93726..cf6eba46a43 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -362,6 +362,9 @@ Release 2.1.2 - UNRELEASED HDFS-5256. Use guava LoadingCache to implement DFSClientCache. (Haohui Mai via brandonli) + HDFS-5308. Replace HttpConfig#getSchemePrefix with implicit schemes in HDFS + JSP. 
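
The JSP hunks below stop hard-coding an http/https prefix and instead emit links without an explicit scheme, so the browser resolves them against the scheme of the page it is viewing. A minimal sketch of the idea; the helper name is invented, and the patch itself simply inlines the string concatenation:

    class LinkSketch {
      // A link starting with "//" keeps the current page's scheme (http or https);
      // a purely path-relative link such as "/streamFile..." also keeps host and port.
      static String schemeRelativeLink(String host, int infoPort, String path) {
        return "//" + host + ":" + infoPort + path;
      }
    }
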
(Haohui Mai via jing9) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DatanodeJspHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DatanodeJspHelper.java index 639468bbc75..11b27131485 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DatanodeJspHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DatanodeJspHelper.java @@ -19,9 +19,7 @@ package org.apache.hadoop.hdfs.server.datanode; import java.io.File; import java.io.IOException; -import java.net.InetAddress; import java.net.InetSocketAddress; -import java.net.URI; import java.net.URL; import java.net.URLEncoder; import java.security.PrivilegedExceptionAction; @@ -39,7 +37,6 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSClient; import org.apache.hadoop.hdfs.DFSConfigKeys; -import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.DirectoryListing; import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; @@ -47,9 +44,6 @@ import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager; import org.apache.hadoop.hdfs.server.common.JspHelper; -import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; -import org.apache.hadoop.hdfs.server.namenode.NameNode; -import org.apache.hadoop.hdfs.server.namenode.NameNodeHttpServer; import org.apache.hadoop.http.HtmlQuoting; import org.apache.hadoop.http.HttpConfig; import org.apache.hadoop.net.NetUtils; @@ -225,7 +219,7 @@ public class DatanodeJspHelper { JspHelper.addTableFooter(out); } } - out.print("
    Go back to DFS home"); dfs.close(); @@ -302,8 +296,7 @@ public class DatanodeJspHelper { Long.MAX_VALUE).getLocatedBlocks(); // Add the various links for looking at the file contents // URL for downloading the full file - String downloadUrl = HttpConfig.getSchemePrefix() + req.getServerName() + ":" - + req.getServerPort() + "/streamFile" + ServletUtil.encodePath(filename) + String downloadUrl = "/streamFile" + ServletUtil.encodePath(filename) + JspHelper.getUrlParam(JspHelper.NAMENODE_ADDRESS, nnAddr, true) + JspHelper.getDelegationTokenUrlParam(tokenString); out.print(""); @@ -320,7 +313,7 @@ public class DatanodeJspHelper { return; } String fqdn = canonicalize(chosenNode.getIpAddr()); - String tailUrl = HttpConfig.getSchemePrefix() + fqdn + ":" + chosenNode.getInfoPort() + String tailUrl = "///" + fqdn + ":" + chosenNode.getInfoPort() + "/tail.jsp?filename=" + URLEncoder.encode(filename, "UTF-8") + "&namenodeInfoPort=" + namenodeInfoPort + "&chunkSizeToView=" + chunkSizeToView @@ -369,7 +362,7 @@ public class DatanodeJspHelper { String datanodeAddr = locs[j].getXferAddr(); datanodePort = locs[j].getXferPort(); fqdn = canonicalize(locs[j].getIpAddr()); - String blockUrl = HttpConfig.getSchemePrefix() + fqdn + ":" + locs[j].getInfoPort() + String blockUrl = "///" + fqdn + ":" + locs[j].getInfoPort() + "/browseBlock.jsp?blockId=" + blockidstring + "&blockSize=" + blockSize + "&filename=" + URLEncoder.encode(filename, "UTF-8") @@ -380,7 +373,7 @@ public class DatanodeJspHelper { + JspHelper.getDelegationTokenUrlParam(tokenString) + JspHelper.getUrlParam(JspHelper.NAMENODE_ADDRESS, nnAddr); - String blockInfoUrl = HttpConfig.getSchemePrefix() + nnCanonicalName + ":" + String blockInfoUrl = "///" + nnCanonicalName + ":" + namenodeInfoPort + "/block_info_xml.jsp?blockId=" + blockidstring; out.print("

    Snapshottable directories " @@ -391,7 +384,7 @@ public class DatanodeJspHelper { } out.println("
    "); out.print("
    "); - out.print("
    Go back to DFS home"); dfs.close(); @@ -491,9 +484,7 @@ public class DatanodeJspHelper { String parent = new File(filename).getParent(); JspHelper.printGotoForm(out, namenodeInfoPort, tokenString, parent, nnAddr); out.print("
    "); - out.print(" Date: Sat, 5 Oct 2013 22:04:53 +0000 Subject: [PATCH 057/133] YARN-1268. TestFairScheduer.testContinuousScheduling is flaky (Sandy Ryza) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529529 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 2 ++ .../scheduler/fair/TestFairScheduler.java | 12 +++++++----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 360e9841b83..3645fb6ba53 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -177,6 +177,8 @@ Release 2.1.2 - UNRELEASED YARN-1273. Fixed Distributed-shell to account for containers that failed to start. (Hitesh Shah via vinodkv) + YARN-1268. TestFairScheduler.testContinuousScheduling is flaky (Sandy Ryza) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index f09d2c4a04f..28b95e41963 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -2279,7 +2279,7 @@ public class TestFairScheduler { fs.applications, FSSchedulerApp.class); } - @Test + @Test (timeout = 5000) public void testContinuousScheduling() throws Exception { // set continuous scheduling enabled FairScheduler fs = new FairScheduler(); @@ -2315,11 +2315,13 @@ public class TestFairScheduler { // at least one pass Thread.sleep(fs.getConf().getContinuousSchedulingSleepMs() + 500); + FSSchedulerApp app = fs.applications.get(appAttemptId); + // Wait until app gets resources. + while (app.getCurrentConsumption().equals(Resources.none())) { } + // check consumption - Resource consumption = - fs.applications.get(appAttemptId).getCurrentConsumption(); - Assert.assertEquals(1024, consumption.getMemory()); - Assert.assertEquals(1, consumption.getVirtualCores()); + Assert.assertEquals(1024, app.getCurrentConsumption().getMemory()); + Assert.assertEquals(1, app.getCurrentConsumption().getVirtualCores()); } From 25361d56cf824ae2e68f45a6962146ba7bd54e01 Mon Sep 17 00:00:00 2001 From: Sanford Ryza Date: Sat, 5 Oct 2013 22:10:14 +0000 Subject: [PATCH 058/133] Fix location of YARN-1268 in CHANGES.txt git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529531 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 3645fb6ba53..6e8ad1edbd9 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -64,6 +64,8 @@ Release 2.3.0 - UNRELEASED YARN-1188. The context of QueueMetrics becomes default when using FairScheduler (Tsuyoshi Ozawa via Sandy Ryza) + YARN-1268. 
TestFairScheduler.testContinuousScheduling is flaky (Sandy Ryza) + Release 2.2.0 - UNRELEASED INCOMPATIBLE CHANGES @@ -177,8 +179,6 @@ Release 2.1.2 - UNRELEASED YARN-1273. Fixed Distributed-shell to account for containers that failed to start. (Hitesh Shah via vinodkv) - YARN-1268. TestFairScheduler.testContinuousScheduling is flaky (Sandy Ryza) - Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES From d55f3780fbf9308554ef3362c2be89651db43f46 Mon Sep 17 00:00:00 2001 From: Arun Murthy Date: Sat, 5 Oct 2013 22:20:18 +0000 Subject: [PATCH 059/133] YARN-1032. Fixed NPE in RackResolver. Contributed by Lohit Vijayarenu. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529534 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 2 ++ .../org/apache/hadoop/yarn/util/RackResolver.java | 12 ++++++++++-- .../apache/hadoop/yarn/util/TestRackResolver.java | 10 ++++++++++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 6e8ad1edbd9..0c3a0307fbf 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -179,6 +179,8 @@ Release 2.1.2 - UNRELEASED YARN-1273. Fixed Distributed-shell to account for containers that failed to start. (Hitesh Shah via vinodkv) + YARN-1032. Fixed NPE in RackResolver. (Lohit Vijayarenu via acmurthy) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/RackResolver.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/RackResolver.java index 601f8abc08e..cc2a56c3be6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/RackResolver.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/RackResolver.java @@ -29,6 +29,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.net.CachedDNSToSwitchMapping; import org.apache.hadoop.net.DNSToSwitchMapping; +import org.apache.hadoop.net.NetworkTopology; import org.apache.hadoop.net.Node; import org.apache.hadoop.net.NodeBase; import org.apache.hadoop.net.ScriptBasedMapping; @@ -98,8 +99,15 @@ public class RackResolver { List tmpList = new ArrayList(1); tmpList.add(hostName); List rNameList = dnsToSwitchMapping.resolve(tmpList); - String rName = rNameList.get(0); - LOG.info("Resolved " + hostName + " to " + rName); + String rName = null; + if (rNameList == null || rNameList.get(0) == null) { + rName = NetworkTopology.DEFAULT_RACK; + LOG.info("Couldn't resolve " + hostName + ". 
Falling back to " + + NetworkTopology.DEFAULT_RACK); + } else { + rName = rNameList.get(0); + LOG.info("Resolved " + hostName + " to " + rName); + } return new NodeBase(hostName, rName); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestRackResolver.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestRackResolver.java index 42f7b9b5091..70ca23c3a2e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestRackResolver.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestRackResolver.java @@ -28,6 +28,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.net.DNSToSwitchMapping; +import org.apache.hadoop.net.NetworkTopology; import org.apache.hadoop.net.Node; import org.junit.Assert; import org.junit.Test; @@ -35,6 +36,8 @@ import org.junit.Test; public class TestRackResolver { private static Log LOG = LogFactory.getLog(TestRackResolver.class); + private static final String invalidHost = "invalidHost"; + public static final class MyResolver implements DNSToSwitchMapping { @@ -50,6 +53,11 @@ public class TestRackResolver { if (hostList.isEmpty()) { return returnList; } + if (hostList.get(0).equals(invalidHost)) { + // Simulate condition where resolving host returns null + return null; + } + LOG.info("Received resolve request for " + hostList.get(0)); if (hostList.get(0).equals("host1") @@ -90,6 +98,8 @@ public class TestRackResolver { Assert.assertEquals("/rack1", node.getNetworkLocation()); node = RackResolver.resolve("host1"); Assert.assertEquals("/rack1", node.getNetworkLocation()); + node = RackResolver.resolve(invalidHost); + Assert.assertEquals(NetworkTopology.DEFAULT_RACK, node.getNetworkLocation()); } } From 5e2d93b4d3663319d5968f5b11b410c53f56d8c8 Mon Sep 17 00:00:00 2001 From: Arun Murthy Date: Sat, 5 Oct 2013 22:26:42 +0000 Subject: [PATCH 060/133] YARN-1090. Fixed CS UI to better reflect applications as non-schedulable and not as pending. Contributed by Jian He. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529538 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 +++ .../resourcemanager/scheduler/QueueMetrics.java | 2 +- .../webapp/CapacitySchedulerPage.java | 13 +++++++------ 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 0c3a0307fbf..73b48a164f6 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -181,6 +181,9 @@ Release 2.1.2 - UNRELEASED YARN-1032. Fixed NPE in RackResolver. (Lohit Vijayarenu via acmurthy) + YARN-1090. Fixed CS UI to better reflect applications as non-schedulable + and not as pending. 
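
Stepping back to the RackResolver hunk above (YARN-1032): the guard it adds is the usual null-safe fallback for a topology mapping that cannot resolve a host. A minimal sketch of that pattern, with an invented class and helper name:

    import java.util.Arrays;
    import java.util.List;

    import org.apache.hadoop.net.DNSToSwitchMapping;
    import org.apache.hadoop.net.NetworkTopology;

    class RackFallbackSketch {
      // Map a missing or null resolution to the default rack instead of letting
      // rNameList.get(0) throw a NullPointerException.
      static String resolveRack(DNSToSwitchMapping mapping, String host) {
        List<String> racks = mapping.resolve(Arrays.asList(host));
        if (racks == null || racks.get(0) == null) {
          return NetworkTopology.DEFAULT_RACK;
        }
        return racks.get(0);
      }
    }
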
(Jian He via acmurthy) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java index 9d2c739e480..8a030952504 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java @@ -73,7 +73,7 @@ public class QueueMetrics implements MetricsSource { @Metric("Reserved CPU in virtual cores") MutableGaugeInt reservedVCores; @Metric("# of reserved containers") MutableGaugeInt reservedContainers; @Metric("# of active users") MutableGaugeInt activeUsers; - @Metric("# of active users") MutableGaugeInt activeApplications; + @Metric("# of active applications") MutableGaugeInt activeApplications; private final MutableGaugeInt[] runningTime; private TimeBucketMetrics runBuckets; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/CapacitySchedulerPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/CapacitySchedulerPage.java index 0bf851722e2..900c1a62dda 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/CapacitySchedulerPage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/CapacitySchedulerPage.java @@ -98,24 +98,25 @@ class CapacitySchedulerPage extends RmView { for (UserInfo entry: users) { activeUserList.append(entry.getUsername()).append(" <") .append(getPercentage(entry.getResourcesUsed(), usedResources)) - .append(", Active Apps: " + entry.getNumActiveApplications()) - .append(", Pending Apps: " + entry.getNumPendingApplications()) + .append(", Schedulable Apps: " + entry.getNumActiveApplications()) + .append(", Non-Schedulable Apps: " + entry.getNumPendingApplications()) .append(">
    "); //Force line break } ResponseInfo ri = info("\'" + lqinfo.getQueuePath().substring(5) + "\' Queue Status"). _("Queue State:", lqinfo.getQueueState()). _("Used Capacity:", percent(lqinfo.getUsedCapacity() / 100)). + _("Absolute Used Capacity:", percent(lqinfo.getAbsoluteUsedCapacity() / 100)). _("Absolute Capacity:", percent(lqinfo.getAbsoluteCapacity() / 100)). _("Absolute Max Capacity:", percent(lqinfo.getAbsoluteMaxCapacity() / 100)). _("Used Resources:", StringEscapeUtils.escapeHtml(lqinfo.getUsedResources().toString())). - _("Num Active Applications:", Integer.toString(lqinfo.getNumActiveApplications())). - _("Num Pending Applications:", Integer.toString(lqinfo.getNumPendingApplications())). + _("Num Schedulable Applications:", Integer.toString(lqinfo.getNumActiveApplications())). + _("Num Non-Schedulable Applications:", Integer.toString(lqinfo.getNumPendingApplications())). _("Num Containers:", Integer.toString(lqinfo.getNumContainers())). _("Max Applications:", Integer.toString(lqinfo.getMaxApplications())). _("Max Applications Per User:", Integer.toString(lqinfo.getMaxApplicationsPerUser())). - _("Max Active Applications:", Integer.toString(lqinfo.getMaxActiveApplications())). - _("Max Active Applications Per User:", Integer.toString(lqinfo.getMaxActiveApplicationsPerUser())). + _("Max Schedulable Applications:", Integer.toString(lqinfo.getMaxActiveApplications())). + _("Max Schedulable Applications Per User:", Integer.toString(lqinfo.getMaxActiveApplicationsPerUser())). _("Configured Capacity:", percent(lqinfo.getCapacity() / 100)). _("Configured Max Capacity:", percent(lqinfo.getMaxCapacity() / 100)). _("Configured Minimum User Limit Percent:", Integer.toString(lqinfo.getUserLimit()) + "%"). From 52b0ce35655b33d81132bd53b8c28e993cc8bfa9 Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Sun, 6 Oct 2013 00:14:55 +0000 Subject: [PATCH 061/133] YARN-1274. Fixed NodeManager's LinuxContainerExecutor to create user, app-dir and log-dirs correctly even when there are no resources to localize for the container. Contributed by Siddharth Seth. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529555 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 4 ++ .../impl/container-executor.c | 65 +++++++++++++------ 2 files changed, 49 insertions(+), 20 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 73b48a164f6..dff651395ad 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -184,6 +184,10 @@ Release 2.1.2 - UNRELEASED YARN-1090. Fixed CS UI to better reflect applications as non-schedulable and not as pending. (Jian He via acmurthy) + YARN-1274. Fixed NodeManager's LinuxContainerExecutor to create user, app-dir + and log-dirs correctly even when there are no resources to localize for the + container. 
(Siddharth Seth via vinodkv) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c index d5d894d42da..4fc21cbc742 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c @@ -751,28 +751,11 @@ int initialize_user(const char *user, char* const* local_dirs) { return failed ? INITIALIZE_USER_FAILED : 0; } -/** - * Function to prepare the application directories for the container. - */ -int initialize_app(const char *user, const char *app_id, - const char* nmPrivate_credentials_file, - char* const* local_dirs, char* const* log_roots, - char* const* args) { - if (app_id == NULL || user == NULL || user_detail == NULL || user_detail->pw_name == NULL) { - fprintf(LOGFILE, "Either app_id is null or the user passed is null.\n"); - return INVALID_ARGUMENT_NUMBER; - } +int create_log_dirs(const char *app_id, char * const * log_dirs) { - // create the user directory on all disks - int result = initialize_user(user, local_dirs); - if (result != 0) { - return result; - } - - ////////////// create the log directories for the app on all disks char* const* log_root; char *any_one_app_log_dir = NULL; - for(log_root=log_roots; *log_root != NULL; ++log_root) { + for(log_root=log_dirs; *log_root != NULL; ++log_root) { char *app_log_dir = get_app_log_directory(*log_root, app_id); if (app_log_dir == NULL) { // try the next one @@ -791,7 +774,33 @@ int initialize_app(const char *user, const char *app_id, return -1; } free(any_one_app_log_dir); - ////////////// End of creating the log directories for the app on all disks + return 0; +} + + +/** + * Function to prepare the application directories for the container. + */ +int initialize_app(const char *user, const char *app_id, + const char* nmPrivate_credentials_file, + char* const* local_dirs, char* const* log_roots, + char* const* args) { + if (app_id == NULL || user == NULL || user_detail == NULL || user_detail->pw_name == NULL) { + fprintf(LOGFILE, "Either app_id is null or the user passed is null.\n"); + return INVALID_ARGUMENT_NUMBER; + } + + // create the user directory on all disks + int result = initialize_user(user, local_dirs); + if (result != 0) { + return result; + } + + // create the log directories for the app on all disks + int log_create_result = create_log_dirs(app_id, log_roots); + if (log_create_result != 0) { + return log_create_result; + } // open up the credentials file int cred_file = open_file_as_nm(nmPrivate_credentials_file); @@ -922,18 +931,34 @@ int launch_container_as_user(const char *user, const char *app_id, } } + // create the user directory on all disks + int result = initialize_user(user, local_dirs); + if (result != 0) { + return result; + } + + // initializing log dirs + int log_create_result = create_log_dirs(app_id, log_dirs); + if (log_create_result != 0) { + return log_create_result; + } + // give up root privs if (change_user(user_detail->pw_uid, user_detail->pw_gid) != 0) { exit_code = SETUID_OPER_FAILED; goto cleanup; } + // Create container specific directories as user. 
If there are no resources + // to localize for this container, app-directories and log-directories are + // also created automatically as part of this call. if (create_container_directories(user, app_id, container_id, local_dirs, log_dirs, work_dir) != 0) { fprintf(LOGFILE, "Could not create container dirs"); goto cleanup; } + // 700 if (copy_file(container_file_source, script_name, script_file_dest,S_IRWXU) != 0) { goto cleanup; From 8e0804666189ce9a66b7b41b744776bad29770dd Mon Sep 17 00:00:00 2001 From: Brandon Li Date: Sun, 6 Oct 2013 03:22:57 +0000 Subject: [PATCH 062/133] HDFS-5306. Datanode https port is not available at the namenode. Contributed by Suresh Srinivas. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529562 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../hadoop/hdfs/protocol/DatanodeID.java | 22 ++- .../hadoop/hdfs/protocol/DatanodeInfo.java | 21 ++- .../hadoop/hdfs/protocolPB/PBHelper.java | 4 +- .../blockmanagement/DatanodeManager.java | 63 +++---- .../hadoop/hdfs/server/common/JspHelper.java | 45 ++--- .../hadoop/hdfs/server/datanode/DataNode.java | 154 +++++------------- .../org/apache/hadoop/hdfs/web/JsonUtil.java | 29 +--- .../hadoop-hdfs/src/main/proto/hdfs.proto | 3 +- .../org/apache/hadoop/hdfs/DFSTestUtil.java | 69 +++----- .../hadoop/hdfs/TestDatanodeRegistration.java | 23 +-- .../hadoop/hdfs/TestFileInputStreamCache.java | 17 +- .../org/apache/hadoop/hdfs/TestPeerCache.java | 29 ++-- .../hdfs/server/common/TestJspHelper.java | 45 ++--- .../namenode/NNThroughputBenchmark.java | 45 ++--- .../TestCommitBlockSynchronization.java | 18 +- 16 files changed, 219 insertions(+), 371 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index cf6eba46a43..c5fe5c72b0b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -397,6 +397,9 @@ Release 2.1.2 - UNRELEASED HDFS-5300. FSNameSystem#deleteSnapshot() should not check owner in case of permissions disabled. (Vinay via jing9) + HDFS-5306. Datanode https port is not available at the namenode. 
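
The hunks below thread a separate HTTPS info port through DatanodeID, DatanodeInfo and their protobuf form; note the decode-side guard in PBHelper, which defaults the new optional field to 0 for messages from older senders. A short sketch of how a caller can then pick the right web endpoint, assuming the pre-existing getInfoAddr() accessor; the class and helper below are invented for illustration:

    import org.apache.hadoop.hdfs.protocol.DatanodeID;

    class DatanodeWebAddrSketch {
      // Choose the datanode web address matching the cluster's HTTP policy now
      // that both ports travel with the DatanodeID.
      static String infoAddrFor(DatanodeID dn, boolean httpsEnabled) {
        return httpsEnabled ? dn.getInfoSecureAddr()  // "ip:infoSecurePort"
                            : dn.getInfoAddr();       // "ip:infoPort"
      }
    }
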
(Suresh + Srinivas via brandonli) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeID.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeID.java index 2a0578ca93f..9a012107b6d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeID.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeID.java @@ -43,6 +43,7 @@ public class DatanodeID implements Comparable { private String storageID; // unique per cluster storageID private int xferPort; // data streaming port private int infoPort; // info server port + private int infoSecurePort; // info server port private int ipcPort; // IPC server port public DatanodeID(DatanodeID from) { @@ -51,6 +52,7 @@ public class DatanodeID implements Comparable { from.getStorageID(), from.getXferPort(), from.getInfoPort(), + from.getInfoSecurePort(), from.getIpcPort()); this.peerHostName = from.getPeerHostName(); } @@ -65,12 +67,13 @@ public class DatanodeID implements Comparable { * @param ipcPort ipc server port */ public DatanodeID(String ipAddr, String hostName, String storageID, - int xferPort, int infoPort, int ipcPort) { + int xferPort, int infoPort, int infoSecurePort, int ipcPort) { this.ipAddr = ipAddr; this.hostName = hostName; this.storageID = storageID; this.xferPort = xferPort; this.infoPort = infoPort; + this.infoSecurePort = infoSecurePort; this.ipcPort = ipcPort; } @@ -128,6 +131,13 @@ public class DatanodeID implements Comparable { return ipAddr + ":" + infoPort; } + /** + * @return IP:infoPort string + */ + public String getInfoSecureAddr() { + return ipAddr + ":" + infoSecurePort; + } + /** * @return hostname:xferPort */ @@ -179,6 +189,13 @@ public class DatanodeID implements Comparable { return infoPort; } + /** + * @return infoSecurePort (the port at which the HTTPS server bound to) + */ + public int getInfoSecurePort() { + return infoSecurePort; + } + /** * @return ipcPort (the port at which the IPC server bound to) */ @@ -218,13 +235,14 @@ public class DatanodeID implements Comparable { peerHostName = nodeReg.getPeerHostName(); xferPort = nodeReg.getXferPort(); infoPort = nodeReg.getInfoPort(); + infoSecurePort = nodeReg.getInfoSecurePort(); ipcPort = nodeReg.getIpcPort(); } /** * Compare based on data transfer address. 
* - * @param that + * @param that datanode to compare with * @return as specified by Comparable */ @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeInfo.java index 3f5715b0afd..1d2bbab15e8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeInfo.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeInfo.java @@ -17,10 +17,6 @@ */ package org.apache.hadoop.hdfs.protocol; -import static org.apache.hadoop.hdfs.DFSUtil.percent2String; - -import java.util.Date; - import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.hdfs.DFSConfigKeys; @@ -32,6 +28,10 @@ import org.apache.hadoop.net.NodeBase; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; +import java.util.Date; + +import static org.apache.hadoop.hdfs.DFSUtil.percent2String; + /** * This class extends the primary identifier of a Datanode with ephemeral * state, eg usage information, current administrative state, and the @@ -108,18 +108,21 @@ public class DatanodeInfo extends DatanodeID implements Node { final long capacity, final long dfsUsed, final long remaining, final long blockPoolUsed, final long lastUpdate, final int xceiverCount, final AdminStates adminState) { - this(nodeID.getIpAddr(), nodeID.getHostName(), nodeID.getStorageID(), nodeID.getXferPort(), - nodeID.getInfoPort(), nodeID.getIpcPort(), capacity, dfsUsed, remaining, - blockPoolUsed, lastUpdate, xceiverCount, location, adminState); + this(nodeID.getIpAddr(), nodeID.getHostName(), nodeID.getStorageID(), + nodeID.getXferPort(), nodeID.getInfoPort(), nodeID.getInfoSecurePort(), + nodeID.getIpcPort(), capacity, dfsUsed, remaining, blockPoolUsed, + lastUpdate, xceiverCount, location, adminState); } /** Constructor */ public DatanodeInfo(final String ipAddr, final String hostName, - final String storageID, final int xferPort, final int infoPort, final int ipcPort, + final String storageID, final int xferPort, final int infoPort, + final int infoSecurePort, final int ipcPort, final long capacity, final long dfsUsed, final long remaining, final long blockPoolUsed, final long lastUpdate, final int xceiverCount, final String networkLocation, final AdminStates adminState) { - super(ipAddr, hostName, storageID, xferPort, infoPort, ipcPort); + super(ipAddr, hostName, storageID, xferPort, infoPort, + infoSecurePort, ipcPort); this.capacity = capacity; this.dfsUsed = dfsUsed; this.remaining = remaining; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java index a3b60a765df..18a689fe678 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java @@ -223,7 +223,8 @@ public class PBHelper { // DatanodeId public static DatanodeID convert(DatanodeIDProto dn) { return new DatanodeID(dn.getIpAddr(), dn.getHostName(), dn.getStorageID(), - dn.getXferPort(), dn.getInfoPort(), dn.getIpcPort()); + dn.getXferPort(), dn.getInfoPort(), dn.hasInfoSecurePort() ? 
dn + .getInfoSecurePort() : 0, dn.getIpcPort()); } public static DatanodeIDProto convert(DatanodeID dn) { @@ -233,6 +234,7 @@ public class PBHelper { .setStorageID(dn.getStorageID()) .setXferPort(dn.getXferPort()) .setInfoPort(dn.getInfoPort()) + .setInfoSecurePort(dn.getInfoSecurePort()) .setIpcPort(dn.getIpcPort()).build(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java index 71d48a80d38..b236419813c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java @@ -17,21 +17,9 @@ */ package org.apache.hadoop.hdfs.server.blockmanagement; -import static org.apache.hadoop.util.Time.now; - -import java.io.IOException; -import java.io.PrintWriter; -import java.net.InetAddress; -import java.net.UnknownHostException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Comparator; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.NavigableMap; -import java.util.TreeMap; - +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.net.InetAddresses; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.HadoopIllegalArgumentException; @@ -41,13 +29,8 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; -import org.apache.hadoop.hdfs.protocol.Block; -import org.apache.hadoop.hdfs.protocol.DatanodeID; -import org.apache.hadoop.hdfs.protocol.DatanodeInfo; -import org.apache.hadoop.hdfs.protocol.ExtendedBlock; +import org.apache.hadoop.hdfs.protocol.*; import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; -import org.apache.hadoop.hdfs.protocol.LocatedBlock; -import org.apache.hadoop.hdfs.protocol.UnregisteredNodeException; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.BlockTargetPair; import org.apache.hadoop.hdfs.server.namenode.HostFileManager; import org.apache.hadoop.hdfs.server.namenode.HostFileManager.Entry; @@ -55,32 +38,23 @@ import org.apache.hadoop.hdfs.server.namenode.HostFileManager.EntrySet; import org.apache.hadoop.hdfs.server.namenode.HostFileManager.MutableEntrySet; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.Namesystem; -import org.apache.hadoop.hdfs.server.protocol.BalancerBandwidthCommand; -import org.apache.hadoop.hdfs.server.protocol.BlockCommand; -import org.apache.hadoop.hdfs.server.protocol.BlockRecoveryCommand; +import org.apache.hadoop.hdfs.server.protocol.*; import org.apache.hadoop.hdfs.server.protocol.BlockRecoveryCommand.RecoveringBlock; -import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand; -import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol; -import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; -import org.apache.hadoop.hdfs.server.protocol.DisallowedDatanodeException; -import org.apache.hadoop.hdfs.server.protocol.RegisterCommand; import org.apache.hadoop.hdfs.util.CyclicIteration; import org.apache.hadoop.ipc.Server; -import 
org.apache.hadoop.net.CachedDNSToSwitchMapping; -import org.apache.hadoop.net.DNSToSwitchMapping; -import org.apache.hadoop.net.NetUtils; -import org.apache.hadoop.net.NetworkTopology; +import org.apache.hadoop.net.*; import org.apache.hadoop.net.NetworkTopology.InvalidTopologyException; -import org.apache.hadoop.net.Node; -import org.apache.hadoop.net.NodeBase; -import org.apache.hadoop.net.ScriptBasedMapping; import org.apache.hadoop.util.Daemon; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.Time; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.net.InetAddresses; +import java.io.IOException; +import java.io.PrintWriter; +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.util.*; + +import static org.apache.hadoop.util.Time.now; /** * Manage datanodes, include decommission and other activities. @@ -127,6 +101,8 @@ public class DatanodeManager { private final int defaultInfoPort; + private final int defaultInfoSecurePort; + private final int defaultIpcPort; /** Read include/exclude files*/ @@ -188,7 +164,10 @@ public class DatanodeManager { DFSConfigKeys.DFS_DATANODE_ADDRESS_DEFAULT)).getPort(); this.defaultInfoPort = NetUtils.createSocketAddr( conf.get(DFSConfigKeys.DFS_DATANODE_HTTP_ADDRESS_KEY, - DFSConfigKeys.DFS_DATANODE_HTTPS_ADDRESS_DEFAULT)).getPort(); + DFSConfigKeys.DFS_DATANODE_HTTP_ADDRESS_DEFAULT)).getPort(); + this.defaultInfoSecurePort = NetUtils.createSocketAddr( + conf.get(DFSConfigKeys.DFS_DATANODE_HTTPS_ADDRESS_KEY, + DFSConfigKeys.DFS_DATANODE_HTTPS_ADDRESS_DEFAULT)).getPort(); this.defaultIpcPort = NetUtils.createSocketAddr( conf.get(DFSConfigKeys.DFS_DATANODE_IPC_ADDRESS_KEY, DFSConfigKeys.DFS_DATANODE_IPC_ADDRESS_DEFAULT)).getPort(); @@ -1128,6 +1107,7 @@ public class DatanodeManager { // The IP:port is sufficient for listing in a report dnId = new DatanodeID(hostStr, "", "", port, DFSConfigKeys.DFS_DATANODE_HTTP_DEFAULT_PORT, + DFSConfigKeys.DFS_DATANODE_HTTPS_DEFAULT_PORT, DFSConfigKeys.DFS_DATANODE_IPC_DEFAULT_PORT); } else { String ipAddr = ""; @@ -1138,6 +1118,7 @@ public class DatanodeManager { } dnId = new DatanodeID(ipAddr, hostStr, "", port, DFSConfigKeys.DFS_DATANODE_HTTP_DEFAULT_PORT, + DFSConfigKeys.DFS_DATANODE_HTTPS_DEFAULT_PORT, DFSConfigKeys.DFS_DATANODE_IPC_DEFAULT_PORT); } return dnId; @@ -1185,7 +1166,7 @@ public class DatanodeManager { new DatanodeDescriptor(new DatanodeID(entry.getIpAddress(), entry.getPrefix(), "", entry.getPort() == 0 ? 
defaultXferPort : entry.getPort(), - defaultInfoPort, defaultIpcPort)); + defaultInfoPort, defaultInfoSecurePort, defaultIpcPort)); dn.setLastUpdate(0); // Consider this node dead for reporting nodes.add(dn); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java index 0b82c12b1bd..2728353cc9f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java @@ -18,25 +18,7 @@ package org.apache.hadoop.hdfs.server.common; -import java.io.ByteArrayInputStream; -import java.io.DataInputStream; -import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.net.InetSocketAddress; -import java.net.Socket; -import java.net.URL; -import java.net.URLEncoder; -import java.util.Arrays; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.List; -import java.util.TreeSet; - -import javax.servlet.ServletContext; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.jsp.JspWriter; - +import com.google.common.base.Charsets; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; @@ -47,13 +29,9 @@ import org.apache.hadoop.hdfs.BlockReaderFactory; import org.apache.hadoop.hdfs.DFSClient; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.net.TcpPeerServer; -import org.apache.hadoop.hdfs.protocol.DatanodeID; -import org.apache.hadoop.hdfs.protocol.DatanodeInfo; -import org.apache.hadoop.hdfs.protocol.ExtendedBlock; -import org.apache.hadoop.hdfs.protocol.LocatedBlock; -import org.apache.hadoop.hdfs.protocol.LocatedBlocks; -import org.apache.hadoop.hdfs.security.token.block.DataEncryptionKey; +import org.apache.hadoop.hdfs.protocol.*; import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; +import org.apache.hadoop.hdfs.security.token.block.DataEncryptionKey; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; import org.apache.hadoop.hdfs.server.datanode.CachingStrategy; @@ -74,10 +52,21 @@ import org.apache.hadoop.security.authorize.ProxyUsers; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.VersionInfo; -import com.google.common.base.Charsets; +import javax.servlet.ServletContext; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.jsp.JspWriter; +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.net.InetSocketAddress; +import java.net.Socket; +import java.net.URL; +import java.net.URLEncoder; +import java.util.*; -import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_HTTP_STATIC_USER; import static org.apache.hadoop.fs.CommonConfigurationKeys.DEFAULT_HADOOP_HTTP_STATIC_USER; +import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_HTTP_STATIC_USER; @InterfaceAudience.Private public class JspHelper { @@ -217,7 +206,7 @@ public class JspHelper { offsetIntoBlock, amtToRead, true, "JspHelper", TcpPeerServer.peerFromSocketAndKey(s, encryptionKey), new DatanodeID(addr.getAddress().getHostAddress(), - addr.getHostName(), 
poolId, addr.getPort(), 0, 0), null, + addr.getHostName(), poolId, addr.getPort(), 0, 0, 0), null, null, null, false, CachingStrategy.newDefaultStrategy()); final byte[] buf = new byte[amtToRead]; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index b86a5caebd1..7b2b7d1b001 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -18,66 +18,10 @@ package org.apache.hadoop.hdfs.server.datanode; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ADMIN; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_HTTPS_NEED_AUTH_DEFAULT; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_HTTPS_NEED_AUTH_KEY; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_ADDRESS_DEFAULT; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_ADDRESS_KEY; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DATA_DIR_PERMISSION_DEFAULT; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DATA_DIR_PERMISSION_KEY; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_DEFAULT; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DNS_INTERFACE_DEFAULT; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DNS_INTERFACE_KEY; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DNS_NAMESERVER_DEFAULT; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DNS_NAMESERVER_KEY; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_HANDLER_COUNT_DEFAULT; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_HANDLER_COUNT_KEY; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_HOST_NAME_KEY; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_HTTPS_ADDRESS_KEY; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_HTTP_ADDRESS_DEFAULT; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_HTTP_ADDRESS_KEY; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_IPC_ADDRESS_KEY; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_KEYTAB_FILE_KEY; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_PLUGINS_KEY; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_DEFAULT; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_STARTUP_KEY; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_USER_NAME_KEY; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HTTPS_ENABLE_KEY; -import static org.apache.hadoop.util.ExitUtil.terminate; - -import java.io.BufferedOutputStream; -import java.io.ByteArrayInputStream; -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.io.PrintStream; -import java.net.InetSocketAddress; -import java.net.Socket; -import java.net.SocketException; 
-import java.net.SocketTimeoutException; -import java.net.URI; -import java.net.UnknownHostException; -import java.nio.channels.ClosedByInterruptException; -import java.nio.channels.SocketChannel; -import java.security.PrivilegedExceptionAction; -import java.util.AbstractList; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.EnumSet; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.atomic.AtomicInteger; - +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.protobuf.BlockingService; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; @@ -94,37 +38,15 @@ import org.apache.hadoop.hdfs.HDFSPolicyProvider; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.net.DomainPeerServer; import org.apache.hadoop.hdfs.net.TcpPeerServer; -import org.apache.hadoop.hdfs.protocol.Block; -import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo; -import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol; -import org.apache.hadoop.hdfs.protocol.DatanodeID; -import org.apache.hadoop.hdfs.protocol.DatanodeInfo; -import org.apache.hadoop.hdfs.protocol.ExtendedBlock; -import org.apache.hadoop.hdfs.protocol.HdfsBlocksMetadata; -import org.apache.hadoop.hdfs.protocol.HdfsConstants; -import org.apache.hadoop.hdfs.protocol.RecoveryInProgressException; -import org.apache.hadoop.hdfs.protocol.datatransfer.BlockConstructionStage; -import org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferEncryptor; -import org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferProtocol; -import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair; -import org.apache.hadoop.hdfs.protocol.datatransfer.Sender; +import org.apache.hadoop.hdfs.protocol.*; +import org.apache.hadoop.hdfs.protocol.datatransfer.*; import org.apache.hadoop.hdfs.protocol.proto.ClientDatanodeProtocolProtos.ClientDatanodeProtocolService; import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.DNTransferAckProto; import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status; import org.apache.hadoop.hdfs.protocol.proto.InterDatanodeProtocolProtos.InterDatanodeProtocolService; -import org.apache.hadoop.hdfs.protocolPB.ClientDatanodeProtocolPB; -import org.apache.hadoop.hdfs.protocolPB.ClientDatanodeProtocolServerSideTranslatorPB; -import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB; -import org.apache.hadoop.hdfs.protocolPB.InterDatanodeProtocolPB; -import org.apache.hadoop.hdfs.protocolPB.InterDatanodeProtocolServerSideTranslatorPB; -import org.apache.hadoop.hdfs.protocolPB.InterDatanodeProtocolTranslatorPB; -import org.apache.hadoop.hdfs.protocolPB.PBHelper; -import org.apache.hadoop.hdfs.security.token.block.BlockPoolTokenSecretManager; -import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; -import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager; +import org.apache.hadoop.hdfs.protocolPB.*; +import org.apache.hadoop.hdfs.security.token.block.*; import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager.AccessMode; -import org.apache.hadoop.hdfs.security.token.block.ExportedBlockKeys; -import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException; import 
org.apache.hadoop.hdfs.server.common.HdfsServerConstants; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; @@ -139,11 +61,7 @@ import org.apache.hadoop.hdfs.server.datanode.web.resources.DatanodeWebHdfsMetho import org.apache.hadoop.hdfs.server.namenode.FileChecksumServlets; import org.apache.hadoop.hdfs.server.namenode.StreamFile; import org.apache.hadoop.hdfs.server.protocol.BlockRecoveryCommand.RecoveringBlock; -import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol; -import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; -import org.apache.hadoop.hdfs.server.protocol.InterDatanodeProtocol; -import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; -import org.apache.hadoop.hdfs.server.protocol.ReplicaRecoveryInfo; +import org.apache.hadoop.hdfs.server.protocol.*; import org.apache.hadoop.hdfs.web.WebHdfsFileSystem; import org.apache.hadoop.hdfs.web.resources.Param; import org.apache.hadoop.http.HttpServer; @@ -164,22 +82,21 @@ import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; import org.apache.hadoop.security.authorize.AccessControlList; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; -import org.apache.hadoop.util.Daemon; -import org.apache.hadoop.util.DiskChecker; +import org.apache.hadoop.util.*; import org.apache.hadoop.util.DiskChecker.DiskErrorException; import org.apache.hadoop.util.DiskChecker.DiskOutOfSpaceException; -import org.apache.hadoop.util.GenericOptionsParser; -import org.apache.hadoop.util.JvmPauseMonitor; -import org.apache.hadoop.util.ServicePlugin; -import org.apache.hadoop.util.StringUtils; -import org.apache.hadoop.util.Time; -import org.apache.hadoop.util.VersionInfo; import org.mortbay.util.ajax.JSON; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.protobuf.BlockingService; +import java.io.*; +import java.net.*; +import java.nio.channels.ClosedByInterruptException; +import java.nio.channels.SocketChannel; +import java.security.PrivilegedExceptionAction; +import java.util.*; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.apache.hadoop.hdfs.DFSConfigKeys.*; +import static org.apache.hadoop.util.ExitUtil.terminate; /********************************************************** * DataNode is a class (and program) that stores a set of @@ -261,6 +178,7 @@ public class DataNode extends Configured private volatile boolean heartbeatsDisabledForTests = false; private DataStorage storage = null; private HttpServer infoServer = null; + private int infoSecurePort; DataNodeMetrics metrics; private InetSocketAddress streamingAddr; @@ -384,16 +302,13 @@ public class DataNode extends Configured InetSocketAddress infoSocAddr = DataNode.getInfoAddr(conf); String infoHost = infoSocAddr.getHostName(); int tmpInfoPort = infoSocAddr.getPort(); - this.infoServer = (secureResources == null) - ? 
new HttpServer.Builder().setName("datanode") - .setBindAddress(infoHost).setPort(tmpInfoPort) - .setFindPort(tmpInfoPort == 0).setConf(conf) - .setACL(new AccessControlList(conf.get(DFS_ADMIN, " "))).build() - : new HttpServer.Builder().setName("datanode") - .setBindAddress(infoHost).setPort(tmpInfoPort) - .setFindPort(tmpInfoPort == 0).setConf(conf) - .setACL(new AccessControlList(conf.get(DFS_ADMIN, " "))) - .setConnector(secureResources.getListener()).build(); + HttpServer.Builder builder = new HttpServer.Builder().setName("datanode") + .setBindAddress(infoHost).setPort(tmpInfoPort) + .setFindPort(tmpInfoPort == 0).setConf(conf) + .setACL(new AccessControlList(conf.get(DFS_ADMIN, " "))); + this.infoServer = (secureResources == null) ? builder.build() : + builder.setConnector(secureResources.getListener()).build(); + LOG.info("Opened info server at " + infoHost + ":" + tmpInfoPort); if (conf.getBoolean(DFS_HTTPS_ENABLE_KEY, false)) { boolean needClientAuth = conf.getBoolean(DFS_CLIENT_HTTPS_NEED_AUTH_KEY, @@ -407,6 +322,7 @@ public class DataNode extends Configured if(LOG.isDebugEnabled()) { LOG.debug("Datanode listening for SSL on " + secInfoSocAddr); } + infoSecurePort = secInfoSocAddr.getPort(); } this.infoServer.addInternalServlet(null, "/streamFile/*", StreamFile.class); this.infoServer.addInternalServlet(null, "/getFileChecksum/*", @@ -775,7 +691,8 @@ public class DataNode extends Configured } DatanodeID dnId = new DatanodeID( streamingAddr.getAddress().getHostAddress(), hostName, - getStorageId(), getXferPort(), getInfoPort(), getIpcPort()); + getStorageId(), getXferPort(), getInfoPort(), + infoSecurePort, getIpcPort()); return new DatanodeRegistration(dnId, storageInfo, new ExportedBlockKeys(), VersionInfo.getVersion()); } @@ -873,7 +790,7 @@ public class DataNode extends Configured * If this is the first block pool to register, this also initializes * the datanode-scoped storage. * - * @param nsInfo the handshake response from the NN. + * @param bpos Block pool offer service * @throws IOException if the NN is inconsistent with the local storage. */ void initBlockPool(BPOfferService bpos) throws IOException { @@ -2330,6 +2247,13 @@ public class DataNode extends Configured return infoServer.getPort(); } + /** + * @return the datanode's https port + */ + public int getInfoSecurePort() { + return infoSecurePort; + } + /** * Returned information is a JSON representation of a map with * name node host name as the key and block pool Id as the value. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java index 840087393cd..9fb6dfd3d0f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java @@ -17,29 +17,11 @@ */ package org.apache.hadoop.hdfs.web; -import java.io.ByteArrayInputStream; -import java.io.DataInputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.TreeMap; - -import org.apache.hadoop.fs.ContentSummary; -import org.apache.hadoop.fs.FileChecksum; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.MD5MD5CRC32CastagnoliFileChecksum; -import org.apache.hadoop.fs.MD5MD5CRC32GzipFileChecksum; -import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum; +import org.apache.hadoop.fs.*; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DFSUtil; -import org.apache.hadoop.hdfs.protocol.DatanodeInfo; +import org.apache.hadoop.hdfs.protocol.*; import org.apache.hadoop.hdfs.protocol.DatanodeInfo.AdminStates; -import org.apache.hadoop.hdfs.protocol.ExtendedBlock; -import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; -import org.apache.hadoop.hdfs.protocol.LocatedBlock; -import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.hdfs.server.namenode.INodeId; @@ -50,6 +32,11 @@ import org.apache.hadoop.util.DataChecksum; import org.apache.hadoop.util.StringUtils; import org.mortbay.util.ajax.JSON; +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.util.*; + /** JSON Utilities */ public class JsonUtil { private static final Object[] EMPTY_OBJECT_ARRAY = {}; @@ -295,6 +282,7 @@ public class JsonUtil { m.put("storageID", datanodeinfo.getStorageID()); m.put("xferPort", datanodeinfo.getXferPort()); m.put("infoPort", datanodeinfo.getInfoPort()); + m.put("infoSecurePort", datanodeinfo.getInfoSecurePort()); m.put("ipcPort", datanodeinfo.getIpcPort()); m.put("capacity", datanodeinfo.getCapacity()); @@ -320,6 +308,7 @@ public class JsonUtil { (String)m.get("storageID"), (int)(long)(Long)m.get("xferPort"), (int)(long)(Long)m.get("infoPort"), + (int)(long)(Long)m.get("infoSecurePort"), (int)(long)(Long)m.get("ipcPort"), (Long)m.get("capacity"), diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/hdfs.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/hdfs.proto index 085f629a8d3..7a9d0adbd10 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/hdfs.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/hdfs.proto @@ -52,8 +52,9 @@ message DatanodeIDProto { required string hostName = 2; // hostname required string storageID = 3; // unique storage id required uint32 xferPort = 4; // data streaming port - required uint32 infoPort = 5; // info server port + required uint32 infoPort = 5; // datanode http port required uint32 ipcPort = 6; // ipc server port + optional uint32 infoSecurePort = 7 [default = 0]; // datanode https port } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java index f0c10b0a2fe..5f770023ca0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java @@ -18,60 +18,20 @@ package org.apache.hadoop.hdfs; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY; -import static org.junit.Assert.assertEquals; - -import java.io.BufferedOutputStream; -import java.io.BufferedReader; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.FileReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.net.HttpURLConnection; -import java.net.InetSocketAddress; -import java.net.Socket; -import java.net.URL; -import java.net.URLConnection; -import java.security.PrivilegedExceptionAction; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Random; -import java.util.Set; -import java.util.concurrent.TimeoutException; - +import com.google.common.base.Charsets; +import com.google.common.base.Joiner; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.BlockLocation; -import org.apache.hadoop.fs.CommonConfigurationKeys; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileContext; +import org.apache.hadoop.fs.*; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem.Statistics; import org.apache.hadoop.fs.Options.Rename; import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.MiniDFSCluster.NameNodeInfo; import org.apache.hadoop.hdfs.client.HdfsDataInputStream; -import org.apache.hadoop.hdfs.protocol.DatanodeID; -import org.apache.hadoop.hdfs.protocol.DatanodeInfo; +import org.apache.hadoop.hdfs.protocol.*; import org.apache.hadoop.hdfs.protocol.DatanodeInfo.AdminStates; -import org.apache.hadoop.hdfs.protocol.ExtendedBlock; -import org.apache.hadoop.hdfs.protocol.HdfsConstants; -import org.apache.hadoop.hdfs.protocol.LocatedBlock; -import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.protocol.datatransfer.Sender; import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BlockOpResponseProto; import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; @@ -93,8 +53,15 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.VersionInfo; -import com.google.common.base.Charsets; -import com.google.common.base.Joiner; +import java.io.*; +import java.net.*; +import java.security.PrivilegedExceptionAction; +import java.util.*; +import java.util.concurrent.TimeoutException; + +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY; +import static org.junit.Assert.assertEquals; /** Utilities for HDFS tests */ 
public class DFSTestUtil { @@ -114,10 +81,10 @@ public class DFSTestUtil { /** Creates a new instance of DFSTestUtil * - * @param testName Name of the test from where this utility is used * @param nFiles Number of files to be created * @param maxLevels Maximum number of directory levels * @param maxSize Maximum size for file + * @param minSize Minimum size for file */ private DFSTestUtil(int nFiles, int maxLevels, int maxSize, int minSize) { this.nFiles = nFiles; @@ -143,7 +110,7 @@ public class DFSTestUtil { } /** - * when formating a namenode - we must provide clusterid. + * when formatting a namenode - we must provide clusterid. * @param conf * @throws IOException */ @@ -806,6 +773,7 @@ public class DFSTestUtil { return new DatanodeID(ipAddr, "localhost", "", DFSConfigKeys.DFS_DATANODE_DEFAULT_PORT, DFSConfigKeys.DFS_DATANODE_HTTP_DEFAULT_PORT, + DFSConfigKeys.DFS_DATANODE_HTTPS_DEFAULT_PORT, DFSConfigKeys.DFS_DATANODE_IPC_DEFAULT_PORT); } @@ -815,7 +783,7 @@ public class DFSTestUtil { public static DatanodeID getLocalDatanodeID(int port) { return new DatanodeID("127.0.0.1", "localhost", "", - port, port, port); + port, port, port, port); } public static DatanodeDescriptor getLocalDatanodeDescriptor() { @@ -838,6 +806,7 @@ public class DFSTestUtil { String host, int port) { return new DatanodeInfo(new DatanodeID(ipAddr, host, "", port, DFSConfigKeys.DFS_DATANODE_HTTP_DEFAULT_PORT, + DFSConfigKeys.DFS_DATANODE_HTTPS_DEFAULT_PORT, DFSConfigKeys.DFS_DATANODE_IPC_DEFAULT_PORT)); } @@ -846,6 +815,7 @@ public class DFSTestUtil { return new DatanodeInfo(ipAddr, hostname, "", DFSConfigKeys.DFS_DATANODE_DEFAULT_PORT, DFSConfigKeys.DFS_DATANODE_HTTP_DEFAULT_PORT, + DFSConfigKeys.DFS_DATANODE_HTTPS_DEFAULT_PORT, DFSConfigKeys.DFS_DATANODE_IPC_DEFAULT_PORT, 1, 2, 3, 4, 5, 6, "local", adminState); } @@ -860,6 +830,7 @@ public class DFSTestUtil { int port, String rackLocation) { DatanodeID dnId = new DatanodeID(ipAddr, "host", "", port, DFSConfigKeys.DFS_DATANODE_HTTP_DEFAULT_PORT, + DFSConfigKeys.DFS_DATANODE_HTTPS_DEFAULT_PORT, DFSConfigKeys.DFS_DATANODE_IPC_DEFAULT_PORT); return new DatanodeDescriptor(dnId, rackLocation); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDatanodeRegistration.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDatanodeRegistration.java index a94d1f4dd2c..af015b4a7ce 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDatanodeRegistration.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDatanodeRegistration.java @@ -17,13 +17,6 @@ */ package org.apache.hadoop.hdfs; -import static org.junit.Assert.*; -import static org.mockito.Mockito.doReturn; -import static org.mockito.Mockito.mock; - -import java.net.InetSocketAddress; -import java.security.Permission; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -40,6 +33,13 @@ import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.VersionInfo; import org.junit.Test; +import java.net.InetSocketAddress; +import java.security.Permission; + +import static org.junit.Assert.*; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.mock; + /** * This class tests data node registration. 
*/ @@ -157,7 +157,8 @@ public class TestDatanodeRegistration { final String DN_HOSTNAME = "localhost"; final int DN_XFER_PORT = 12345; final int DN_INFO_PORT = 12346; - final int DN_IPC_PORT = 12347; + final int DN_INFO_SECURE_PORT = 12347; + final int DN_IPC_PORT = 12348; Configuration conf = new HdfsConfiguration(); MiniDFSCluster cluster = null; try { @@ -172,7 +173,8 @@ public class TestDatanodeRegistration { // register a datanode DatanodeID dnId = new DatanodeID(DN_IP_ADDR, DN_HOSTNAME, - "fake-storage-id", DN_XFER_PORT, DN_INFO_PORT, DN_IPC_PORT); + "fake-storage-id", DN_XFER_PORT, DN_INFO_PORT, DN_INFO_SECURE_PORT, + DN_IPC_PORT); long nnCTime = cluster.getNamesystem().getFSImage().getStorage() .getCTime(); StorageInfo mockStorageInfo = mock(StorageInfo.class); @@ -188,7 +190,8 @@ public class TestDatanodeRegistration { // register the same datanode again with a different storage ID dnId = new DatanodeID(DN_IP_ADDR, DN_HOSTNAME, - "changed-fake-storage-id", DN_XFER_PORT, DN_INFO_PORT, DN_IPC_PORT); + "changed-fake-storage-id", DN_XFER_PORT, DN_INFO_PORT, + DN_INFO_SECURE_PORT, DN_IPC_PORT); dnReg = new DatanodeRegistration(dnId, mockStorageInfo, null, VersionInfo.getVersion()); rpcServer.registerDatanode(dnReg); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileInputStreamCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileInputStreamCache.java index 7f28a43ceb4..2d02f3bd172 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileInputStreamCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileInputStreamCache.java @@ -17,12 +17,7 @@ */ package org.apache.hadoop.hdfs; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; - import junit.framework.Assert; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hdfs.protocol.DatanodeID; @@ -31,6 +26,10 @@ import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.net.unix.TemporarySocketDirectory; import org.junit.Test; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; + public class TestFileInputStreamCache { static final Log LOG = LogFactory.getLog(TestFileInputStreamCache.class); @@ -80,7 +79,7 @@ public class TestFileInputStreamCache { public void testAddAndRetrieve() throws Exception { FileInputStreamCache cache = new FileInputStreamCache(1, 1000000); DatanodeID dnId = new DatanodeID("127.0.0.1", "localhost", - "xyzzy", 8080, 9090, 7070); + "xyzzy", 8080, 9090, 7070, 6060); ExtendedBlock block = new ExtendedBlock("poolid", 123); TestFileDescriptorPair pair = new TestFileDescriptorPair(); cache.put(dnId, block, pair.getFileInputStreams()); @@ -94,7 +93,7 @@ public class TestFileInputStreamCache { public void testExpiry() throws Exception { FileInputStreamCache cache = new FileInputStreamCache(1, 10); DatanodeID dnId = new DatanodeID("127.0.0.1", "localhost", - "xyzzy", 8080, 9090, 7070); + "xyzzy", 8080, 9090, 7070, 6060); ExtendedBlock block = new ExtendedBlock("poolid", 123); TestFileDescriptorPair pair = new TestFileDescriptorPair(); cache.put(dnId, block, pair.getFileInputStreams()); @@ -109,12 +108,12 @@ public class TestFileInputStreamCache { public void testEviction() throws Exception { FileInputStreamCache cache = new FileInputStreamCache(1, 10000000); DatanodeID dnId = new DatanodeID("127.0.0.1", "localhost", - "xyzzy", 8080, 
9090, 7070); + "xyzzy", 8080, 9090, 7070, 6060); ExtendedBlock block = new ExtendedBlock("poolid", 123); TestFileDescriptorPair pair = new TestFileDescriptorPair(); cache.put(dnId, block, pair.getFileInputStreams()); DatanodeID dnId2 = new DatanodeID("127.0.0.1", "localhost", - "xyzzy", 8081, 9091, 7071); + "xyzzy", 8081, 9091, 7071, 6061); TestFileDescriptorPair pair2 = new TestFileDescriptorPair(); cache.put(dnId2, block, pair2.getFileInputStreams()); FileInputStream fis[] = cache.get(dnId, block); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPeerCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPeerCache.java index 7836bc66805..de52806629d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPeerCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPeerCache.java @@ -17,26 +17,23 @@ */ package org.apache.hadoop.hdfs; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertSame; -import static org.junit.Assert.assertTrue; - -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.nio.channels.ReadableByteChannel; - +import com.google.common.collect.HashMultiset; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.net.Peer; +import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.net.unix.DomainSocket; import org.junit.Test; import org.mockito.Mockito; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; -import com.google.common.collect.HashMultiset; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.channels.ReadableByteChannel; + +import static org.junit.Assert.*; public class TestPeerCache { static final Log LOG = LogFactory.getLog(TestPeerCache.class); @@ -150,7 +147,7 @@ public class TestPeerCache { PeerCache cache = new PeerCache(3, 100000); DatanodeID dnId = new DatanodeID("192.168.0.1", "fakehostname", "fake_storage_id", - 100, 101, 102); + 100, 101, 102, 103); FakePeer peer = new FakePeer(dnId, false); cache.put(dnId, peer); assertTrue(!peer.isClosed()); @@ -170,7 +167,7 @@ public class TestPeerCache { for (int i = 0; i < CAPACITY; ++i) { dnIds[i] = new DatanodeID("192.168.0.1", "fakehostname_" + i, "fake_storage_id", - 100, 101, 102); + 100, 101, 102, 103); peers[i] = new FakePeer(dnIds[i], false); } for (int i = 0; i < CAPACITY; ++i) { @@ -201,7 +198,7 @@ public class TestPeerCache { for (int i = 0; i < dnIds.length; ++i) { dnIds[i] = new DatanodeID("192.168.0.1", "fakehostname_" + i, "fake_storage_id_" + i, - 100, 101, 102); + 100, 101, 102, 103); peers[i] = new FakePeer(dnIds[i], false); } for (int i = 0; i < CAPACITY; ++i) { @@ -232,7 +229,7 @@ public class TestPeerCache { PeerCache cache = new PeerCache(CAPACITY, 100000); DatanodeID dnId = new DatanodeID("192.168.0.1", "fakehostname", "fake_storage_id", - 100, 101, 102); + 100, 101, 102, 103); HashMultiset peers = HashMultiset.create(CAPACITY); for (int i = 0; i < CAPACITY; ++i) { FakePeer peer = new FakePeer(dnId, false); @@ -257,7 +254,7 @@ public class TestPeerCache { PeerCache cache = new PeerCache(CAPACITY, 100000); DatanodeID dnId = new DatanodeID("192.168.0.1", "fakehostname", "fake_storage_id", - 100, 101, 102); + 100, 101, 102, 103); HashMultiset peers = HashMultiset.create(CAPACITY); 
for (int i = 0; i < CAPACITY; ++i) { FakePeer peer = new FakePeer(dnId, i == CAPACITY - 1); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/common/TestJspHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/common/TestJspHelper.java index bd523963409..3c3485dcdc3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/common/TestJspHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/common/TestJspHelper.java @@ -17,31 +17,7 @@ */ package org.apache.hadoop.hdfs.server.common; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; -import static org.mockito.Mockito.doAnswer; - -import java.io.IOException; -import java.io.StringReader; -import java.net.InetSocketAddress; -import java.text.MessageFormat; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -import javax.servlet.ServletContext; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.jsp.JspWriter; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.ParserConfigurationException; - - +import com.google.common.base.Strings; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.HdfsConfiguration; @@ -70,7 +46,20 @@ import org.mockito.stubbing.Answer; import org.xml.sax.InputSource; import org.xml.sax.SAXException; -import com.google.common.base.Strings; +import javax.servlet.ServletContext; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.jsp.JspWriter; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import java.io.IOException; +import java.io.StringReader; +import java.net.InetSocketAddress; +import java.text.MessageFormat; +import java.util.ArrayList; + +import static org.junit.Assert.*; +import static org.mockito.Mockito.*; public class TestJspHelper { @@ -459,9 +448,9 @@ public class TestJspHelper { @Test public void testSortNodeByFields() throws Exception { DatanodeID dnId1 = new DatanodeID("127.0.0.1", "localhost1", "storage1", - 1234, 2345, 3456); + 1234, 2345, 3456, 4567); DatanodeID dnId2 = new DatanodeID("127.0.0.2", "localhost2", "storage2", - 1235, 2346, 3457); + 1235, 2346, 3457, 4568); DatanodeDescriptor dnDesc1 = new DatanodeDescriptor(dnId1, "rack1", 1024, 100, 924, 100, 10, 2); DatanodeDescriptor dnDesc2 = new DatanodeDescriptor(dnId2, "rack2", 2500, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NNThroughputBenchmark.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NNThroughputBenchmark.java index 3156de4e93a..b6882d67036 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NNThroughputBenchmark.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NNThroughputBenchmark.java @@ -17,14 +17,6 @@ */ package org.apache.hadoop.hdfs.server.namenode; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import 
java.util.ArrayList; -import java.util.Arrays; -import java.util.EnumSet; -import java.util.List; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.impl.Log4JLogger; @@ -33,40 +25,28 @@ import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.HdfsConfiguration; -import org.apache.hadoop.hdfs.protocol.Block; -import org.apache.hadoop.hdfs.protocol.BlockListAsLongs; -import org.apache.hadoop.hdfs.protocol.DatanodeID; -import org.apache.hadoop.hdfs.protocol.DatanodeInfo; -import org.apache.hadoop.hdfs.protocol.ExtendedBlock; -import org.apache.hadoop.hdfs.protocol.HdfsConstants; -import org.apache.hadoop.hdfs.protocol.LocatedBlock; +import org.apache.hadoop.hdfs.protocol.*; import org.apache.hadoop.hdfs.security.token.block.ExportedBlockKeys; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.datanode.DataStorage; -import org.apache.hadoop.hdfs.server.protocol.BlockCommand; -import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand; -import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol; -import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; -import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage; -import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; -import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; -import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo; -import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport; -import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks; -import org.apache.hadoop.hdfs.server.protocol.StorageReport; +import org.apache.hadoop.hdfs.server.protocol.*; import org.apache.hadoop.io.EnumSetWritable; import org.apache.hadoop.net.DNS; import org.apache.hadoop.net.NetworkTopology; import org.apache.hadoop.security.Groups; -import org.apache.hadoop.util.StringUtils; -import org.apache.hadoop.util.Time; -import org.apache.hadoop.util.Tool; -import org.apache.hadoop.util.ToolRunner; -import org.apache.hadoop.util.VersionInfo; +import org.apache.hadoop.util.*; import org.apache.log4j.Level; import org.apache.log4j.LogManager; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.EnumSet; +import java.util.List; + /** * Main class for a series of name-node benchmarks. * @@ -839,6 +819,7 @@ public class NNThroughputBenchmark implements Tool { DNS.getDefaultHost("default", "default"), "", getNodePort(dnIdx), DFSConfigKeys.DFS_DATANODE_HTTP_DEFAULT_PORT, + DFSConfigKeys.DFS_DATANODE_HTTPS_DEFAULT_PORT, DFSConfigKeys.DFS_DATANODE_IPC_DEFAULT_PORT), new DataStorage(nsInfo, ""), new ExportedBlockKeys(), VersionInfo.getVersion()); @@ -1305,7 +1286,7 @@ public class NNThroughputBenchmark implements Tool { /** * Main method of the benchmark. 
- * @param args command line parameters + * @param aArgs command line parameters */ @Override // Tool public int run(String[] aArgs) throws Exception { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCommitBlockSynchronization.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCommitBlockSynchronization.java index f40b799d1a8..83a58652910 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCommitBlockSynchronization.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCommitBlockSynchronization.java @@ -18,15 +18,6 @@ package org.apache.hadoop.hdfs.server.namenode; -import static org.junit.Assert.fail; -import static org.mockito.Matchers.any; -import static org.mockito.Matchers.anyBoolean; -import static org.mockito.Mockito.doReturn; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.spy; - -import java.io.IOException; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.DatanodeID; @@ -37,6 +28,13 @@ import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; import org.junit.Test; +import java.io.IOException; + +import static org.junit.Assert.fail; +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.anyBoolean; +import static org.mockito.Mockito.*; + /** * Verify that TestCommitBlockSynchronization is idempotent. */ @@ -177,7 +175,7 @@ public class TestCommitBlockSynchronization { Block block = new Block(blockId, length, genStamp); FSNamesystem namesystemSpy = makeNameSystemSpy(block, file); DatanodeID[] newTargets = new DatanodeID[]{ - new DatanodeID("0.0.0.0", "nonexistantHost", "1", 0, 0, 0)}; + new DatanodeID("0.0.0.0", "nonexistantHost", "1", 0, 0, 0, 0)}; ExtendedBlock lastBlock = new ExtendedBlock(); namesystemSpy.commitBlockSynchronization( From cb5a51565a70d89e83486b8eadfd7b2b44257c4c Mon Sep 17 00:00:00 2001 From: Jing Zhao Date: Sun, 6 Oct 2013 06:15:31 +0000 Subject: [PATCH 063/133] HADOOP-10017. Fix NPE in DFSClient#getDelegationToken when doing Distcp from a secured cluster to an insecured cluster. Contributed by Haohui Mai. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529571 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.txt | 3 +++ .../main/java/org/apache/hadoop/hdfs/DFSClient.java | 9 +++++++-- .../hadoop/hdfs/TestDistributedFileSystem.java | 13 +++++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 09002cdf859..12911a3cdbc 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -435,6 +435,9 @@ Release 2.1.2 - UNRELEASED HADOOP-10003. HarFileSystem.listLocatedStatus() fails. (Jason Dere and suresh via suresh) + HADOOP-10017. Fix NPE in DFSClient#getDelegationToken when doing Distcp + from a secured cluster to an insecured cluster. 
(Haohui Mai via jing9) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java index 305e9c25534..1e1b9861dbe 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java @@ -900,10 +900,15 @@ public class DFSClient implements java.io.Closeable { assert dtService != null; Token token = namenode.getDelegationToken(renewer); - token.setService(this.dtService); - LOG.info("Created " + DelegationTokenIdentifier.stringifyToken(token)); + if (token != null) { + token.setService(this.dtService); + LOG.info("Created " + DelegationTokenIdentifier.stringifyToken(token)); + } else { + LOG.info("Cannot get delegation token from " + renewer); + } return token; + } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java index a57ad745612..0a97a620014 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java @@ -94,6 +94,19 @@ public class TestDistributedFileSystem { return conf; } + @Test + public void testEmptyDelegationToken() throws IOException { + Configuration conf = getTestConfiguration(); + MiniDFSCluster cluster = null; + try { + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build(); + FileSystem fileSys = cluster.getFileSystem(); + fileSys.getDelegationToken(""); + } finally { + cluster.shutdown(); + } + } + @Test public void testFileSystemCloseAll() throws Exception { Configuration conf = getTestConfiguration(); From 44f7ee7192d150b43a3012f06cec67be5b64edd0 Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Sun, 6 Oct 2013 18:32:16 +0000 Subject: [PATCH 064/133] YARN-1278. Fixed NodeManager to not delete local resources for apps on resync command from RM - a bug caused by YARN-1149. Contributed by Hitesh Shah. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529657 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 + .../CMgrCompletedContainersEvent.java | 30 +++++++- .../yarn/server/nodemanager/NodeManager.java | 3 +- .../nodemanager/NodeStatusUpdaterImpl.java | 3 +- .../ContainerManagerImpl.java | 77 +++++++++++-------- .../nodemanager/TestNodeManagerResync.java | 7 +- .../nodemanager/TestNodeStatusUpdater.java | 20 ++--- 7 files changed, 94 insertions(+), 49 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index dff651395ad..2aa8c55659a 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -188,6 +188,9 @@ Release 2.1.2 - UNRELEASED and log-dirs correctly even when there are no resources to localize for the container. (Siddharth Seth via vinodkv) + YARN-1278. Fixed NodeManager to not delete local resources for apps on resync + command from RM - a bug caused by YARN-1149. 
(Hitesh Shah via vinodkv) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrCompletedContainersEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrCompletedContainersEvent.java index e5e55374af9..5f7d01ea2bb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrCompletedContainersEvent.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrCompletedContainersEvent.java @@ -25,13 +25,39 @@ import org.apache.hadoop.yarn.api.records.ContainerId; public class CMgrCompletedContainersEvent extends ContainerManagerEvent { private final List containerToCleanup; - - public CMgrCompletedContainersEvent(List containersToCleanup) { + private final Reason reason; + + public CMgrCompletedContainersEvent(List containersToCleanup, + Reason reason) { super(ContainerManagerEventType.FINISH_CONTAINERS); this.containerToCleanup = containersToCleanup; + this.reason = reason; } public List getContainersToCleanup() { return this.containerToCleanup; } + + public Reason getReason() { + return reason; + } + + public static enum Reason { + /** + * Container is killed as NodeManager is shutting down + */ + ON_SHUTDOWN, + + /** + * Container is killed as the Nodemanager is re-syncing with the + * ResourceManager + */ + ON_NODEMANAGER_RESYNC, + + /** + * Container is killed on request by the ResourceManager + */ + BY_RESOURCEMANAGER + } + } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java index 79b9d7a83ff..998fca775d5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java @@ -226,7 +226,8 @@ public class NodeManager extends CompositeService public void run() { LOG.info("Notifying ContainerManager to block new container-requests"); containerManager.setBlockNewContainerRequests(true); - containerManager.cleanUpApplications(NodeManagerEventType.RESYNC); + LOG.info("Cleaning up running containers on resync"); + containerManager.cleanupContainersOnNMResync(); ((NodeStatusUpdaterImpl) nodeStatusUpdater ).rebootNodeStatusUpdater(); } }.start(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java index b52f9d16500..d6af3fe7bd1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java @@ -501,7 +501,8 @@ public class NodeStatusUpdaterImpl extends AbstractService implements .getContainersToCleanup(); if (!containersToCleanup.isEmpty()) { dispatcher.getEventHandler().handle( - new CMgrCompletedContainersEvent(containersToCleanup)); + new CMgrCompletedContainersEvent(containersToCleanup, + CMgrCompletedContainersEvent.Reason.BY_RESOURCEMANAGER)); } List appsToCleanup = response.getApplicationsToCleanup(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java index d158b43f2d0..3091c4adb6f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java @@ -87,7 +87,6 @@ import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.DeletionService; import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger; -import org.apache.hadoop.yarn.server.nodemanager.NodeManagerEventType; import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger.AuditConstants; import org.apache.hadoop.yarn.server.nodemanager.NodeManager; import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater; @@ -306,7 +305,7 @@ public class ContainerManagerImpl extends CompositeService implements try { serviceStopped = true; if (context != null) { - cleanUpApplications(NodeManagerEventType.SHUTDOWN); + cleanUpApplicationsOnNMShutDown(); } } finally { this.writeLock.unlock(); @@ -320,7 +319,7 @@ public class ContainerManagerImpl extends CompositeService implements super.serviceStop(); } - public void cleanUpApplications(NodeManagerEventType eventType) { + public void cleanUpApplicationsOnNMShutDown() { Map applications = this.context.getApplications(); if (applications.isEmpty()) { @@ -336,33 +335,15 @@ public class ContainerManagerImpl extends CompositeService implements LOG.info("Waiting for Applications to be Finished"); - switch (eventType) { - case SHUTDOWN: - long waitStartTime = System.currentTimeMillis(); - while (!applications.isEmpty() - && System.currentTimeMillis() - waitStartTime - < waitForContainersOnShutdownMillis) { - try { - Thread.sleep(1000); - } catch (InterruptedException ex) { - LOG.warn("Interrupted while sleeping on applications finish on shutdown", - ex); - } - } - break; - case RESYNC: - while (!applications.isEmpty()) { - try { - Thread.sleep(1000); - } catch (InterruptedException ex) { - LOG.warn("Interrupted while sleeping on applications finish on resync", - ex); - } - } - break; - default: - throw new YarnRuntimeException("Get an unknown NodeManagerEventType: " - + eventType); + long waitStartTime = System.currentTimeMillis(); + while (!applications.isEmpty() + && System.currentTimeMillis() - waitStartTime < waitForContainersOnShutdownMillis) { + try { + 
Thread.sleep(1000); + } catch (InterruptedException ex) { + LOG.warn( + "Interrupted while sleeping on applications finish on shutdown", ex); + } } // All applications Finished @@ -374,6 +355,40 @@ public class ContainerManagerImpl extends CompositeService implements } } + public void cleanupContainersOnNMResync() { + Map containers = context.getContainers(); + if (containers.isEmpty()) { + return; + } + LOG.info("Containers still running on " + + CMgrCompletedContainersEvent.Reason.ON_NODEMANAGER_RESYNC + " : " + + containers.keySet()); + + List containerIds = + new ArrayList(containers.keySet()); + + LOG.info("Waiting for containers to be killed"); + + this.handle(new CMgrCompletedContainersEvent(containerIds, + CMgrCompletedContainersEvent.Reason.ON_NODEMANAGER_RESYNC)); + while (!containers.isEmpty()) { + try { + Thread.sleep(1000); + nodeStatusUpdater.getNodeStatusAndUpdateContainersInContext(); + } catch (InterruptedException ex) { + LOG.warn("Interrupted while sleeping on container kill on resync", ex); + } + } + + // All containers killed + if (containers.isEmpty()) { + LOG.info("All containers in DONE state"); + } else { + LOG.info("Done waiting for containers to be killed. Still alive: " + + containers.keySet()); + } + } + // Get the remoteUGI corresponding to the api call. protected UserGroupInformation getRemoteUgi() throws YarnException { @@ -850,7 +865,7 @@ public class ContainerManagerImpl extends CompositeService implements break; default: throw new YarnRuntimeException( - "Get an unknown ContainerManagerEvent type: " + event.getType()); + "Got an unknown ContainerManagerEvent type: " + event.getType()); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java index 3e0846b0422..f2090fac0fe 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java @@ -102,7 +102,11 @@ public class TestNodeManagerResync { } catch (BrokenBarrierException e) { } Assert.assertEquals(2, ((TestNodeManager1) nm).getNMRegistrationCount()); - + // Only containers should be killed on resync, apps should lie around. 
That + // way local resources for apps can be used beyond resync without + // relocalization + Assert.assertTrue(nm.getNMContext().getApplications() + .containsKey(cId.getApplicationAttemptId().getApplicationId())); Assert.assertFalse(assertionFailedInThread.get()); nm.stop(); @@ -285,7 +289,6 @@ public class TestNodeManagerResync { recordFactory.newRecordInstance(ContainerLaunchContext.class); try { while (!isStopped && numContainers < 10) { - ContainerId cId = TestNodeManagerShutdown.createContainerId(); StartContainerRequest scRequest = StartContainerRequest.newInstance(containerLaunchContext, null); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java index 8372aff851e..f356a2ab839 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java @@ -93,6 +93,7 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; +@SuppressWarnings("rawtypes") public class TestNodeStatusUpdater { // temp fix until metrics system can auto-detect itself running in unit test: @@ -352,7 +353,6 @@ public class TestNodeStatusUpdater { private class MyNodeStatusUpdater4 extends NodeStatusUpdaterImpl { - private Context context; private final long rmStartIntervalMS; private final boolean rmNeverStart; public ResourceTracker resourceTracker; @@ -360,7 +360,6 @@ public class TestNodeStatusUpdater { NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics, long rmStartIntervalMS, boolean rmNeverStart) { super(context, dispatcher, healthChecker, metrics); - this.context = context; this.rmStartIntervalMS = rmStartIntervalMS; this.rmNeverStart = rmNeverStart; } @@ -376,8 +375,8 @@ public class TestNodeStatusUpdater { RetryPolicy retryPolicy = RMProxy.createRetryPolicy(conf); resourceTracker = (ResourceTracker) RetryProxy.create(ResourceTracker.class, - new MyResourceTracker6(this.context, rmStartIntervalMS, - rmNeverStart), retryPolicy); + new MyResourceTracker6(rmStartIntervalMS, rmNeverStart), + retryPolicy); return resourceTracker; } @@ -685,14 +684,11 @@ public class TestNodeStatusUpdater { private class MyResourceTracker6 implements ResourceTracker { - private final Context context; private long rmStartIntervalMS; private boolean rmNeverStart; private final long waitStartTime; - public MyResourceTracker6(Context context, long rmStartIntervalMS, - boolean rmNeverStart) { - this.context = context; + public MyResourceTracker6(long rmStartIntervalMS, boolean rmNeverStart) { this.rmStartIntervalMS = rmStartIntervalMS; this.rmNeverStart = rmNeverStart; this.waitStartTime = System.currentTimeMillis(); @@ -868,8 +864,8 @@ public class TestNodeStatusUpdater { metrics, aclsManager, dirsHandler) { @Override - public void cleanUpApplications(NodeManagerEventType eventType) { - super.cleanUpApplications(NodeManagerEventType.SHUTDOWN); + public void cleanUpApplicationsOnNMShutDown() { + super.cleanUpApplicationsOnNMShutDown(); numCleanups.incrementAndGet(); } }; @@ -1222,8 +1218,8 @@ public class TestNodeStatusUpdater { metrics, 
aclsManager, dirsHandler) { @Override - public void cleanUpApplications(NodeManagerEventType eventType) { - super.cleanUpApplications(NodeManagerEventType.SHUTDOWN); + public void cleanUpApplicationsOnNMShutDown() { + super.cleanUpApplicationsOnNMShutDown(); numCleanups.incrementAndGet(); } }; From 7317e97bd72ca30f5db37fa94389dbdb52ae079e Mon Sep 17 00:00:00 2001 From: Jing Zhao Date: Sun, 6 Oct 2013 18:39:03 +0000 Subject: [PATCH 065/133] HDFS-5299. DFS client hangs in updatePipeline RPC when failover happened. Contributed by Vinay. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529660 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 ++ .../hdfs/server/namenode/FSNamesystem.java | 39 +++++++++------- .../namenode/TestNamenodeRetryCache.java | 44 ++++++++++++++++++- 3 files changed, 68 insertions(+), 18 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index c5fe5c72b0b..e92325720dd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -400,6 +400,9 @@ Release 2.1.2 - UNRELEASED HDFS-5306. Datanode https port is not available at the namenode. (Suresh Srinivas via brandonli) + HDFS-5299. DFS client hangs in updatePipeline RPC when failover happened. + (Vinay via jing9) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 5a6d5387c8f..b81a237c4b8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -1793,16 +1793,16 @@ public class FSNamesystem implements Namesystem, FSClusterStats, void createSymlink(String target, String link, PermissionStatus dirPerms, boolean createParent) throws IOException, UnresolvedLinkException { - CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); - if (cacheEntry != null && cacheEntry.isSuccess()) { - return; // Return previous response - } if (!DFSUtil.isValidName(link)) { throw new InvalidPathException("Invalid link name: " + link); } if (FSDirectory.isReservedName(target)) { throw new InvalidPathException("Invalid target name: " + target); } + CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); + if (cacheEntry != null && cacheEntry.isSuccess()) { + return; // Return previous response + } boolean success = false; try { createSymlinkInt(target, link, dirPerms, createParent, cacheEntry != null); @@ -3023,10 +3023,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats, /** Rename src to dst */ void renameTo(String src, String dst, Options.Rename... 
options) throws IOException, UnresolvedLinkException { - CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); - if (cacheEntry != null && cacheEntry.isSuccess()) { - return; // Return previous response - } if (NameNode.stateChangeLog.isDebugEnabled()) { NameNode.stateChangeLog.debug("DIR* NameSystem.renameTo: with options - " + src + " to " + dst); @@ -3034,8 +3030,13 @@ public class FSNamesystem implements Namesystem, FSClusterStats, if (!DFSUtil.isValidName(dst)) { throw new InvalidPathException("Invalid name: " + dst); } - FSPermissionChecker pc = getPermissionChecker(); + final FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.WRITE); + CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); + if (cacheEntry != null && cacheEntry.isSuccess()) { + return; // Return previous response + } byte[][] srcComponents = FSDirectory.getPathComponentsForReservedPath(src); byte[][] dstComponents = FSDirectory.getPathComponentsForReservedPath(dst); HdfsFileStatus resultingStat = null; @@ -4249,12 +4250,13 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * @throws IOException if */ void saveNamespace() throws AccessControlException, IOException { + checkOperation(OperationCategory.UNCHECKED); + checkSuperuserPrivilege(); + CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); if (cacheEntry != null && cacheEntry.isSuccess()) { return; // Return previous response } - checkSuperuserPrivilege(); - checkOperation(OperationCategory.UNCHECKED); boolean success = false; readLock(); try { @@ -5140,11 +5142,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats, void endCheckpoint(NamenodeRegistration registration, CheckpointSignature sig) throws IOException { + checkOperation(OperationCategory.CHECKPOINT); CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); if (cacheEntry != null && cacheEntry.isSuccess()) { return; // Return previous response } - checkOperation(OperationCategory.CHECKPOINT); boolean success = false; readLock(); try { @@ -5676,11 +5678,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats, void updatePipeline(String clientName, ExtendedBlock oldBlock, ExtendedBlock newBlock, DatanodeID[] newNodes) throws IOException { + checkOperation(OperationCategory.WRITE); CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); if (cacheEntry != null && cacheEntry.isSuccess()) { return; // Return previous response } - checkOperation(OperationCategory.WRITE); LOG.info("updatePipeline(block=" + oldBlock + ", newGenerationStamp=" + newBlock.getGenerationStamp() + ", newLength=" + newBlock.getNumBytes() @@ -6614,12 +6616,13 @@ public class FSNamesystem implements Namesystem, FSClusterStats, */ String createSnapshot(String snapshotRoot, String snapshotName) throws SafeModeException, IOException { + checkOperation(OperationCategory.WRITE); + final FSPermissionChecker pc = getPermissionChecker(); CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache, null); if (cacheEntry != null && cacheEntry.isSuccess()) { return (String) cacheEntry.getPayload(); } - final FSPermissionChecker pc = getPermissionChecker(); writeLock(); String snapshotPath = null; try { @@ -6666,11 +6669,12 @@ public class FSNamesystem implements Namesystem, FSClusterStats, */ void renameSnapshot(String path, String snapshotOldName, String snapshotNewName) throws SafeModeException, IOException { + checkOperation(OperationCategory.WRITE); + final FSPermissionChecker pc = 
getPermissionChecker(); CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); if (cacheEntry != null && cacheEntry.isSuccess()) { return; // Return previous response } - final FSPermissionChecker pc = getPermissionChecker(); writeLock(); boolean success = false; try { @@ -6711,10 +6715,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, public SnapshottableDirectoryStatus[] getSnapshottableDirListing() throws IOException { SnapshottableDirectoryStatus[] status = null; + final FSPermissionChecker checker = getPermissionChecker(); readLock(); try { checkOperation(OperationCategory.READ); - FSPermissionChecker checker = getPermissionChecker(); final String user = checker.isSuperUser()? null : checker.getUser(); status = snapshotManager.getSnapshottableDirListing(user); } finally { @@ -6782,13 +6786,14 @@ public class FSNamesystem implements Namesystem, FSClusterStats, */ void deleteSnapshot(String snapshotRoot, String snapshotName) throws SafeModeException, IOException { + checkOperation(OperationCategory.WRITE); final FSPermissionChecker pc = getPermissionChecker(); + CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); if (cacheEntry != null && cacheEntry.isSuccess()) { return; // Return previous response } boolean success = false; - checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeRetryCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeRetryCache.java index 54dda2fe8ba..ddb7c0fa692 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeRetryCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeRetryCache.java @@ -21,6 +21,7 @@ package org.apache.hadoop.hdfs.server.namenode; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import java.io.IOException; import java.util.EnumSet; @@ -35,11 +36,15 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.UnresolvedLinkException; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.permission.PermissionStatus; +import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.protocol.DatanodeID; +import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.ipc.ClientId; @@ -47,7 +52,9 @@ import org.apache.hadoop.ipc.RPC.RpcKind; import org.apache.hadoop.ipc.RetryCache.CacheEntry; import org.apache.hadoop.ipc.RpcConstants; import org.apache.hadoop.ipc.Server; +import org.apache.hadoop.ipc.StandbyException; import org.apache.hadoop.security.AccessControlException; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.LightWeightCache; import org.junit.After; import org.junit.Assert; @@ -75,12 +82,13 @@ public class TestNamenodeRetryCache { "TestNamenodeRetryCache", null, FsPermission.getDefault()); private 
static DistributedFileSystem filesystem; private static int callId = 100; - private static Configuration conf = new HdfsConfiguration(); + private static Configuration conf; private static final int BlockSize = 512; /** Start a cluster */ @Before public void setup() throws Exception { + conf = new HdfsConfiguration(); conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BlockSize); conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY, true); cluster = new MiniDFSCluster.Builder(conf).build(); @@ -293,6 +301,40 @@ public class TestNamenodeRetryCache { } } + /** + * Make sure a retry call does not hang because of the exception thrown in the + * first call. + */ + @Test(timeout = 60000) + public void testUpdatePipelineWithFailOver() throws Exception { + cluster.shutdown(); + namesystem = null; + filesystem = null; + cluster = new MiniDFSCluster.Builder(conf).nnTopology( + MiniDFSNNTopology.simpleHATopology()).numDataNodes(1).build(); + FSNamesystem ns0 = cluster.getNamesystem(0); + ExtendedBlock oldBlock = new ExtendedBlock(); + ExtendedBlock newBlock = new ExtendedBlock(); + DatanodeID[] newNodes = new DatanodeID[2]; + + newCall(); + try { + ns0.updatePipeline("testClient", oldBlock, newBlock, newNodes); + fail("Expect StandbyException from the updatePipeline call"); + } catch (StandbyException e) { + // expected, since in the beginning both nn are in standby state + GenericTestUtils.assertExceptionContains( + HAServiceState.STANDBY.toString(), e); + } + + cluster.transitionToActive(0); + try { + ns0.updatePipeline("testClient", oldBlock, newBlock, newNodes); + } catch (IOException e) { + // ignore call should not hang. + } + } + /** * Test for crateSnapshot */ From 21181b65531449e5fda321c11f0672c3067641aa Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Sun, 6 Oct 2013 18:43:36 +0000 Subject: [PATCH 066/133] YARN-1277. Added a policy based configuration for http/https in common HttpServer and using the same in YARN - related to per project https config support via HADOOP-10022. Contributed by Suresh Srinivas and Omkar Vinit Joshi. 
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529662 13f79535-47bb-0310-9956-ffa450edef68 --- .../fs/CommonConfigurationKeysPublic.java | 4 + .../org/apache/hadoop/http/HttpConfig.java | 30 +++-- .../apache/hadoop/http/TestSSLHttpServer.java | 4 +- .../hadoop/mapreduce/v2/app/MRAppMaster.java | 69 +++-------- .../v2/app/webapp/AppController.java | 3 +- .../v2/jobhistory/JHAdminConfig.java | 16 +-- .../mapreduce/v2/util/MRWebAppUtil.java | 112 +++++++----------- .../org/apache/hadoop/mapreduce/MRConfig.java | 5 - .../src/main/resources/mapred-default.xml | 24 +--- .../mapreduce/v2/hs/JobHistoryServer.java | 3 +- hadoop-yarn-project/CHANGES.txt | 4 + .../hadoop/yarn/conf/YarnConfiguration.java | 8 +- .../hadoop/yarn/webapp/util/WebAppUtils.java | 9 +- .../src/main/resources/yarn-default.xml | 11 ++ .../yarn/server/nodemanager/NodeManager.java | 9 ++ .../resourcemanager/ResourceManager.java | 8 ++ .../yarn/server/webproxy/ProxyUriUtils.java | 20 ++-- 17 files changed, 160 insertions(+), 179 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java index ab30003ed36..5bedadd7973 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java @@ -264,5 +264,9 @@ public class CommonConfigurationKeysPublic { /** Default value for HADOOP_KERBEROS_MIN_SECONDS_BEFORE_RELOGIN */ public static final int HADOOP_KERBEROS_MIN_SECONDS_BEFORE_RELOGIN_DEFAULT = 60; + + // HTTP policies to be used in configuration + public static final String HTTP_POLICY_HTTP_ONLY = "HTTP_ONLY"; + public static final String HTTP_POLICY_HTTPS_ONLY = "HTTPS_ONLY"; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpConfig.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpConfig.java index d9e219a332a..fe3e5ae410a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpConfig.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpConfig.java @@ -28,25 +28,41 @@ import org.apache.hadoop.fs.CommonConfigurationKeysPublic; @InterfaceAudience.Private @InterfaceStability.Unstable public class HttpConfig { - private static boolean sslEnabled; + private static Policy policy; + public enum Policy { + HTTP_ONLY, + HTTPS_ONLY; + + public static Policy fromString(String value) { + if (value.equalsIgnoreCase(CommonConfigurationKeysPublic + .HTTP_POLICY_HTTPS_ONLY)) { + return HTTPS_ONLY; + } + return HTTP_ONLY; + } + } static { Configuration conf = new Configuration(); - sslEnabled = conf.getBoolean( - CommonConfigurationKeysPublic.HADOOP_SSL_ENABLED_KEY, - CommonConfigurationKeysPublic.HADOOP_SSL_ENABLED_DEFAULT); + boolean sslEnabled = conf.getBoolean( + CommonConfigurationKeysPublic.HADOOP_SSL_ENABLED_KEY, + CommonConfigurationKeysPublic.HADOOP_SSL_ENABLED_DEFAULT); + policy = sslEnabled ? Policy.HTTPS_ONLY : Policy.HTTP_ONLY; } - public static void setSecure(boolean secure) { - sslEnabled = secure; + public static void setPolicy(Policy policy) { + HttpConfig.policy = policy; } public static boolean isSecure() { - return sslEnabled; + return policy == Policy.HTTPS_ONLY; } public static String getSchemePrefix() { return (isSecure()) ? 
"https://" : "http://"; } + public static String getScheme(Policy policy) { + return policy == Policy.HTTPS_ONLY ? "https://" : "http://"; + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestSSLHttpServer.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestSSLHttpServer.java index 880804ec2c5..e5fd4b0a087 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestSSLHttpServer.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestSSLHttpServer.java @@ -54,7 +54,7 @@ public class TestSSLHttpServer extends HttpServerFunctionalTest { @Before public void setup() throws Exception { - HttpConfig.setSecure(true); + HttpConfig.setPolicy(HttpConfig.Policy.HTTPS_ONLY); File base = new File(BASEDIR); FileUtil.fullyDelete(base); base.mkdirs(); @@ -89,7 +89,7 @@ public class TestSSLHttpServer extends HttpServerFunctionalTest { String classpathDir = KeyStoreTestUtil.getClasspathDir(TestSSLHttpServer.class); new File(classpathDir, CONFIG_SITE_XML).delete(); - HttpConfig.setSecure(false); + HttpConfig.setPolicy(HttpConfig.Policy.HTTP_ONLY); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java index 9e038121033..1509cb51e7a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java @@ -18,60 +18,29 @@ package org.apache.hadoop.mapreduce.v2.app; -import java.io.IOException; -import java.lang.reflect.Constructor; -import java.lang.reflect.InvocationTargetException; -import java.security.PrivilegedExceptionAction; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.atomic.AtomicBoolean; - +import com.google.common.annotations.VisibleForTesting; import org.apache.commons.io.IOUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.http.HttpConfig; -import org.apache.hadoop.mapred.FileOutputCommitter; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.LocalContainerLauncher; -import org.apache.hadoop.mapred.TaskAttemptListenerImpl; -import org.apache.hadoop.mapred.TaskUmbilicalProtocol; -import org.apache.hadoop.mapreduce.MRConfig; -import org.apache.hadoop.mapreduce.MRJobConfig; +import org.apache.hadoop.mapred.*; +import org.apache.hadoop.mapreduce.*; import org.apache.hadoop.mapreduce.OutputCommitter; import org.apache.hadoop.mapreduce.OutputFormat; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.TaskAttemptID; -import org.apache.hadoop.mapreduce.TypeConverter; -import 
org.apache.hadoop.mapreduce.jobhistory.AMStartedEvent; -import org.apache.hadoop.mapreduce.jobhistory.EventReader; -import org.apache.hadoop.mapreduce.jobhistory.EventType; -import org.apache.hadoop.mapreduce.jobhistory.HistoryEvent; -import org.apache.hadoop.mapreduce.jobhistory.JobHistoryCopyService; -import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent; -import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEventHandler; -import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser; +import org.apache.hadoop.mapreduce.jobhistory.*; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo; import org.apache.hadoop.mapreduce.security.TokenCache; import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager; import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; -import org.apache.hadoop.mapreduce.v2.api.records.AMInfo; -import org.apache.hadoop.mapreduce.v2.api.records.JobId; -import org.apache.hadoop.mapreduce.v2.api.records.TaskId; -import org.apache.hadoop.mapreduce.v2.api.records.TaskState; +import org.apache.hadoop.mapreduce.v2.api.records.*; import org.apache.hadoop.mapreduce.v2.api.records.TaskType; import org.apache.hadoop.mapreduce.v2.app.client.ClientService; import org.apache.hadoop.mapreduce.v2.app.client.MRClientService; @@ -82,30 +51,17 @@ import org.apache.hadoop.mapreduce.v2.app.job.Job; import org.apache.hadoop.mapreduce.v2.app.job.JobStateInternal; import org.apache.hadoop.mapreduce.v2.app.job.Task; import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt; -import org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent; -import org.apache.hadoop.mapreduce.v2.app.job.event.JobEventType; -import org.apache.hadoop.mapreduce.v2.app.job.event.JobFinishEvent; -import org.apache.hadoop.mapreduce.v2.app.job.event.JobStartEvent; -import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent; -import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType; -import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent; -import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEventType; +import org.apache.hadoop.mapreduce.v2.app.job.event.*; import org.apache.hadoop.mapreduce.v2.app.job.impl.JobImpl; import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncher; import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherEvent; import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherImpl; import org.apache.hadoop.mapreduce.v2.app.local.LocalContainerAllocator; import org.apache.hadoop.mapreduce.v2.app.metrics.MRAppMetrics; -import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocator; -import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocatorEvent; -import org.apache.hadoop.mapreduce.v2.app.rm.RMCommunicator; -import org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator; -import org.apache.hadoop.mapreduce.v2.app.rm.RMContainerRequestor; -import org.apache.hadoop.mapreduce.v2.app.rm.RMHeartbeatHandler; +import org.apache.hadoop.mapreduce.v2.app.rm.*; import org.apache.hadoop.mapreduce.v2.app.speculate.DefaultSpeculator; import org.apache.hadoop.mapreduce.v2.app.speculate.Speculator; import org.apache.hadoop.mapreduce.v2.app.speculate.SpeculatorEvent; -import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig; import org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils; import org.apache.hadoop.mapreduce.v2.util.MRApps; 
import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils; @@ -139,7 +95,14 @@ import org.apache.hadoop.yarn.util.Clock; import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.hadoop.yarn.util.SystemClock; -import com.google.common.annotations.VisibleForTesting; +import java.io.IOException; +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.security.PrivilegedExceptionAction; +import java.util.*; +import java.util.Map.Entry; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicBoolean; /** * The Map-Reduce Application Master. @@ -1351,7 +1314,7 @@ public class MRAppMaster extends CompositeService { // RM/NM to issue SSL certificates but definitely not MR-AM as it is // running in user-land. MRWebAppUtil.initialize(conf); - HttpConfig.setSecure(MRWebAppUtil.isSSLEnabledInMRAM()); + HttpConfig.setPolicy(HttpConfig.Policy.HTTP_ONLY); // log the system properties String systemPropsToLog = MRApps.getSystemPropertiesToLog(conf); if (systemPropsToLog != null) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/AppController.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/AppController.java index bb188c06f3a..d7929cc8e38 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/AppController.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/AppController.java @@ -63,7 +63,8 @@ public class AppController extends Controller implements AMParams { set(APP_ID, app.context.getApplicationID().toString()); set(RM_WEB, JOINER.join(MRWebAppUtil.getYARNWebappScheme(), - WebAppUtils.getResolvedRMWebAppURLWithoutScheme(conf))); + WebAppUtils.getResolvedRMWebAppURLWithoutScheme(conf, + MRWebAppUtil.getYARNHttpPolicy()))); } @Inject diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java index e7986d4afe7..ee7dae93cac 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java @@ -18,14 +18,9 @@ package org.apache.hadoop.mapreduce.v2.jobhistory; -import java.net.InetAddress; -import java.net.InetSocketAddress; -import java.net.UnknownHostException; - import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; /** * Stores Job History configuration keys that can be set by administrators of @@ -129,10 +124,11 @@ public class JHAdminConfig { public static final String MR_HISTORY_PRINCIPAL = MR_HISTORY_PREFIX + "principal"; - /** To enable SSL in MR history server */ - public static final String MR_HS_SSL_ENABLED = 
MR_HISTORY_PREFIX - + "ssl.enabled"; - public static boolean DEFAULT_MR_HS_SSL_ENABLED = false; + /** To enable https in MR history server */ + public static final String MR_HS_HTTP_POLICY = MR_HISTORY_PREFIX + + "http.policy"; + public static String DEFAULT_MR_HS_HTTP_POLICY = + CommonConfigurationKeysPublic.HTTP_POLICY_HTTP_ONLY; /**The address the history server webapp is on.*/ public static final String MR_HISTORY_WEBAPP_ADDRESS = diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRWebAppUtil.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRWebAppUtil.java index 095d25b7841..49a0407d0eb 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRWebAppUtil.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRWebAppUtil.java @@ -17,24 +17,25 @@ */ package org.apache.hadoop.mapreduce.v2.util; +import com.google.common.base.Joiner; +import com.google.common.base.Splitter; +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.classification.InterfaceStability.Evolving; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.http.HttpConfig; +import org.apache.hadoop.mapreduce.JobID; +import org.apache.hadoop.mapreduce.TypeConverter; +import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig; +import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.conf.YarnConfiguration; + import java.net.InetAddress; import java.net.InetSocketAddress; import java.net.UnknownHostException; import java.util.Iterator; -import org.apache.hadoop.classification.InterfaceAudience.Private; -import org.apache.hadoop.classification.InterfaceStability.Evolving; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.CommonConfigurationKeysPublic; -import org.apache.hadoop.mapreduce.JobID; -import org.apache.hadoop.mapreduce.MRConfig; -import org.apache.hadoop.mapreduce.TypeConverter; -import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig; -import org.apache.hadoop.net.NetUtils; -import org.apache.hadoop.yarn.api.records.ApplicationId; - -import com.google.common.base.Joiner; -import com.google.common.base.Splitter; +import static org.apache.hadoop.http.HttpConfig.Policy; @Private @Evolving @@ -42,63 +43,44 @@ public class MRWebAppUtil { private static final Splitter ADDR_SPLITTER = Splitter.on(':').trimResults(); private static final Joiner JOINER = Joiner.on(""); - private static boolean isSSLEnabledInYARN; - private static boolean isSSLEnabledInJHS; - private static boolean isSSLEnabledInMRAM; - + private static Policy httpPolicyInYarn; + private static Policy httpPolicyInJHS; + public static void initialize(Configuration conf) { - setSSLEnabledInYARN(conf.getBoolean( - CommonConfigurationKeysPublic.HADOOP_SSL_ENABLED_KEY, - CommonConfigurationKeysPublic.HADOOP_SSL_ENABLED_DEFAULT)); - setSSLEnabledInJHS(conf.getBoolean(JHAdminConfig.MR_HS_SSL_ENABLED, - JHAdminConfig.DEFAULT_MR_HS_SSL_ENABLED)); - setSSLEnabledInMRAM(conf.getBoolean(MRConfig.SSL_ENABLED_KEY, - MRConfig.SSL_ENABLED_KEY_DEFAULT)); + setHttpPolicyInYARN(conf.get( + YarnConfiguration.YARN_HTTP_POLICY_KEY, + 
YarnConfiguration.YARN_HTTP_POLICY_DEFAULT)); + setHttpPolicyInJHS(conf.get(JHAdminConfig.MR_HS_HTTP_POLICY, + JHAdminConfig.DEFAULT_MR_HS_HTTP_POLICY)); } - private static void setSSLEnabledInYARN(boolean isSSLEnabledInYARN) { - MRWebAppUtil.isSSLEnabledInYARN = isSSLEnabledInYARN; + private static void setHttpPolicyInJHS(String policy) { + MRWebAppUtil.httpPolicyInJHS = Policy.fromString(policy); } - private static void setSSLEnabledInJHS(boolean isSSLEnabledInJHS) { - MRWebAppUtil.isSSLEnabledInJHS = isSSLEnabledInJHS; + private static void setHttpPolicyInYARN(String policy) { + MRWebAppUtil.httpPolicyInYarn = Policy.fromString(policy); } - private static void setSSLEnabledInMRAM(boolean isSSLEnabledInMRAM) { - MRWebAppUtil.isSSLEnabledInMRAM = isSSLEnabledInMRAM; + public static Policy getJHSHttpPolicy() { + return MRWebAppUtil.httpPolicyInJHS; } - public static boolean isSSLEnabledInYARN() { - return isSSLEnabledInYARN; - } - - public static boolean isSSLEnabledInJHS() { - return isSSLEnabledInJHS; - } - - public static boolean isSSLEnabledInMRAM() { - return isSSLEnabledInMRAM; + public static Policy getYARNHttpPolicy() { + return MRWebAppUtil.httpPolicyInYarn; } public static String getYARNWebappScheme() { - if (isSSLEnabledInYARN) { - return "https://"; - } else { - return "http://"; - } + return HttpConfig.getScheme(httpPolicyInYarn); } public static String getJHSWebappScheme() { - if (isSSLEnabledInJHS) { - return "https://"; - } else { - return "http://"; - } + return HttpConfig.getScheme(httpPolicyInJHS); } public static void setJHSWebappURLWithoutScheme(Configuration conf, String hostAddress) { - if (isSSLEnabledInJHS) { + if (httpPolicyInJHS == Policy.HTTPS_ONLY) { conf.set(JHAdminConfig.MR_HISTORY_WEBAPP_HTTPS_ADDRESS, hostAddress); } else { conf.set(JHAdminConfig.MR_HISTORY_WEBAPP_ADDRESS, hostAddress); @@ -106,7 +88,7 @@ public class MRWebAppUtil { } public static String getJHSWebappURLWithoutScheme(Configuration conf) { - if (isSSLEnabledInJHS) { + if (httpPolicyInJHS == Policy.HTTPS_ONLY) { return conf.get(JHAdminConfig.MR_HISTORY_WEBAPP_HTTPS_ADDRESS, JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_HTTPS_ADDRESS); } else { @@ -120,7 +102,7 @@ public class MRWebAppUtil { } public static InetSocketAddress getJHSWebBindAddress(Configuration conf) { - if (isSSLEnabledInJHS) { + if (httpPolicyInJHS == Policy.HTTPS_ONLY) { return conf.getSocketAddr(JHAdminConfig.MR_HISTORY_WEBAPP_HTTPS_ADDRESS, JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_HTTPS_ADDRESS, JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_HTTPS_PORT); @@ -168,26 +150,18 @@ public class MRWebAppUtil { } private static int getDefaultJHSWebappPort() { - if (isSSLEnabledInJHS) { - return JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_HTTPS_PORT; - } else { - return JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_PORT; - } + return httpPolicyInJHS == Policy.HTTPS_ONLY ? + JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_HTTPS_PORT: + JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_PORT; } private static String getDefaultJHSWebappURLWithoutScheme() { - if (isSSLEnabledInJHS) { - return JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_HTTPS_ADDRESS; - } else { - return JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_ADDRESS; - } + return httpPolicyInJHS == Policy.HTTPS_ONLY ? 
+ JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_HTTPS_ADDRESS : + JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_ADDRESS; } - + public static String getAMWebappScheme(Configuration conf) { - if (isSSLEnabledInMRAM) { - return "https://"; - } else { - return "http://"; - } + return "http://"; } } \ No newline at end of file diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRConfig.java index 830bb4406cd..bbac5fcab9d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRConfig.java @@ -84,11 +84,6 @@ public interface MRConfig { "mapreduce.shuffle.ssl.enabled"; public static final boolean SHUFFLE_SSL_ENABLED_DEFAULT = false; - - public static final String SSL_ENABLED_KEY = - "mapreduce.am.ssl.enabled"; - - public static final boolean SSL_ENABLED_KEY_DEFAULT = false; public static final String SHUFFLE_CONSUMER_PLUGIN = "mapreduce.job.reduce.shuffle.consumer.plugin.class"; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml index 0e1b21ec0d1..598d106ce95 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml @@ -289,20 +289,6 @@ - - mapreduce.am.ssl.enabled - false - - If enabled, MapReduce application master's http server will be - started with SSL enabled. Map reduce AM by default doesn't support SSL. - If MapReduce jobs want SSL support, it is the user's responsibility to - create and manage certificates, keystores and trust-stores with appropriate - permissions. This is only for MapReduce application master and is not used - by job history server. To enable encrypted shuffle this property is not - required, instead refer to (mapreduce.shuffle.ssl.enabled) property. - - - mapreduce.shuffle.ssl.file.buffer.size 65536 @@ -1235,11 +1221,13 @@ - mapreduce.jobhistory.ssl.enabled - false + mapreduce.jobhistory.http.policy + HTTP_ONLY - Whether to use SSL for the HTTP endpoints. If set to true, the - JobHistoryServer web UIs will be served over HTTPS instead HTTP. + This configures the HTTP endpoint for JobHistoryServer web UI. 
+ The following values are supported: + - HTTP_ONLY : Service is provided only on http + - HTTPS_ONLY : Service is provided only on https diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/JobHistoryServer.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/JobHistoryServer.java index 168d75d1083..4fc84c96fae 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/JobHistoryServer.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/JobHistoryServer.java @@ -24,7 +24,6 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.http.HttpConfig; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapreduce.MRConfig; @@ -120,7 +119,7 @@ public class JobHistoryServer extends CompositeService { // This is required for WebApps to use https if enabled. MRWebAppUtil.initialize(getConfig()); - HttpConfig.setSecure(MRWebAppUtil.isSSLEnabledInJHS()); + HttpConfig.setPolicy(MRWebAppUtil.getJHSHttpPolicy()); try { doSecureLogin(conf); } catch(IOException ie) { diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 2aa8c55659a..0735120b4e7 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -102,6 +102,10 @@ Release 2.1.2 - UNRELEASED YARN-1213. Restore config to ban submitting to undeclared pools in the Fair Scheduler. (Sandy Ryza) + YARN-1277. Added a policy based configuration for http/https in common + HttpServer and using the same in YARN - related to per project https config + support via HADOOP-10022. 
(Suresh Srinivas and Omkar Vinit Joshi via vinodkv) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 31f344293d5..2003e13c3b5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -24,6 +24,7 @@ import java.util.Arrays; import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability.Evolving; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.api.ApplicationConstants; @@ -862,7 +863,12 @@ public class YarnConfiguration extends Configuration { public static final String NM_CLIENT_MAX_NM_PROXIES = YARN_PREFIX + "client.max-nodemanagers-proxies"; public static final int DEFAULT_NM_CLIENT_MAX_NM_PROXIES = 500; - + + public static final String YARN_HTTP_POLICY_KEY = + YARN_PREFIX + "http.policy"; + public static final String YARN_HTTP_POLICY_DEFAULT = + CommonConfigurationKeysPublic.HTTP_POLICY_HTTP_ONLY; + public YarnConfiguration() { super(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/util/WebAppUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/util/WebAppUtils.java index c340332a326..ede55013b09 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/util/WebAppUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/util/WebAppUtils.java @@ -25,6 +25,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Evolving; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.http.HttpConfig; +import org.apache.hadoop.http.HttpConfig.Policy; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.yarn.conf.YarnConfiguration; @@ -97,8 +98,14 @@ public class WebAppUtils { } public static String getResolvedRMWebAppURLWithoutScheme(Configuration conf) { + return getResolvedRMWebAppURLWithoutScheme(conf, + HttpConfig.isSecure() ? 
Policy.HTTPS_ONLY : Policy.HTTP_ONLY); + } + + public static String getResolvedRMWebAppURLWithoutScheme(Configuration conf, + Policy httpPolicy) { InetSocketAddress address = null; - if (HttpConfig.isSecure()) { + if (httpPolicy == Policy.HTTPS_ONLY) { address = conf.getSocketAddr(YarnConfiguration.RM_WEBAPP_HTTPS_ADDRESS, YarnConfiguration.DEFAULT_RM_WEBAPP_HTTPS_ADDRESS, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index 0127fcc579f..86501ad7799 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -99,6 +99,17 @@ 50 + + + This configures the HTTP endpoint for Yarn Daemons.The following + values are supported: + - HTTP_ONLY : Service is provided only on http + - HTTPS_ONLY : Service is provided only on https + + yarn.http.policy + HTTP_ONLY + + The http address of the RM web application. yarn.resourcemanager.webapp.address diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java index 998fca775d5..a169c125a38 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java @@ -28,6 +28,8 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.http.HttpConfig; +import org.apache.hadoop.http.HttpConfig.Policy; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.service.CompositeService; @@ -395,9 +397,16 @@ public class NodeManager extends CompositeService StringUtils.startupShutdownMessage(NodeManager.class, args, LOG); NodeManager nodeManager = new NodeManager(); Configuration conf = new YarnConfiguration(); + setHttpPolicy(conf); nodeManager.initAndStartNodeManager(conf, false); } + private static void setHttpPolicy(Configuration conf) { + HttpConfig.setPolicy(Policy.fromString(conf.get( + YarnConfiguration.YARN_HTTP_POLICY_KEY, + YarnConfiguration.YARN_HTTP_POLICY_DEFAULT))); + } + @VisibleForTesting @Private public NodeStatusUpdater getNodeStatusUpdater() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java index 3a059217759..e46c2bf1d55 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java @@ -28,6 +28,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.http.HttpConfig; +import org.apache.hadoop.http.HttpConfig.Policy; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.source.JvmMetrics; import org.apache.hadoop.security.SecurityUtil; @@ -935,6 +936,7 @@ public class ResourceManager extends CompositeService implements Recoverable { ShutdownHookManager.get().addShutdownHook( new CompositeServiceShutdownHook(resourceManager), SHUTDOWN_HOOK_PRIORITY); + setHttpPolicy(conf); resourceManager.init(conf); resourceManager.start(); } catch (Throwable t) { @@ -942,4 +944,10 @@ public class ResourceManager extends CompositeService implements Recoverable { System.exit(-1); } } + + private static void setHttpPolicy(Configuration conf) { + HttpConfig.setPolicy(Policy.fromString(conf.get( + YarnConfiguration.YARN_HTTP_POLICY_KEY, + YarnConfiguration.YARN_HTTP_POLICY_DEFAULT))); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/ProxyUriUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/ProxyUriUtils.java index 2be18d33e55..4481f609069 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/ProxyUriUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/ProxyUriUtils.java @@ -18,7 +18,11 @@ package org.apache.hadoop.yarn.server.webproxy; -import static org.apache.hadoop.yarn.util.StringHelper.ujoin; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.http.HttpConfig; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.util.TrackingUriPlugin; import java.io.UnsupportedEncodingException; import java.net.URI; @@ -26,11 +30,7 @@ import java.net.URISyntaxException; import java.net.URLEncoder; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.http.HttpConfig; -import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.util.TrackingUriPlugin; +import static org.apache.hadoop.yarn.util.StringHelper.ujoin; public class ProxyUriUtils { @SuppressWarnings("unused") @@ -148,9 +148,9 @@ public class ProxyUriUtils { /* * check is made to make sure if AM reports with scheme then it will be * used by default otherwise it will default to the one configured using - * "hadoop.ssl.enabled". + * "yarn.http.policy". */ - return new URI(HttpConfig.getSchemePrefix() + url); + return new URI(HttpConfig.getSchemePrefix() + url); } else { return new URI(url); } @@ -168,9 +168,9 @@ public class ProxyUriUtils { /* * check is made to make sure if AM reports with scheme then it will be * used by default otherwise it will default to the one configured using - * "hadoop.ssl.enabled". + * "yarn.http.policy". 
*/ - return new URI(scheme + "://" + noSchemeUrl); + return new URI(scheme + "://" + noSchemeUrl); } else { return new URI(noSchemeUrl); } From f0799c55360e1e77224955f331892390e4361729 Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Sun, 6 Oct 2013 20:53:28 +0000 Subject: [PATCH 067/133] MAPREDUCE-5562. Fixed MR App Master to perform pending tasks like staging-dir cleanup, sending job-end notification correctly when unregister with RM fails. Contributed by Zhijie Shen. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529682 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 4 + .../hadoop/mapreduce/v2/app/AppContext.java | 2 +- .../hadoop/mapreduce/v2/app/MRAppMaster.java | 105 +++++++++---- .../mapreduce/v2/app/job/impl/JobImpl.java | 4 +- .../mapreduce/v2/app/rm/RMCommunicator.java | 88 ++++++----- .../apache/hadoop/mapreduce/v2/app/MRApp.java | 24 +-- .../mapreduce/v2/app/MockAppContext.java | 2 +- .../mapreduce/v2/app/TestJobEndNotifier.java | 142 +++++++++++++++--- .../hadoop/mapreduce/v2/app/TestMRApp.java | 22 +-- .../v2/app/TestRuntimeEstimators.java | 2 +- .../mapreduce/v2/app/TestStagingCleanup.java | 90 ++++++++++- .../v2/app/job/impl/TestJobImpl.java | 12 +- .../local/TestLocalContainerAllocator.java | 4 + .../hadoop/mapreduce/v2/hs/JobHistory.java | 2 +- 14 files changed, 380 insertions(+), 123 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 8c10325e387..5ca29b021b4 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -282,6 +282,10 @@ Release 2.1.2 - UNRELEASED aren't heart-beating for a while, so that we can aggressively speculate instead of waiting for task-timeout (Xuan Gong via vinodkv) + MAPREDUCE-5562. Fixed MR App Master to perform pending tasks like staging-dir + cleanup, sending job-end notification correctly when unregister with RM + fails. 
(Zhijie Shen via vinodkv) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/AppContext.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/AppContext.java index 36482aebe31..6f036c4a74a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/AppContext.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/AppContext.java @@ -64,6 +64,6 @@ public interface AppContext { boolean isLastAMRetry(); - boolean safeToReportTerminationToUser(); + boolean hasSuccessfullyUnregistered(); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java index 1509cb51e7a..b60b64764a2 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java @@ -18,7 +18,21 @@ package org.apache.hadoop.mapreduce.v2.app; -import com.google.common.annotations.VisibleForTesting; +import java.io.IOException; +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.security.PrivilegedExceptionAction; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicBoolean; + import org.apache.commons.io.IOUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -27,20 +41,37 @@ import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.http.HttpConfig; -import org.apache.hadoop.mapred.*; -import org.apache.hadoop.mapreduce.*; +import org.apache.hadoop.mapred.FileOutputCommitter; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.LocalContainerLauncher; +import org.apache.hadoop.mapred.TaskAttemptListenerImpl; +import org.apache.hadoop.mapred.TaskUmbilicalProtocol; +import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.OutputCommitter; import org.apache.hadoop.mapreduce.OutputFormat; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.TaskAttemptID; -import org.apache.hadoop.mapreduce.jobhistory.*; +import org.apache.hadoop.mapreduce.TypeConverter; +import org.apache.hadoop.mapreduce.jobhistory.AMStartedEvent; +import org.apache.hadoop.mapreduce.jobhistory.EventReader; +import org.apache.hadoop.mapreduce.jobhistory.EventType; +import org.apache.hadoop.mapreduce.jobhistory.HistoryEvent; +import org.apache.hadoop.mapreduce.jobhistory.JobHistoryCopyService; +import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent; +import 
org.apache.hadoop.mapreduce.jobhistory.JobHistoryEventHandler; +import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo; import org.apache.hadoop.mapreduce.security.TokenCache; import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager; import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; -import org.apache.hadoop.mapreduce.v2.api.records.*; +import org.apache.hadoop.mapreduce.v2.api.records.AMInfo; +import org.apache.hadoop.mapreduce.v2.api.records.JobId; +import org.apache.hadoop.mapreduce.v2.api.records.JobReport; +import org.apache.hadoop.mapreduce.v2.api.records.JobState; +import org.apache.hadoop.mapreduce.v2.api.records.TaskId; +import org.apache.hadoop.mapreduce.v2.api.records.TaskState; import org.apache.hadoop.mapreduce.v2.api.records.TaskType; import org.apache.hadoop.mapreduce.v2.app.client.ClientService; import org.apache.hadoop.mapreduce.v2.app.client.MRClientService; @@ -51,14 +82,26 @@ import org.apache.hadoop.mapreduce.v2.app.job.Job; import org.apache.hadoop.mapreduce.v2.app.job.JobStateInternal; import org.apache.hadoop.mapreduce.v2.app.job.Task; import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt; -import org.apache.hadoop.mapreduce.v2.app.job.event.*; +import org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent; +import org.apache.hadoop.mapreduce.v2.app.job.event.JobEventType; +import org.apache.hadoop.mapreduce.v2.app.job.event.JobFinishEvent; +import org.apache.hadoop.mapreduce.v2.app.job.event.JobStartEvent; +import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent; +import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType; +import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent; +import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEventType; import org.apache.hadoop.mapreduce.v2.app.job.impl.JobImpl; import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncher; import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherEvent; import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherImpl; import org.apache.hadoop.mapreduce.v2.app.local.LocalContainerAllocator; import org.apache.hadoop.mapreduce.v2.app.metrics.MRAppMetrics; -import org.apache.hadoop.mapreduce.v2.app.rm.*; +import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocator; +import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocatorEvent; +import org.apache.hadoop.mapreduce.v2.app.rm.RMCommunicator; +import org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator; +import org.apache.hadoop.mapreduce.v2.app.rm.RMContainerRequestor; +import org.apache.hadoop.mapreduce.v2.app.rm.RMHeartbeatHandler; import org.apache.hadoop.mapreduce.v2.app.speculate.DefaultSpeculator; import org.apache.hadoop.mapreduce.v2.app.speculate.Speculator; import org.apache.hadoop.mapreduce.v2.app.speculate.SpeculatorEvent; @@ -95,14 +138,7 @@ import org.apache.hadoop.yarn.util.Clock; import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.hadoop.yarn.util.SystemClock; -import java.io.IOException; -import java.lang.reflect.Constructor; -import java.lang.reflect.InvocationTargetException; -import java.security.PrivilegedExceptionAction; -import java.util.*; -import java.util.Map.Entry; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.atomic.AtomicBoolean; 
+import com.google.common.annotations.VisibleForTesting; /** * The Map-Reduce Application Master. @@ -166,7 +202,8 @@ public class MRAppMaster extends CompositeService { private Credentials jobCredentials = new Credentials(); // Filled during init protected UserGroupInformation currentUser; // Will be setup during init - private volatile boolean isLastAMRetry = false; + @VisibleForTesting + protected volatile boolean isLastAMRetry = false; //Something happened and we should shut down right after we start up. boolean errorHappenedShutDown = false; private String shutDownMessage = null; @@ -175,7 +212,7 @@ public class MRAppMaster extends CompositeService { private long recoveredJobStartTime = 0; @VisibleForTesting - protected AtomicBoolean safeToReportTerminationToUser = + protected AtomicBoolean successfullyUnregistered = new AtomicBoolean(false); public MRAppMaster(ApplicationAttemptId applicationAttemptId, @@ -208,14 +245,14 @@ public class MRAppMaster extends CompositeService { initJobCredentialsAndUGI(conf); - isLastAMRetry = appAttemptID.getAttemptId() >= maxAppAttempts; + context = new RunningAppContext(conf); + + ((RunningAppContext)context).computeIsLastAMRetry(); LOG.info("The specific max attempts: " + maxAppAttempts + " for application: " + appAttemptID.getApplicationId().getId() + ". Attempt num: " + appAttemptID.getAttemptId() + " is last retry: " + isLastAMRetry); - context = new RunningAppContext(conf); - // Job name is the same as the app name util we support DAG of jobs // for an app later appName = conf.get(MRJobConfig.JOB_NAME, ""); @@ -511,11 +548,6 @@ public class MRAppMaster extends CompositeService { MRAppMaster.this.stop(); if (isLastAMRetry) { - // Except ClientService, other services are already stopped, it is safe to - // let clients know the final states. ClientService should wait for some - // time so clients have enough time to know the final states. - safeToReportTerminationToUser.set(true); - // Send job-end notification when it is safe to report termination to // users and it is the last AM retry if (getConfig().get(MRJobConfig.MR_JOB_END_NOTIFICATION_URL) != null) { @@ -524,7 +556,14 @@ public class MRAppMaster extends CompositeService { + job.getReport().getJobId()); JobEndNotifier notifier = new JobEndNotifier(); notifier.setConf(getConfig()); - notifier.notify(job.getReport()); + JobReport report = job.getReport(); + // If unregistration fails, the final state is unavailable. 
However, + // at the last AM Retry, the client will finally be notified FAILED + // from RM, so we should let users know FAILED via notifier as well + if (!context.hasSuccessfullyUnregistered()) { + report.setJobState(JobState.FAILED); + } + notifier.notify(report); } catch (InterruptedException ie) { LOG.warn("Job end notification interrupted for jobID : " + job.getReport().getJobId(), ie); @@ -863,7 +902,7 @@ public class MRAppMaster extends CompositeService { } } - private class RunningAppContext implements AppContext { + public class RunningAppContext implements AppContext { private final Map jobs = new ConcurrentHashMap(); private final Configuration conf; @@ -942,8 +981,16 @@ public class MRAppMaster extends CompositeService { } @Override - public boolean safeToReportTerminationToUser() { - return safeToReportTerminationToUser.get(); + public boolean hasSuccessfullyUnregistered() { + return successfullyUnregistered.get(); + } + + public void markSuccessfulUnregistration() { + successfullyUnregistered.set(true); + } + + public void computeIsLastAMRetry() { + isLastAMRetry = appAttemptID.getAttemptId() >= maxAppAttempts; } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java index 6241df905fa..c884a51cbf9 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java @@ -128,8 +128,6 @@ import org.apache.hadoop.yarn.state.StateMachine; import org.apache.hadoop.yarn.state.StateMachineFactory; import org.apache.hadoop.yarn.util.Clock; -import com.google.common.annotations.VisibleForTesting; - /** Implementation of Job interface. Maintains the state machines of Job. * The read and write calls use ReadWriteLock for concurrency. 
*/ @@ -933,7 +931,7 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, readLock.lock(); try { JobState state = getExternalState(getInternalState()); - if (!appContext.safeToReportTerminationToUser() + if (!appContext.hasSuccessfullyUnregistered() && (state == JobState.SUCCEEDED || state == JobState.FAILED || state == JobState.KILLED || state == JobState.ERROR)) { return lastNonFinalState; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java index 67c632a87c0..f09ac744d5e 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java @@ -29,11 +29,11 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.JobID; -import org.apache.hadoop.mapreduce.MRConfig; import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.TypeConverter; import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.mapreduce.v2.app.AppContext; +import org.apache.hadoop.mapreduce.v2.app.MRAppMaster.RunningAppContext; import org.apache.hadoop.mapreduce.v2.app.client.ClientService; import org.apache.hadoop.mapreduce.v2.app.job.Job; import org.apache.hadoop.mapreduce.v2.app.job.JobStateInternal; @@ -52,10 +52,13 @@ import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.client.ClientRMProxy; import org.apache.hadoop.yarn.event.EventHandler; +import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; +import com.google.common.annotations.VisibleForTesting; + /** * Registers/unregisters to RM and sends heartbeats to RM. 
*/ @@ -171,41 +174,57 @@ public abstract class RMCommunicator extends AbstractService protected void unregister() { try { - FinalApplicationStatus finishState = FinalApplicationStatus.UNDEFINED; - JobImpl jobImpl = (JobImpl)job; - if (jobImpl.getInternalState() == JobStateInternal.SUCCEEDED) { - finishState = FinalApplicationStatus.SUCCEEDED; - } else if (jobImpl.getInternalState() == JobStateInternal.KILLED - || (jobImpl.getInternalState() == JobStateInternal.RUNNING && isSignalled)) { - finishState = FinalApplicationStatus.KILLED; - } else if (jobImpl.getInternalState() == JobStateInternal.FAILED - || jobImpl.getInternalState() == JobStateInternal.ERROR) { - finishState = FinalApplicationStatus.FAILED; - } - StringBuffer sb = new StringBuffer(); - for (String s : job.getDiagnostics()) { - sb.append(s).append("\n"); - } - LOG.info("Setting job diagnostics to " + sb.toString()); - - String historyUrl = - MRWebAppUtil.getApplicationWebURLOnJHSWithScheme(getConfig(), - context.getApplicationID()); - LOG.info("History url is " + historyUrl); - FinishApplicationMasterRequest request = - FinishApplicationMasterRequest.newInstance(finishState, - sb.toString(), historyUrl); - while (true) { - FinishApplicationMasterResponse response = - scheduler.finishApplicationMaster(request); - if (response.getIsUnregistered()) { - break; - } - LOG.info("Waiting for application to be successfully unregistered."); - Thread.sleep(rmPollInterval); - } + doUnregistration(); } catch(Exception are) { LOG.error("Exception while unregistering ", are); + // if unregistration failed, isLastAMRetry needs to be recalculated + // to see whether AM really has the chance to retry + RunningAppContext raContext = (RunningAppContext) context; + raContext.computeIsLastAMRetry(); + } + } + + @VisibleForTesting + protected void doUnregistration() + throws YarnException, IOException, InterruptedException { + FinalApplicationStatus finishState = FinalApplicationStatus.UNDEFINED; + JobImpl jobImpl = (JobImpl)job; + if (jobImpl.getInternalState() == JobStateInternal.SUCCEEDED) { + finishState = FinalApplicationStatus.SUCCEEDED; + } else if (jobImpl.getInternalState() == JobStateInternal.KILLED + || (jobImpl.getInternalState() == JobStateInternal.RUNNING && isSignalled)) { + finishState = FinalApplicationStatus.KILLED; + } else if (jobImpl.getInternalState() == JobStateInternal.FAILED + || jobImpl.getInternalState() == JobStateInternal.ERROR) { + finishState = FinalApplicationStatus.FAILED; + } + StringBuffer sb = new StringBuffer(); + for (String s : job.getDiagnostics()) { + sb.append(s).append("\n"); + } + LOG.info("Setting job diagnostics to " + sb.toString()); + + String historyUrl = + MRWebAppUtil.getApplicationWebURLOnJHSWithScheme(getConfig(), + context.getApplicationID()); + LOG.info("History url is " + historyUrl); + FinishApplicationMasterRequest request = + FinishApplicationMasterRequest.newInstance(finishState, + sb.toString(), historyUrl); + while (true) { + FinishApplicationMasterResponse response = + scheduler.finishApplicationMaster(request); + if (response.getIsUnregistered()) { + // When excepting ClientService, other services are already stopped, + // it is safe to let clients know the final states. ClientService + // should wait for some time so clients have enough time to know the + // final states. 
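+          // Recording the successful unregistration here is what allows JobImpl,
+          // via AppContext#hasSuccessfullyUnregistered(), to start reporting the
+          // real final job state to clients instead of the last non-final state.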
+ RunningAppContext raContext = (RunningAppContext) context; + raContext.markSuccessfulUnregistration(); + break; + } + LOG.info("Waiting for application to be successfully unregistered."); + Thread.sleep(rmPollInterval); } } @@ -235,7 +254,6 @@ public abstract class RMCommunicator extends AbstractService protected void startAllocatorThread() { allocatorThread = new Thread(new Runnable() { - @SuppressWarnings("unchecked") @Override public void run() { while (!stopped.get() && !Thread.currentThread().isInterrupted()) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRApp.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRApp.java index 3a7e865c7bf..de573fe3007 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRApp.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRApp.java @@ -136,9 +136,9 @@ public class MRApp extends MRAppMaster { } public MRApp(int maps, int reduces, boolean autoComplete, String testName, - boolean cleanOnStart, Clock clock, boolean shutdown) { + boolean cleanOnStart, Clock clock, boolean unregistered) { this(maps, reduces, autoComplete, testName, cleanOnStart, 1, clock, - shutdown); + unregistered); } public MRApp(int maps, int reduces, boolean autoComplete, String testName, @@ -147,8 +147,8 @@ public class MRApp extends MRAppMaster { } public MRApp(int maps, int reduces, boolean autoComplete, String testName, - boolean cleanOnStart, boolean shutdown) { - this(maps, reduces, autoComplete, testName, cleanOnStart, 1, shutdown); + boolean cleanOnStart, boolean unregistered) { + this(maps, reduces, autoComplete, testName, cleanOnStart, 1, unregistered); } @Override @@ -181,16 +181,16 @@ public class MRApp extends MRAppMaster { } public MRApp(int maps, int reduces, boolean autoComplete, String testName, - boolean cleanOnStart, int startCount, boolean shutdown) { + boolean cleanOnStart, int startCount, boolean unregistered) { this(maps, reduces, autoComplete, testName, cleanOnStart, startCount, - new SystemClock(), shutdown); + new SystemClock(), unregistered); } public MRApp(int maps, int reduces, boolean autoComplete, String testName, - boolean cleanOnStart, int startCount, Clock clock, boolean shutdown) { + boolean cleanOnStart, int startCount, Clock clock, boolean unregistered) { this(getApplicationAttemptId(applicationId, startCount), getContainerId( applicationId, startCount), maps, reduces, autoComplete, testName, - cleanOnStart, startCount, clock, shutdown); + cleanOnStart, startCount, clock, unregistered); } public MRApp(int maps, int reduces, boolean autoComplete, String testName, @@ -202,9 +202,9 @@ public class MRApp extends MRAppMaster { public MRApp(ApplicationAttemptId appAttemptId, ContainerId amContainerId, int maps, int reduces, boolean autoComplete, String testName, - boolean cleanOnStart, int startCount, boolean shutdown) { + boolean cleanOnStart, int startCount, boolean unregistered) { this(appAttemptId, amContainerId, maps, reduces, autoComplete, testName, - cleanOnStart, startCount, new SystemClock(), shutdown); + cleanOnStart, startCount, new SystemClock(), unregistered); } public MRApp(ApplicationAttemptId appAttemptId, ContainerId amContainerId, @@ -216,7 +216,7 @@ public class MRApp extends MRAppMaster 
{ public MRApp(ApplicationAttemptId appAttemptId, ContainerId amContainerId, int maps, int reduces, boolean autoComplete, String testName, - boolean cleanOnStart, int startCount, Clock clock, boolean shutdown) { + boolean cleanOnStart, int startCount, Clock clock, boolean unregistered) { super(appAttemptId, amContainerId, NM_HOST, NM_PORT, NM_HTTP_PORT, clock, System .currentTimeMillis(), MRJobConfig.DEFAULT_MR_AM_MAX_ATTEMPTS); this.testWorkDir = new File("target", testName); @@ -237,7 +237,7 @@ public class MRApp extends MRAppMaster { this.autoComplete = autoComplete; // If safeToReportTerminationToUser is set to true, we can verify whether // the job can reaches the final state when MRAppMaster shuts down. - this.safeToReportTerminationToUser.set(shutdown); + this.successfullyUnregistered.set(unregistered); } @Override diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MockAppContext.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MockAppContext.java index 0496072986e..d33e734f834 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MockAppContext.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MockAppContext.java @@ -137,7 +137,7 @@ public class MockAppContext implements AppContext { } @Override - public boolean safeToReportTerminationToUser() { + public boolean hasSuccessfullyUnregistered() { // bogus - Not Required return true; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestJobEndNotifier.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestJobEndNotifier.java index bd8baf400fc..116c32cc7b2 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestJobEndNotifier.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestJobEndNotifier.java @@ -21,6 +21,7 @@ package org.apache.hadoop.mapreduce.v2.app; import static org.mockito.Mockito.doNothing; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.when; import java.io.File; import java.io.IOException; @@ -41,10 +42,16 @@ import org.apache.hadoop.mapred.JobContext; import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.v2.api.records.JobReport; import org.apache.hadoop.mapreduce.v2.api.records.JobState; +import org.apache.hadoop.mapreduce.v2.app.client.ClientService; import org.apache.hadoop.mapreduce.v2.app.job.JobStateInternal; import org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.JobEventType; import org.apache.hadoop.mapreduce.v2.app.job.impl.JobImpl; +import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocator; +import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocatorEvent; +import org.apache.hadoop.mapreduce.v2.app.rm.RMCommunicator; +import org.apache.hadoop.mapreduce.v2.app.rm.RMHeartbeatHandler; +import org.apache.hadoop.yarn.exceptions.YarnException; import org.junit.Assert; import 
org.junit.Test; @@ -185,25 +192,19 @@ public class TestJobEndNotifier extends JobEndNotifier { } @Test - public void testNotificationOnNormalShutdown() throws Exception { + public void testNotificationOnLastRetryNormalShutdown() throws Exception { HttpServer server = startHttpServer(); // Act like it is the second attempt. Default max attempts is 2 - MRApp app = spy(new MRApp(2, 2, true, this.getClass().getName(), true, 2)); - // Make use of safeToReportflag so that we can look at final job-state as - // seen by real users. - app.safeToReportTerminationToUser.set(false); + MRApp app = spy(new MRAppWithCustomContainerAllocator( + 2, 2, true, this.getClass().getName(), true, 2, true)); doNothing().when(app).sysexit(); Configuration conf = new Configuration(); conf.set(JobContext.MR_JOB_END_NOTIFICATION_URL, JobEndServlet.baseUrl + "jobend?jobid=$jobId&status=$jobStatus"); JobImpl job = (JobImpl)app.submit(conf); - // Even though auto-complete is true, because app is not shut-down yet, user - // will only see RUNNING state. app.waitForInternalState(job, JobStateInternal.SUCCEEDED); - app.waitForState(job, JobState.RUNNING); - // Now shutdown. User should see SUCCEEDED state. + // Unregistration succeeds: successfullyUnregistered is set app.shutDownJob(); - app.waitForState(job, JobState.SUCCEEDED); Assert.assertEquals(true, app.isLastAMRetry()); Assert.assertEquals(1, JobEndServlet.calledTimes); Assert.assertEquals("jobid=" + job.getID() + "&status=SUCCEEDED", @@ -214,24 +215,25 @@ public class TestJobEndNotifier extends JobEndNotifier { } @Test - public void testNotificationOnNonLastRetryShutdown() throws Exception { + public void testAbsentNotificationOnNotLastRetryUnregistrationFailure() + throws Exception { HttpServer server = startHttpServer(); - MRApp app = spy(new MRApp(2, 2, false, this.getClass().getName(), true)); + MRApp app = spy(new MRAppWithCustomContainerAllocator(2, 2, false, + this.getClass().getName(), true, 1, false)); doNothing().when(app).sysexit(); - // Make use of safeToReportflag so that we can look at final job-state as - // seen by real users. - app.safeToReportTerminationToUser.set(false); Configuration conf = new Configuration(); conf.set(JobContext.MR_JOB_END_NOTIFICATION_URL, JobEndServlet.baseUrl + "jobend?jobid=$jobId&status=$jobStatus"); - JobImpl job = (JobImpl)app.submit(new Configuration()); + JobImpl job = (JobImpl)app.submit(conf); app.waitForState(job, JobState.RUNNING); app.getContext().getEventHandler() .handle(new JobEvent(app.getJobId(), JobEventType.JOB_AM_REBOOT)); app.waitForInternalState(job, JobStateInternal.REBOOT); + // Now shutdown. + // Unregistration fails: isLastAMRetry is recalculated; this is not the last AM retry + app.shutDownJob(); // Not the last AM attempt. So the user should see that the job is still running. 
app.waitForState(job, JobState.RUNNING); - app.shutDownJob(); Assert.assertEquals(false, app.isLastAMRetry()); Assert.assertEquals(0, JobEndServlet.calledTimes); Assert.assertEquals(null, JobEndServlet.requestUri); @@ -239,6 +241,33 @@ server.stop(); } + @Test + public void testNotificationOnLastRetryUnregistrationFailure() + throws Exception { + HttpServer server = startHttpServer(); + MRApp app = spy(new MRAppWithCustomContainerAllocator(2, 2, false, + this.getClass().getName(), true, 2, false)); + doNothing().when(app).sysexit(); + Configuration conf = new Configuration(); + conf.set(JobContext.MR_JOB_END_NOTIFICATION_URL, + JobEndServlet.baseUrl + "jobend?jobid=$jobId&status=$jobStatus"); + JobImpl job = (JobImpl)app.submit(conf); + app.waitForState(job, JobState.RUNNING); + app.getContext().getEventHandler() + .handle(new JobEvent(app.getJobId(), JobEventType.JOB_AM_REBOOT)); + app.waitForInternalState(job, JobStateInternal.REBOOT); + // Now shutdown. User should see FAILED state. + // Unregistration fails: isLastAMRetry is recalculated; this is the last AM retry + app.shutDownJob(); + Assert.assertEquals(true, app.isLastAMRetry()); + Assert.assertEquals(1, JobEndServlet.calledTimes); + Assert.assertEquals("jobid=" + job.getID() + "&status=FAILED", + JobEndServlet.requestUri.getQuery()); + Assert.assertEquals(JobState.FAILED.toString(), + JobEndServlet.foundJobState); + server.stop(); + } + + private static HttpServer startHttpServer() throws Exception { new File(System.getProperty( "build.webapps", "build/webapps") + "/test").mkdirs(); @@ -280,4 +309,83 @@ } } + private class MRAppWithCustomContainerAllocator extends MRApp { + + private boolean crushUnregistration; + + public MRAppWithCustomContainerAllocator(int maps, int reduces, + boolean autoComplete, String testName, boolean cleanOnStart, + int startCount, boolean crushUnregistration) { + super(maps, reduces, autoComplete, testName, cleanOnStart, startCount, + false); + this.crushUnregistration = crushUnregistration; + } + + @Override + protected ContainerAllocator createContainerAllocator( + ClientService clientService, AppContext context) { + context = spy(context); + when(context.getEventHandler()).thenReturn(null); + when(context.getApplicationID()).thenReturn(null); + return new CustomContainerAllocator(this, context); + } + + private class CustomContainerAllocator + extends RMCommunicator + implements ContainerAllocator, RMHeartbeatHandler { + private MRAppWithCustomContainerAllocator app; + private MRAppContainerAllocator allocator = + new MRAppContainerAllocator(); + + public CustomContainerAllocator( + MRAppWithCustomContainerAllocator app, AppContext context) { + super(null, context); + this.app = app; + } + + @Override + public void serviceInit(Configuration conf) { + } + + @Override + public void serviceStart() { + } + + @Override + public void serviceStop() { + unregister(); + } + + @Override + protected void doUnregistration() + throws YarnException, IOException, InterruptedException { + if (crushUnregistration) { + app.successfullyUnregistered.set(true); + } else { + throw new YarnException("test exception"); + } + } + + @Override + public void handle(ContainerAllocatorEvent event) { + allocator.handle(event); + } + + @Override + public long getLastHeartbeatTime() { + return allocator.getLastHeartbeatTime(); + } + + @Override + public void runOnNextHeartbeat(Runnable callback) { + allocator.runOnNextHeartbeat(callback); + 
} + + @Override + protected void heartbeat() throws Exception { + } + } + + } + } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestMRApp.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestMRApp.java index 1987d706139..cc752f1f6ec 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestMRApp.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestMRApp.java @@ -29,7 +29,6 @@ import java.util.Iterator; import junit.framework.Assert; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.mapreduce.MRConfig; import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.TypeConverter; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent; @@ -44,7 +43,6 @@ import org.apache.hadoop.mapreduce.v2.app.job.Task; import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt; import org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.JobEventType; -import org.apache.hadoop.mapreduce.v2.app.job.event.JobStartEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.JobUpdatedNodesEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType; @@ -55,15 +53,12 @@ import org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl; import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncher; import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherEvent; import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerRemoteLaunchEvent; -import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.Container; -import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeReport; import org.apache.hadoop.yarn.api.records.NodeState; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; -import org.apache.hadoop.yarn.util.Clock; import org.junit.Test; /** @@ -384,12 +379,13 @@ public class TestMRApp { // AM is not unregistered Assert.assertEquals(JobState.RUNNING, job.getState()); // imitate that AM is unregistered - app.safeToReportTerminationToUser.set(true); + app.successfullyUnregistered.set(true); app.waitForState(job, JobState.SUCCEEDED); } @Test - public void testJobRebootNotLastRetry() throws Exception { + public void testJobRebootNotLastRetryOnUnregistrationFailure() + throws Exception { MRApp app = new MRApp(1, 0, false, this.getClass().getName(), true); Job job = app.submit(new Configuration()); app.waitForState(job, JobState.RUNNING); @@ -408,10 +404,12 @@ public class TestMRApp { } @Test - public void testJobRebootOnLastRetry() throws Exception { + public void testJobRebootOnLastRetryOnUnregistrationFailure() + throws Exception { // make startCount as 2 since this is last retry which equals to // DEFAULT_MAX_AM_RETRY - MRApp app = new MRApp(1, 0, false, this.getClass().getName(), true, 2); + // The last param mocks the unregistration failure + MRApp app = new MRApp(1, 0, false, this.getClass().getName(), true, 2, false); Configuration conf = new 
Configuration(); Job job = app.submit(conf); @@ -425,8 +423,10 @@ public class TestMRApp { app.getContext().getEventHandler().handle(new JobEvent(job.getID(), JobEventType.JOB_AM_REBOOT)); - // return exteranl state as ERROR if this is the last retry - app.waitForState(job, JobState.ERROR); + app.waitForInternalState((JobImpl) job, JobStateInternal.REBOOT); + // return exteranl state as RUNNING if this is the last retry while + // unregistration fails + app.waitForState(job, JobState.RUNNING); } private final class MRAppWithSpiedJob extends MRApp { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRuntimeEstimators.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRuntimeEstimators.java index 7f968ca70f6..3d555f2b7f1 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRuntimeEstimators.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRuntimeEstimators.java @@ -869,7 +869,7 @@ public class TestRuntimeEstimators { } @Override - public boolean safeToReportTerminationToUser() { + public boolean hasSuccessfullyUnregistered() { // bogus - Not Required return true; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestStagingCleanup.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestStagingCleanup.java index 496c1e35068..1c92b11b5b7 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestStagingCleanup.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestStagingCleanup.java @@ -21,6 +21,7 @@ package org.apache.hadoop.mapreduce.v2.app; import static org.mockito.Matchers.any; import static org.mockito.Matchers.anyBoolean; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -36,18 +37,17 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.JobID; import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.TypeConverter; -import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent; -import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEventHandler; import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.mapreduce.v2.api.records.JobState; +import org.apache.hadoop.mapreduce.v2.app.MRAppMaster.RunningAppContext; import org.apache.hadoop.mapreduce.v2.app.client.ClientService; -import org.apache.hadoop.mapreduce.v2.app.client.MRClientService; import org.apache.hadoop.mapreduce.v2.app.job.Job; import org.apache.hadoop.mapreduce.v2.app.job.JobStateInternal; import org.apache.hadoop.mapreduce.v2.app.job.event.JobFinishEvent; import org.apache.hadoop.mapreduce.v2.app.job.impl.JobImpl; import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocator; import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocatorEvent; +import org.apache.hadoop.mapreduce.v2.app.rm.RMCommunicator; 
import org.apache.hadoop.mapreduce.v2.app.rm.RMHeartbeatHandler; import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; @@ -57,7 +57,7 @@ import org.apache.hadoop.service.Service; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; -import org.apache.hadoop.yarn.event.EventHandler; +import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; @@ -75,7 +75,44 @@ import org.junit.Test; private Path stagingJobPath = new Path(stagingJobDir); private final static RecordFactory recordFactory = RecordFactoryProvider. getRecordFactory(null); - + + @Test + public void testDeletionofStagingOnUnregistrationFailure() + throws IOException { + testDeletionofStagingOnUnregistrationFailure(2, false); + testDeletionofStagingOnUnregistrationFailure(1, true); + } + + @SuppressWarnings("resource") + private void testDeletionofStagingOnUnregistrationFailure( + int maxAttempts, boolean shouldHaveDeleted) throws IOException { + conf.set(MRJobConfig.MAPREDUCE_JOB_DIR, stagingJobDir); + fs = mock(FileSystem.class); + when(fs.delete(any(Path.class), anyBoolean())).thenReturn(true); + //Staging Dir exists + String user = UserGroupInformation.getCurrentUser().getShortUserName(); + Path stagingDir = MRApps.getStagingAreaDir(conf, user); + when(fs.exists(stagingDir)).thenReturn(true); + ApplicationId appId = ApplicationId.newInstance(0, 1); + ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 1); + JobId jobid = recordFactory.newRecordInstance(JobId.class); + jobid.setAppId(appId); + TestMRApp appMaster = new TestMRApp(attemptId, null, + JobStateInternal.RUNNING, maxAttempts); + appMaster.crushUnregistration = true; + appMaster.init(conf); + appMaster.start(); + appMaster.shutDownJob(); + ((RunningAppContext) appMaster.getContext()).computeIsLastAMRetry(); + if (shouldHaveDeleted) { + Assert.assertEquals(new Boolean(true), appMaster.isLastAMRetry()); + verify(fs).delete(stagingJobPath, true); + } else { + Assert.assertEquals(new Boolean(false), appMaster.isLastAMRetry()); + verify(fs, never()).delete(stagingJobPath, true); + } + } + @Test public void testDeletionofStaging() throws IOException { conf.set(MRJobConfig.MAPREDUCE_JOB_DIR, stagingJobDir); @@ -204,6 +241,7 @@ import org.junit.Test; ContainerAllocator allocator; boolean testIsLastAMRetry = false; JobStateInternal jobStateInternal; + boolean crushUnregistration = false; public TestMRApp(ApplicationAttemptId applicationAttemptId, ContainerAllocator allocator, int maxAppAttempts) { @@ -211,6 +249,7 @@ import org.junit.Test; applicationAttemptId, 1), "testhost", 2222, 3333, System.currentTimeMillis(), maxAppAttempts); this.allocator = allocator; + this.successfullyUnregistered.set(true); } public TestMRApp(ApplicationAttemptId applicationAttemptId, @@ -229,7 +268,11 @@ import org.junit.Test; protected ContainerAllocator createContainerAllocator( final ClientService clientService, final AppContext context) { if(allocator == null) { - return super.createContainerAllocator(clientService, context); + if (crushUnregistration) { + return new CustomContainerAllocator(context); + } else { + return super.createContainerAllocator(clientService, context); + } } return allocator; } @@ -280,6 
+323,41 @@ import org.junit.Test; public boolean getTestIsLastAMRetry(){ return testIsLastAMRetry; } + + private class CustomContainerAllocator extends RMCommunicator + implements ContainerAllocator { + + public CustomContainerAllocator(AppContext context) { + super(null, context); + } + + @Override + public void serviceInit(Configuration conf) { + } + + @Override + public void serviceStart() { + } + + @Override + public void serviceStop() { + unregister(); + } + + @Override + protected void doUnregistration() + throws YarnException, IOException, InterruptedException { + throw new YarnException("test exception"); + } + + @Override + protected void heartbeat() throws Exception { + } + + @Override + public void handle(ContainerAllocatorEvent event) { + } + } } private final class MRAppTestCleanup extends MRApp { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestJobImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestJobImpl.java index 8fb7f1a7b93..714b753d13f 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestJobImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestJobImpl.java @@ -275,7 +275,7 @@ public class TestJobImpl { AppContext mockContext = mock(AppContext.class); when(mockContext.isLastAMRetry()).thenReturn(true); - when(mockContext.safeToReportTerminationToUser()).thenReturn(false); + when(mockContext.hasSuccessfullyUnregistered()).thenReturn(false); JobImpl job = createRunningStubbedJob(conf, dispatcher, 2, mockContext); completeJobTasks(job); assertJobState(job, JobStateInternal.COMMITTING); @@ -285,7 +285,7 @@ public class TestJobImpl { assertJobState(job, JobStateInternal.REBOOT); // return the external state as ERROR since this is last retry. 
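   // Until hasSuccessfullyUnregistered() returns true on the mocked context,
   // JobImpl keeps reporting the last non-final state, so the next assertion
   // still sees RUNNING.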
Assert.assertEquals(JobState.RUNNING, job.getState()); - when(mockContext.safeToReportTerminationToUser()).thenReturn(true); + when(mockContext.hasSuccessfullyUnregistered()).thenReturn(true); Assert.assertEquals(JobState.ERROR, job.getState()); dispatcher.stop(); @@ -594,7 +594,7 @@ public class TestJobImpl { new JobDiagnosticsUpdateEvent(jobId, diagMsg); MRAppMetrics mrAppMetrics = MRAppMetrics.create(); AppContext mockContext = mock(AppContext.class); - when(mockContext.safeToReportTerminationToUser()).thenReturn(true); + when(mockContext.hasSuccessfullyUnregistered()).thenReturn(true); JobImpl job = new JobImpl(jobId, Records .newRecord(ApplicationAttemptId.class), new Configuration(), mock(EventHandler.class), @@ -705,7 +705,7 @@ public class TestJobImpl { commitHandler.start(); AppContext mockContext = mock(AppContext.class); - when(mockContext.safeToReportTerminationToUser()).thenReturn(false); + when(mockContext.hasSuccessfullyUnregistered()).thenReturn(false); JobImpl job = createStubbedJob(conf, dispatcher, 2, mockContext); JobId jobId = job.getID(); job.handle(new JobEvent(jobId, JobEventType.JOB_INIT)); @@ -722,7 +722,7 @@ public class TestJobImpl { job.handle(new JobEvent(jobId, JobEventType.JOB_TASK_ATTEMPT_FETCH_FAILURE)); assertJobState(job, JobStateInternal.FAILED); Assert.assertEquals(JobState.RUNNING, job.getState()); - when(mockContext.safeToReportTerminationToUser()).thenReturn(true); + when(mockContext.hasSuccessfullyUnregistered()).thenReturn(true); Assert.assertEquals(JobState.FAILED, job.getState()); dispatcher.stop(); @@ -762,7 +762,7 @@ public class TestJobImpl { JobId jobId = TypeConverter.toYarn(jobID); if (appContext == null) { appContext = mock(AppContext.class); - when(appContext.safeToReportTerminationToUser()).thenReturn(true); + when(appContext.hasSuccessfullyUnregistered()).thenReturn(true); } StubbedJob job = new StubbedJob(jobId, ApplicationAttemptId.newInstance(ApplicationId.newInstance(0, 0), 0), diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/local/TestLocalContainerAllocator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/local/TestLocalContainerAllocator.java index cdbecd2304e..90dbe489f4b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/local/TestLocalContainerAllocator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/local/TestLocalContainerAllocator.java @@ -88,6 +88,10 @@ public class TestLocalContainerAllocator { protected void register() { } + @Override + protected void unregister() { + } + @Override protected void startAllocatorThread() { allocatorThread = new Thread(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/JobHistory.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/JobHistory.java index 7de35ff319e..b7823a0c50d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/JobHistory.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/JobHistory.java @@ -389,7 
+389,7 @@ public class JobHistory extends AbstractService implements HistoryContext { } @Override - public boolean safeToReportTerminationToUser() { + public boolean hasSuccessfullyUnregistered() { // bogus - Not Required return true; } From caa4abd30cfc4361c7bc9f212a9092840d7c3b53 Mon Sep 17 00:00:00 2001 From: Brandon Li Date: Mon, 7 Oct 2013 02:57:24 +0000 Subject: [PATCH 068/133] HDFS-5259. Support client which combines appended data with old data before sends it to NFS server. Contributed by Brandon Li git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529730 13f79535-47bb-0310-9956-ffa450edef68 --- .../nfs/nfs3/request/WRITE3Request.java | 12 ++- .../hadoop/hdfs/nfs/nfs3/OpenFileCtx.java | 97 ++++++++++++----- .../apache/hadoop/hdfs/nfs/nfs3/WriteCtx.java | 67 ++++++++++-- .../hadoop/hdfs/nfs/nfs3/TestWrites.java | 100 ++++++++++++++++++ hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + 5 files changed, 244 insertions(+), 35 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestWrites.java diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/request/WRITE3Request.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/request/WRITE3Request.java index b6b8fd0cbd4..b04e7fca74d 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/request/WRITE3Request.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/request/WRITE3Request.java @@ -28,8 +28,8 @@ import org.apache.hadoop.oncrpc.XDR; * WRITE3 Request */ public class WRITE3Request extends RequestWithHandle { - private final long offset; - private final int count; + private long offset; + private int count; private final WriteStableHow stableHow; private final ByteBuffer data; @@ -54,10 +54,18 @@ public class WRITE3Request extends RequestWithHandle { return this.offset; } + public void setOffset(long offset) { + this.offset = offset; + } + public int getCount() { return this.count; } + public void setCount(int count) { + this.count = count; + } + public WriteStableHow getStableHow() { return this.stableHow; } diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtx.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtx.java index ff2b33b1bf4..1aef083cc30 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtx.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtx.java @@ -22,6 +22,7 @@ import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.RandomAccessFile; +import java.nio.ByteBuffer; import java.nio.channels.ClosedChannelException; import java.security.InvalidParameterException; import java.util.EnumSet; @@ -55,6 +56,7 @@ import org.apache.hadoop.oncrpc.security.VerifierNone; import org.apache.hadoop.util.Daemon; import org.jboss.netty.channel.Channel; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; /** @@ -360,6 +362,30 @@ class OpenFileCtx { } } + @VisibleForTesting + public static void alterWriteRequest(WRITE3Request request, long cachedOffset) { + long offset = request.getOffset(); + int count = request.getCount(); + long smallerCount = offset + count - cachedOffset; + if (LOG.isDebugEnabled()) { + LOG.debug(String.format("Got 
overwrite with appended data (%d-%d)," + + " current offset %d," + " drop the overlapped section (%d-%d)" + + " and append new data (%d-%d).", offset, (offset + count - 1), + cachedOffset, offset, (cachedOffset - 1), cachedOffset, (offset + + count - 1))); + } + + ByteBuffer data = request.getData(); + Preconditions.checkState(data.position() == 0, + "The write request data has non-zero position"); + data.position((int) (cachedOffset - offset)); + Preconditions.checkState(data.limit() - data.position() == smallerCount, + "The write request buffer has wrong limit/position regarding count"); + + request.setOffset(cachedOffset); + request.setCount((int) smallerCount); + } + /** * Creates and adds a WriteCtx into the pendingWrites map. This is a * synchronized method to handle concurrent writes. @@ -372,12 +398,40 @@ class OpenFileCtx { long offset = request.getOffset(); int count = request.getCount(); long cachedOffset = nextOffset.get(); - + int originalCount = WriteCtx.INVALID_ORIGINAL_COUNT; + if (LOG.isDebugEnabled()) { LOG.debug("requesed offset=" + offset + " and current offset=" + cachedOffset); } + // Handle a special case first + if ((offset < cachedOffset) && (offset + count > cachedOffset)) { + // One Linux client behavior: after a file is closed and reopened to + // write, the client sometimes combines previous written data(could still + // be in kernel buffer) with newly appended data in one write. This is + // usually the first write after file reopened. In this + // case, we log the event and drop the overlapped section. + LOG.warn(String.format("Got overwrite with appended data (%d-%d)," + + " current offset %d," + " drop the overlapped section (%d-%d)" + + " and append new data (%d-%d).", offset, (offset + count - 1), + cachedOffset, offset, (cachedOffset - 1), cachedOffset, (offset + + count - 1))); + + if (!pendingWrites.isEmpty()) { + LOG.warn("There are other pending writes, fail this jumbo write"); + return null; + } + + LOG.warn("Modify this write to write only the appended data"); + alterWriteRequest(request, cachedOffset); + + // Update local variable + originalCount = count; + offset = request.getOffset(); + count = request.getCount(); + } + // Fail non-append call if (offset < cachedOffset) { LOG.warn("(offset,count,nextOffset):" + "(" + offset + "," + count + "," @@ -387,8 +441,9 @@ class OpenFileCtx { DataState dataState = offset == cachedOffset ? WriteCtx.DataState.NO_DUMP : WriteCtx.DataState.ALLOW_DUMP; WriteCtx writeCtx = new WriteCtx(request.getHandle(), - request.getOffset(), request.getCount(), request.getStableHow(), - request.getData().array(), channel, xid, false, dataState); + request.getOffset(), request.getCount(), originalCount, + request.getStableHow(), request.getData(), channel, xid, false, + dataState); if (LOG.isDebugEnabled()) { LOG.debug("Add new write to the list with nextOffset " + cachedOffset + " and requesed offset=" + offset); @@ -419,8 +474,7 @@ class OpenFileCtx { WRITE3Response response; long cachedOffset = nextOffset.get(); if (offset + count > cachedOffset) { - LOG.warn("Haven't noticed any partial overwrite for a sequential file" - + " write requests. 
Treat it as a real random write, no support."); + LOG.warn("Treat this jumbo write as a real random write, no support."); response = new WRITE3Response(Nfs3Status.NFS3ERR_INVAL, wccData, 0, WriteStableHow.UNSTABLE, Nfs3Constant.WRITE_COMMIT_VERF); } else { @@ -633,6 +687,7 @@ class OpenFileCtx { private void addWrite(WriteCtx writeCtx) { long offset = writeCtx.getOffset(); int count = writeCtx.getCount(); + // For the offset range (min, max), min is inclusive, and max is exclusive pendingWrites.put(new OffsetRange(offset, offset + count), writeCtx); } @@ -745,19 +800,7 @@ class OpenFileCtx { long offset = writeCtx.getOffset(); int count = writeCtx.getCount(); WriteStableHow stableHow = writeCtx.getStableHow(); - byte[] data = null; - try { - data = writeCtx.getData(); - } catch (Exception e1) { - LOG.error("Failed to get request data offset:" + offset + " count:" - + count + " error:" + e1); - // Cleanup everything - cleanup(); - return; - } - Preconditions.checkState(data.length == count); - FileHandle handle = writeCtx.getHandle(); if (LOG.isDebugEnabled()) { LOG.debug("do write, fileId: " + handle.getFileId() + " offset: " @@ -766,8 +809,8 @@ class OpenFileCtx { try { // The write is not protected by lock. asyncState is used to make sure - // there is one thread doing write back at any time - fos.write(data, 0, count); + // there is one thread doing write back at any time + writeCtx.writeData(fos); long flushedOffset = getFlushedOffset(); if (flushedOffset != (offset + count)) { @@ -776,10 +819,6 @@ class OpenFileCtx { + (offset + count)); } - if (LOG.isDebugEnabled()) { - LOG.debug("After writing " + handle.getFileId() + " at offset " - + offset + ", update the memory count."); - } // Reduce memory occupation size if request was allowed dumped if (writeCtx.getDataState() == WriteCtx.DataState.ALLOW_DUMP) { @@ -787,6 +826,11 @@ class OpenFileCtx { if (writeCtx.getDataState() == WriteCtx.DataState.ALLOW_DUMP) { writeCtx.setDataState(WriteCtx.DataState.NO_DUMP); updateNonSequentialWriteInMemory(-count); + if (LOG.isDebugEnabled()) { + LOG.debug("After writing " + handle.getFileId() + " at offset " + + offset + ", updated the memory count, new value:" + + nonSequentialWriteInMemory.get()); + } } } } @@ -794,6 +838,11 @@ class OpenFileCtx { if (!writeCtx.getReplied()) { WccAttr preOpAttr = latestAttr.getWccAttr(); WccData fileWcc = new WccData(preOpAttr, latestAttr); + if (writeCtx.getOriginalCount() != WriteCtx.INVALID_ORIGINAL_COUNT) { + LOG.warn("Return original count:" + writeCtx.getOriginalCount() + + " instead of real data count:" + count); + count = writeCtx.getOriginalCount(); + } WRITE3Response response = new WRITE3Response(Nfs3Status.NFS3_OK, fileWcc, count, stableHow, Nfs3Constant.WRITE_COMMIT_VERF); Nfs3Utils.writeChannel(channel, response.writeHeaderAndResponse( @@ -801,7 +850,7 @@ class OpenFileCtx { } } catch (IOException e) { LOG.error("Error writing to fileId " + handle.getFileId() + " at offset " - + offset + " and length " + data.length, e); + + offset + " and length " + count, e); if (!writeCtx.getReplied()) { WRITE3Response response = new WRITE3Response(Nfs3Status.NFS3ERR_IO); Nfs3Utils.writeChannel(channel, response.writeHeaderAndResponse( diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/WriteCtx.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/WriteCtx.java index f1af6520940..05e0fb7c2c9 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/WriteCtx.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/WriteCtx.java @@ -20,13 +20,16 @@ package org.apache.hadoop.hdfs.nfs.nfs3; import java.io.FileOutputStream; import java.io.IOException; import java.io.RandomAccessFile; +import java.nio.ByteBuffer; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hdfs.client.HdfsDataOutputStream; import org.apache.hadoop.nfs.nfs3.FileHandle; import org.apache.hadoop.nfs.nfs3.Nfs3Constant.WriteStableHow; import org.jboss.netty.channel.Channel; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; /** @@ -50,8 +53,17 @@ class WriteCtx { private final FileHandle handle; private final long offset; private final int count; + + //Only needed for overlapped write, referring OpenFileCtx.addWritesToCache() + private final int originalCount; + public static final int INVALID_ORIGINAL_COUNT = -1; + + public int getOriginalCount() { + return originalCount; + } + private final WriteStableHow stableHow; - private volatile byte[] data; + private volatile ByteBuffer data; private final Channel channel; private final int xid; @@ -89,9 +101,13 @@ class WriteCtx { } return 0; } + + // Resized write should not allow dump + Preconditions.checkState(originalCount == INVALID_ORIGINAL_COUNT); + this.raf = raf; dumpFileOffset = dumpOut.getChannel().position(); - dumpOut.write(data, 0, count); + dumpOut.write(data.array(), 0, count); if (LOG.isDebugEnabled()) { LOG.debug("After dump, new dumpFileOffset:" + dumpFileOffset); } @@ -127,7 +143,8 @@ class WriteCtx { return stableHow; } - byte[] getData() throws IOException { + @VisibleForTesting + ByteBuffer getData() throws IOException { if (dataState != DataState.DUMPED) { synchronized (this) { if (dataState != DataState.DUMPED) { @@ -143,15 +160,45 @@ class WriteCtx { private void loadData() throws IOException { Preconditions.checkState(data == null); - data = new byte[count]; + byte[] rawData = new byte[count]; raf.seek(dumpFileOffset); - int size = raf.read(data, 0, count); + int size = raf.read(rawData, 0, count); if (size != count) { throw new IOException("Data count is " + count + ", but read back " + size + "bytes"); } + data = ByteBuffer.wrap(rawData); } + public void writeData(HdfsDataOutputStream fos) throws IOException { + Preconditions.checkState(fos != null); + + ByteBuffer dataBuffer = null; + try { + dataBuffer = getData(); + } catch (Exception e1) { + LOG.error("Failed to get request data offset:" + offset + " count:" + + count + " error:" + e1); + throw new IOException("Can't get WriteCtx.data"); + } + + byte[] data = dataBuffer.array(); + int position = dataBuffer.position(); + int limit = dataBuffer.limit(); + Preconditions.checkState(limit - position == count); + // Modified write has a valid original count + if (position != 0) { + if (limit != getOriginalCount()) { + throw new IOException("Modified write has differnt original size." + + "buff position:" + position + " buff limit:" + limit + ". 
" + + toString()); + } + } + + // Now write data + fos.write(data, position, count); + } + Channel getChannel() { return channel; } @@ -168,11 +215,13 @@ class WriteCtx { this.replied = replied; } - WriteCtx(FileHandle handle, long offset, int count, WriteStableHow stableHow, - byte[] data, Channel channel, int xid, boolean replied, DataState dataState) { + WriteCtx(FileHandle handle, long offset, int count, int originalCount, + WriteStableHow stableHow, ByteBuffer data, Channel channel, int xid, + boolean replied, DataState dataState) { this.handle = handle; this.offset = offset; this.count = count; + this.originalCount = originalCount; this.stableHow = stableHow; this.data = data; this.channel = channel; @@ -185,7 +234,7 @@ class WriteCtx { @Override public String toString() { return "Id:" + handle.getFileId() + " offset:" + offset + " count:" + count - + " stableHow:" + stableHow + " replied:" + replied + " dataState:" - + dataState + " xid:" + xid; + + " originalCount:" + originalCount + " stableHow:" + stableHow + + " replied:" + replied + " dataState:" + dataState + " xid:" + xid; } } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestWrites.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestWrites.java new file mode 100644 index 00000000000..d24e5d1fa8a --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestWrites.java @@ -0,0 +1,100 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.nfs.nfs3; + +import java.io.IOException; +import java.nio.ByteBuffer; + +import junit.framework.Assert; + +import org.apache.hadoop.nfs.nfs3.FileHandle; +import org.apache.hadoop.nfs.nfs3.Nfs3Constant.WriteStableHow; +import org.apache.hadoop.nfs.nfs3.request.WRITE3Request; +import org.junit.Test; + +public class TestWrites { + @Test + public void testAlterWriteRequest() throws IOException { + int len = 20; + byte[] data = new byte[len]; + ByteBuffer buffer = ByteBuffer.wrap(data); + + for (int i = 0; i < len; i++) { + buffer.put((byte) i); + } + buffer.flip(); + int originalCount = buffer.array().length; + WRITE3Request request = new WRITE3Request(new FileHandle(), 0, data.length, + WriteStableHow.UNSTABLE, buffer); + + WriteCtx writeCtx1 = new WriteCtx(request.getHandle(), request.getOffset(), + request.getCount(), WriteCtx.INVALID_ORIGINAL_COUNT, + request.getStableHow(), request.getData(), null, 1, false, + WriteCtx.DataState.NO_DUMP); + + Assert.assertTrue(writeCtx1.getData().array().length == originalCount); + + // Now change the write request + OpenFileCtx.alterWriteRequest(request, 12); + + WriteCtx writeCtx2 = new WriteCtx(request.getHandle(), request.getOffset(), + request.getCount(), originalCount, request.getStableHow(), + request.getData(), null, 2, false, WriteCtx.DataState.NO_DUMP); + ByteBuffer appendedData = writeCtx2.getData(); + + int position = appendedData.position(); + int limit = appendedData.limit(); + Assert.assertTrue(position == 12); + Assert.assertTrue(limit - position == 8); + Assert.assertTrue(appendedData.get(position) == (byte) 12); + Assert.assertTrue(appendedData.get(position + 1) == (byte) 13); + Assert.assertTrue(appendedData.get(position + 2) == (byte) 14); + Assert.assertTrue(appendedData.get(position + 7) == (byte) 19); + + // Test current file write offset is at boundaries + buffer.position(0); + request = new WRITE3Request(new FileHandle(), 0, data.length, + WriteStableHow.UNSTABLE, buffer); + OpenFileCtx.alterWriteRequest(request, 1); + WriteCtx writeCtx3 = new WriteCtx(request.getHandle(), request.getOffset(), + request.getCount(), originalCount, request.getStableHow(), + request.getData(), null, 2, false, WriteCtx.DataState.NO_DUMP); + appendedData = writeCtx3.getData(); + position = appendedData.position(); + limit = appendedData.limit(); + Assert.assertTrue(position == 1); + Assert.assertTrue(limit - position == 19); + Assert.assertTrue(appendedData.get(position) == (byte) 1); + Assert.assertTrue(appendedData.get(position + 18) == (byte) 19); + + // Reset buffer position before test another boundary + buffer.position(0); + request = new WRITE3Request(new FileHandle(), 0, data.length, + WriteStableHow.UNSTABLE, buffer); + OpenFileCtx.alterWriteRequest(request, 19); + WriteCtx writeCtx4 = new WriteCtx(request.getHandle(), request.getOffset(), + request.getCount(), originalCount, request.getStableHow(), + request.getData(), null, 2, false, WriteCtx.DataState.NO_DUMP); + appendedData = writeCtx4.getData(); + position = appendedData.position(); + limit = appendedData.limit(); + Assert.assertTrue(position == 19); + Assert.assertTrue(limit - position == 1); + Assert.assertTrue(appendedData.get(position) == (byte) 19); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index e92325720dd..a30f32a6b7e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -403,6 +403,9 @@ Release 2.1.2 - UNRELEASED 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index e92325720dd..a30f32a6b7e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -403,6 +403,9 @@ Release 2.1.2 - UNRELEASED
     HDFS-5299. DFS client hangs in updatePipeline RPC when failover happened.
     (Vinay via jing9)
 
+    HDFS-5259. Support client which combines appended data with old data
+    before sends it to NFS server. (brandonli)
+
 Release 2.1.1-beta - 2013-09-23
 
   INCOMPATIBLE CHANGES

From 43c7a834dcc6a77d85b5991d584c6d28ddfa6537 Mon Sep 17 00:00:00 2001
From: Arun Murthy
Date: Mon, 7 Oct 2013 05:17:00 +0000
Subject: [PATCH 069/133] Preparing for hadoop-2.2.0 release.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529750 13f79535-47bb-0310-9956-ffa450edef68
---
 .../hadoop-common/CHANGES.txt               | 20 ++++---------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 14 +------------
 hadoop-mapreduce-project/CHANGES.txt        | 14 +------------
 hadoop-yarn-project/CHANGES.txt             | 14 +------------
 4 files changed, 7 insertions(+), 55 deletions(-)

diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index 12911a3cdbc..e486f2a564e 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -390,22 +390,7 @@ Release 2.3.0 - UNRELEASED
     HADOOP-9981. globStatus should minimize its listStatus and getFileStatus
     calls. (Contributed by Colin Patrick McCabe)
 
-Release 2.2.0 - UNRELEASED
-
-  INCOMPATIBLE CHANGES
-
-  NEW FEATURES
-
-  IMPROVEMENTS
-
-  OPTIMIZATIONS
-
-  BUG FIXES
-
-    HADOOP-8315. Support SASL-authenticated ZooKeeper in ActiveStandbyElector
-    (todd)
-
-Release 2.1.2 - UNRELEASED
+Release 2.2.0 - 2013-10-13
 
   INCOMPATIBLE CHANGES
 
@@ -422,6 +407,9 @@ Release 2.1.2 - UNRELEASED
     HADOOP-9758. Provide configuration option for FileSystem/FileContext
     symlink resolution. (Andrew Wang via Colin Patrick McCabe)
 
+    HADOOP-8315. Support SASL-authenticated ZooKeeper in ActiveStandbyElector
+    (todd)
+
   OPTIMIZATIONS
 
   BUG FIXES
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index a30f32a6b7e..596335388bc 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -333,19 +333,7 @@ Release 2.3.0 - UNRELEASED
     HDFS-5266. ElasticByteBufferPool#Key does not implement equals.
     (cnauroth)
 
-Release 2.2.0 - UNRELEASED
-
-  INCOMPATIBLE CHANGES
-
-  NEW FEATURES
-
-  IMPROVEMENTS
-
-  OPTIMIZATIONS
-
-  BUG FIXES
-
-Release 2.1.2 - UNRELEASED
+Release 2.2.0 - 2013-10-13
 
   INCOMPATIBLE CHANGES
 
diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt
index 5ca29b021b4..292564280c5 100644
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@@ -187,19 +187,7 @@ Release 2.3.0 - UNRELEASED
     MAPREDUCE-5514. Fix TestRMContainerAllocator. (Zhijie Shen via acmurthy)
 
-Release 2.2.0 - UNRELEASED
-
-  INCOMPATIBLE CHANGES
-
-  NEW FEATURES
-
-  IMPROVEMENTS
-
-  OPTIMIZATIONS
-
-  BUG FIXES
-
-Release 2.1.2 - UNRELEASED
+Release 2.2.0 - 2013-10-13
 
   INCOMPATIBLE CHANGES
 
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 0735120b4e7..270445b315e 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -66,19 +66,7 @@ Release 2.3.0 - UNRELEASED
     YARN-1268. TestFairScheduler.testContinuousScheduling is flaky (Sandy Ryza)
 
-Release 2.2.0 - UNRELEASED
-
-  INCOMPATIBLE CHANGES
-
-  NEW FEATURES
-
-  IMPROVEMENTS
-
-  OPTIMIZATIONS
-
-  BUG FIXES
-
-Release 2.1.2 - UNRELEASED
+Release 2.2.0 - 2013-10-13
 
   INCOMPATIBLE CHANGES

From b8f1cf31926dd20caea02f620d93688dad53caa8 Mon Sep 17 00:00:00 2001
From: Arun Murthy
Date: Mon, 7 Oct 2013 05:30:18 +0000
Subject: [PATCH 070/133] HDFS-4817. Moving changelog to Release 2.2.0 section to reflect the backport.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529751 13f79535-47bb-0310-9956-ffa450edef68
---
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 596335388bc..e06919de27a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -262,9 +262,6 @@ Release 2.3.0 - UNRELEASED
     HDFS-4278. Log an ERROR when DFS_BLOCK_ACCESS_TOKEN_ENABLE config is
     disabled but security is turned on. (Kousuke Saruta via harsh)
 
-    HDFS-4817. Make HDFS advisory caching configurable on a per-file basis.
-    (Colin Patrick McCabe)
-
     HDFS-5004. Add additional JMX bean for NameNode status data
     (Trevor Lorimer via cos)
 
@@ -339,6 +336,9 @@ Release 2.2.0 - 2013-10-13
 
   NEW FEATURES
 
+    HDFS-4817. Make HDFS advisory caching configurable on a per-file basis.
+    (Colin Patrick McCabe)
+
     HDFS-5230. Introduce RpcInfo to decouple XDR classes from the RPC API.
     (Haohui Mai via brandonli)

From bac6515f968ffca611f7c9aa87938045d588d883 Mon Sep 17 00:00:00 2001
From: Arun Murthy
Date: Mon, 7 Oct 2013 05:39:20 +0000
Subject: [PATCH 071/133] Release notes for hadoop-2.2.0.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529756 13f79535-47bb-0310-9956-ffa450edef68
---
 .../src/main/docs/releasenotes.html | 616 ++++++++++++++++++
 1 file changed, 616 insertions(+)

diff --git a/hadoop-common-project/hadoop-common/src/main/docs/releasenotes.html b/hadoop-common-project/hadoop-common/src/main/docs/releasenotes.html
index 3494a97e8d2..efbaeae4b14 100644
--- a/hadoop-common-project/hadoop-common/src/main/docs/releasenotes.html
+++ b/hadoop-common-project/hadoop-common/src/main/docs/releasenotes.html
@@ -15,6 +15,622 @@ limitations under the License. -->
+Hadoop 2.2.0 Release Notes

+
+Hadoop 2.2.0 Release Notes
+
+These release notes include new developer and user-facing incompatibilities,
+features, and major improvements.
+
+Changes since Hadoop 2.1.1-beta
+
      +
    • YARN-1278. + Blocker bug reported by Yesha Vora and fixed by Hitesh Shah
      + New AM does not start after rm restart
      +
      The new AM fails to start after RM restarts. It fails to start the new ApplicationMaster and the job fails with the error below.

      /usr/bin/mapred job -status job_1380985373054_0001
      13/10/05 15:04:04 INFO client.RMProxy: Connecting to ResourceManager at hostname
      Job: job_1380985373054_0001
      Job File: /user/abc/.staging/job_1380985373054_0001/job.xml
      Job Tracking URL : http://hostname:8088/cluster/app/application_1380985373054_0001
      Uber job : false
      Number of maps: 0
      Number of reduces: 0
      map() completion: 0.0
      reduce() completion: 0.0
      Job state: FAILED
      retired: false
      reason for failure: There are no failed tasks for the job. Job is failed due to some other reason and reason can be found in the logs.
      Counters: 0
    • +
    • YARN-1277. + Major sub-task reported by Suresh Srinivas and fixed by Omkar Vinit Joshi
      + Add http policy support for YARN daemons
      +
      This YARN part of HADOOP-10022.
    • +
    • YARN-1274. + Blocker bug reported by Alejandro Abdelnur and fixed by Siddharth Seth (nodemanager)
      + LCE fails to run containers that don't have resources to localize
      +
      LCE container launch assumes the usercache/USER directory exists and is owned by the user running the container process.

      But the directory is created only if the LCE localization command has resources to localize; if there are no resources to localize, LCE localization never executes, launching fails with exit code 255, and the NM logs show something like:

      {code}
      2013-10-04 14:07:56,425 INFO org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor: main : command provided 1
      2013-10-04 14:07:56,425 INFO org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor: main : user is llama
      2013-10-04 14:07:56,425 INFO org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor: Can't create directory llama in /yarn/nm/usercache/llama/appcache/application_1380853306301_0004/container_1380853306301_0004_01_000004 - Permission denied
      {code}
    • +
    • YARN-1273. + Major bug reported by Hitesh Shah and fixed by Hitesh Shah
      + Distributed shell does not account for start container failures reported asynchronously.
      +
      2013-10-04 22:09:15,234 ERROR [org.apache.hadoop.yarn.client.api.async.impl.NMClientAsyncImpl #1] distributedshell.ApplicationMaster (ApplicationMaster.java:onStartContainerError(719)) - Failed to start Container container_1380920347574_0018_01_000006
    • +
    • YARN-1271. + Major bug reported by Sandy Ryza and fixed by Sandy Ryza (nodemanager)
      + "Text file busy" errors launching containers again
      +
      The error is shown below in the comments. + +MAPREDUCE-2374 fixed this by removing "-c" when running the container launch script. It looks like the "-c" got brought back during the windows branch merge, so we should remove it again.
    • +
    • YARN-1262. + Major bug reported by Sandy Ryza and fixed by Karthik Kambatla
      + TestApplicationCleanup relies on all containers assigned in a single heartbeat
      +
      TestApplicationCleanup submits container requests and waits for allocations to come in. It only sends a single node heartbeat to the node, expecting multiple containers to be assigned on this heartbeat, which not all schedulers do by default. + +This is causing the test to fail when run with the Fair Scheduler.
    • +
    • YARN-1260. + Major sub-task reported by Yesha Vora and fixed by Omkar Vinit Joshi
      + RM_HOME link breaks when webapp.https.address related properties are not specified
      +
      This issue happens in a multi-node cluster where the resource manager and node manager run on different machines.

      Steps to reproduce:
      1) Set yarn.resourcemanager.hostname = <resourcemanager host> in yarn-site.xml
      2) Set hadoop.ssl.enabled = true in core-site.xml
      3) Do not specify yarn.nodemanager.webapp.https.address or yarn.resourcemanager.webapp.https.address in yarn-site.xml, so the default values of these two properties are used
      4) Go to the nodemanager web UI "https://<nodemanager host>:8044/node"
      5) Click on the RM_HOME link
      This link redirects to "https://<nodemanager host>:8090/cluster" instead of "https://<resourcemanager host>:8090/cluster"
    • +
    • YARN-1256. + Critical sub-task reported by Bikas Saha and fixed by Xuan Gong
      + NM silently ignores non-existent service in StartContainerRequest
      +
      A container can set token service metadata for a service, say shuffle_service. If that service does not exist then the errors is silently ignored. Later, when the next container wants to access data written to shuffle_service by the first task, then it fails because the service does not have the token that was supposed to be set by the first task.
    • +
    • YARN-1254. + Major sub-task reported by Vinod Kumar Vavilapalli and fixed by Omkar Vinit Joshi
      + NM is polluting container's credentials
      +
      Before launching the container, NM is using the same credential object and so is polluting what container should see. We should fix this.
    • +
    • YARN-1251. + Major bug reported by Junping Du and fixed by Xuan Gong (applications/distributed-shell)
      + TestDistributedShell#TestDSShell failed with timeout
      +
      TestDistributedShell#TestDSShell on trunk Jenkins are failed consistently recently. +The Stacktrace is: +{code} +java.lang.Exception: test timed out after 90000 milliseconds + at com.google.protobuf.LiteralByteString.<init>(LiteralByteString.java:234) + at com.google.protobuf.ByteString.copyFromUtf8(ByteString.java:255) + at org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos$RequestHeaderProto.getMethodNameBytes(ProtobufRpcEngineProtos.java:286) + at org.apache.hadoop.ipc.protobuf.ProtobufRpcEngineProtos$RequestHeaderProto.getSerializedSize(ProtobufRpcEngineProtos.java:462) + at com.google.protobuf.AbstractMessageLite.writeDelimitedTo(AbstractMessageLite.java:84) + at org.apache.hadoop.ipc.ProtobufRpcEngine$RpcMessageWithHeader.write(ProtobufRpcEngine.java:302) + at org.apache.hadoop.ipc.Client$Connection.sendRpcRequest(Client.java:989) + at org.apache.hadoop.ipc.Client.call(Client.java:1377) + at org.apache.hadoop.ipc.Client.call(Client.java:1357) + at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:206) + at $Proxy70.getApplicationReport(Unknown Source) + at org.apache.hadoop.yarn.api.impl.pb.client.ApplicationClientProtocolPBClientImpl.getApplicationReport(ApplicationClientProtocolPBClientImpl.java:137) + at sun.reflect.GeneratedMethodAccessor40.invoke(Unknown Source) + at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) + at java.lang.reflect.Method.invoke(Method.java:597) + at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:185) + at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:101) + at $Proxy71.getApplicationReport(Unknown Source) + at org.apache.hadoop.yarn.client.api.impl.YarnClientImpl.getApplicationReport(YarnClientImpl.java:195) + at org.apache.hadoop.yarn.applications.distributedshell.Client.monitorApplication(Client.java:622) + at org.apache.hadoop.yarn.applications.distributedshell.Client.run(Client.java:597) + at org.apache.hadoop.yarn.applications.distributedshell.TestDistributedShell.testDSShell(TestDistributedShell.java:125) +{code} +For details, please refer: +https://builds.apache.org/job/PreCommit-YARN-Build/2039//testReport/
    • +
    • YARN-1247. + Major bug reported by Roman Shaposhnik and fixed by Roman Shaposhnik (nodemanager)
      + test-container-executor has gotten out of sync with the changes to container-executor
      +
      If run under the super-user account test-container-executor.c fails in multiple different places. It would be nice to fix it so that we have better testing of LCE functionality.
    • +
    • YARN-1246. + Minor improvement reported by Arpit Gupta and fixed by Arpit Gupta
      + Log application status in the rm log when app is done running
      +
      Since there is no yarn history server it becomes difficult to determine what the status of an old application is. One has to be familiar with the state transition in yarn to know what means a success. + +We should add a log at info level that captures what the finalStatus of an app is. This would be helpful while debugging applications if the RM has restarted and we no longer can use the UI.
    • +
    • YARN-1236. + Major bug reported by Sandy Ryza and fixed by Sandy Ryza (resourcemanager)
      + FairScheduler setting queue name in RMApp is not working
      +
      The fair scheduler sometimes picks a different queue than the one an application was submitted to, such as when user-as-default-queue is turned on. It needs to update the queue name in the RMApp so that this choice will be reflected in the UI. + +This isn't working because the scheduler is looking up the RMApp by application attempt id instead of app id and failing to find it.
    • +
    • YARN-1229. + Blocker bug reported by Tassapol Athiapinya and fixed by Xuan Gong (nodemanager)
      + Define constraints on Auxiliary Service names. Change ShuffleHandler service name from mapreduce.shuffle to mapreduce_shuffle.
      +
      I run sleep job. If AM fails to start, this exception could occur: + +13/09/20 11:00:23 INFO mapreduce.Job: Job job_1379673267098_0020 failed with state FAILED due to: Application application_1379673267098_0020 failed 1 times due to AM Container for appattempt_1379673267098_0020_000001 exited with exitCode: 1 due to: Exception from container-launch: +org.apache.hadoop.util.Shell$ExitCodeException: /myappcache/application_1379673267098_0020/container_1379673267098_0020_01_000001/launch_container.sh: line 12: export: `NM_AUX_SERVICE_mapreduce.shuffle=AAA0+gAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= +': not a valid identifier + +at org.apache.hadoop.util.Shell.runCommand(Shell.java:464) +at org.apache.hadoop.util.Shell.run(Shell.java:379) +at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:589) +at org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:195) +at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:270) +at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:78) +at java.util.concurrent.FutureTask$Sync.innerRun(FutureTask.java:303) +at java.util.concurrent.FutureTask.run(FutureTask.java:138) +at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886) +at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908) +at java.lang.Thread.run(Thread.java:662) +.Failing this attempt.. Failing the application.
    • +
    • YARN-1228. + Major improvement reported by Sandy Ryza and fixed by Sandy Ryza (scheduler)
      + Clean up Fair Scheduler configuration loading
      +
      Currently the Fair Scheduler is configured in two ways +* An allocations file that has a different format than the standard Hadoop configuration file, which makes it easier to specify hierarchical objects like queues and their properties. +* With properties like yarn.scheduler.fair.max.assign that are specified in the standard Hadoop configuration format. + +The standard and default way of configuring it is to use fair-scheduler.xml as the allocations file and to put the yarn.scheduler properties in yarn-site.xml. + +It is also possible to specify a different file as the allocations file, and to place the yarn.scheduler properties in fair-scheduler.xml, which will be interpreted as in the standard Hadoop configuration format. This flexibility is both confusing and unnecessary. + +Additionally, the allocation file is loaded as fair-scheduler.xml from the classpath if it is not specified, but is loaded as a File if it is. This causes two problems +1. We see different behavior when not setting the yarn.scheduler.fair.allocation.file, and setting it to fair-scheduler.xml, which is its default. +2. Classloaders may choose to cache resources, which can break the reload logic when yarn.scheduler.fair.allocation.file is not specified. + +We should never allow the yarn.scheduler properties to go into fair-scheduler.xml. And we should always load the allocations file as a file, not as a resource on the classpath. To preserve existing behavior and allow loading files from the classpath, we can look for files on the classpath, but strip of their scheme and interpret them as Files. +
    • +
    • YARN-1221. + Major bug reported by Sandy Ryza and fixed by Siqi Li (resourcemanager , scheduler)
      + With Fair Scheduler, reserved MB reported in RM web UI increases indefinitely
      +
    • +
    • YARN-1219. + Major bug reported by shanyu zhao and fixed by shanyu zhao (nodemanager)
      + FSDownload changes file suffix making FileUtil.unTar() throw exception
      +
      While running a Hive join operation on YARN, I saw the exception described below. This is caused by FSDownload copying the files into a temp file and changing the suffix to ".tmp" before unpacking it. In unpack(), it uses FileUtil.unTar(), which determines whether the file is "gzipped" by looking at the file suffix:
      {code}
      boolean gzipped = inFile.toString().endsWith("gz");
      {code}

      To fix this problem, we can remove the ".tmp" from the temp file name.

      Here is the detailed exception:

      org.apache.commons.compress.archivers.tar.TarArchiveInputStream.getNextTarEntry(TarArchiveInputStream.java:240)
        at org.apache.hadoop.fs.FileUtil.unTarUsingJava(FileUtil.java:676)
        at org.apache.hadoop.fs.FileUtil.unTar(FileUtil.java:625)
        at org.apache.hadoop.yarn.util.FSDownload.unpack(FSDownload.java:203)
        at org.apache.hadoop.yarn.util.FSDownload.call(FSDownload.java:287)
        at org.apache.hadoop.yarn.util.FSDownload.call(FSDownload.java:50)
        at java.util.concurrent.FutureTask$Sync.innerRun(FutureTask.java:334)
        at java.util.concurrent.FutureTask.run(FutureTask.java:166)
        at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
        at java.util.concurrent.FutureTask$Sync.innerRun(FutureTask.java:334)
        at java.util.concurrent.FutureTask.run(FutureTask.java:166)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1110)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:603)
        at java.lang.Thread.run(Thread.java:722)
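      As a small illustration of the suffix check quoted above (SuffixCheckExample is a hypothetical name, not part of Hadoop): once the downloaded archive is copied to a name ending in ".tmp", the endsWith("gz") test no longer recognizes it as gzipped.

      {code}
      public class SuffixCheckExample {
        public static void main(String[] args) {
          // FSDownload copies "foo.tar.gz" to a ".tmp" name before unpacking;
          // the gzip detection only looks at the suffix, so the renamed copy
          // is no longer treated as gzipped.
          System.out.println("foo.tar.gz".endsWith("gz"));     // true
          System.out.println("foo.tar.gz.tmp".endsWith("gz")); // false
        }
      }
      {code}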
    • +
    • YARN-1215. + Major bug reported by Chuan Liu and fixed by Chuan Liu (api)
      + Yarn URL should include userinfo
      +
      In the {{org.apache.hadoop.yarn.api.records.URL}} class, we don't have a userinfo as part of the URL. When converting a {{java.net.URI}} object into the YARN URL object in the {{ConverterUtils.getYarnUrlFromURI()}} method, we set the uri host as the url host. If the uri has a userinfo part, the userinfo is discarded. This leads to information loss if the original uri has a userinfo, e.g. foo://username:password@example.com will be converted to foo://example.com and the username/password information is lost during the conversion.
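      A hedged illustration using only java.net.URI (the class name UserInfoLossExample is hypothetical, not the YARN conversion code): rebuilding a URI from its scheme, host, and path alone, as a conversion that ignores userinfo effectively does, silently drops the credentials.

      {code}
      import java.net.URI;
      import java.net.URISyntaxException;

      public class UserInfoLossExample {
        public static void main(String[] args) throws URISyntaxException {
          URI original = new URI("foo://username:password@example.com/data");
          System.out.println(original.getUserInfo()); // username:password

          // Copying only scheme, host, port, and path drops the userinfo part.
          URI converted = new URI(original.getScheme(), null, original.getHost(),
              original.getPort(), original.getPath(), null, null);
          System.out.println(converted); // foo://example.com/data
        }
      }
      {code}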
    • +
    • YARN-1214. + Critical sub-task reported by Jian He and fixed by Jian He (resourcemanager)
      + Register ClientToken MasterKey in SecretManager after it is saved
      +
      Currently, app attempt ClientToken master key is registered before it is saved. This can cause problem that before the master key is saved, client gets the token and RM also crashes, RM cannot reloads the master key back after it restarts as it is not saved. As a result, client is holding an invalid token. + +We can register the client token master key after it is saved in the store.
    • +
    • YARN-1213. + Major improvement reported by Sandy Ryza and fixed by Sandy Ryza (scheduler)
      + Restore config to ban submitting to undeclared pools in the Fair Scheduler
      +
    • +
    • YARN-1204. + Major sub-task reported by Yesha Vora and fixed by Omkar Vinit Joshi
      + Need to add https port related property in Yarn
      +
      There is no yarn property available to configure https port for Resource manager, nodemanager and history server. Currently, Yarn services uses the port defined for http [defined by 'mapreduce.jobhistory.webapp.address','yarn.nodemanager.webapp.address', 'yarn.resourcemanager.webapp.address'] for running services on https protocol. + +Yarn should have list of property to assign https port for RM, NM and JHS. +It can be like below. +yarn.nodemanager.webapp.https.address +yarn.resourcemanager.webapp.https.address +mapreduce.jobhistory.webapp.https.address
    • +
    • YARN-1203. + Major sub-task reported by Yesha Vora and fixed by Omkar Vinit Joshi
      + Application Manager UI does not appear with Https enabled
      +
      Need to add support to disable 'hadoop.ssl.enabled' for MR jobs. + +A job should be able to run on http protocol by setting 'hadoop.ssl.enabled' property at job level. +
    • +
    • YARN-1167. + Major bug reported by Tassapol Athiapinya and fixed by Xuan Gong (applications/distributed-shell)
      + Submitted distributed shell application shows appMasterHost = empty
      +
      Submit distributed shell application. Once the application turns to be RUNNING state, app master host should not be empty. In reality, it is empty. + +==console logs== +distributedshell.Client: Got application report from ASM for, appId=12, clientToAMToken=null, appDiagnostics=, appMasterHost=, appQueue=default, appMasterRpcPort=0, appStartTime=1378505161360, yarnAppState=RUNNING, distributedFinalState=UNDEFINED, +
    • +
    • YARN-1157. + Major bug reported by Tassapol Athiapinya and fixed by Xuan Gong (resourcemanager)
      + ResourceManager UI has invalid tracking URL link for distributed shell application
      +
      Submit YARN distributed shell application. Goto ResourceManager Web UI. The application definitely appears. In Tracking UI column, there will be history link. Click on that link. Instead of showing application master web UI, HTTP error 500 would appear.
    • +
    • YARN-1149. + Major bug reported by Ramya Sunil and fixed by Xuan Gong
      + NM throws InvalidStateTransitonException: Invalid event: APPLICATION_LOG_HANDLING_FINISHED at RUNNING
      +
      When nodemanager receives a kill signal when an application has finished execution but log aggregation has not kicked in, InvalidStateTransitonException: Invalid event: APPLICATION_LOG_HANDLING_FINISHED at RUNNING is thrown + +{noformat} +2013-08-25 20:45:00,875 INFO logaggregation.AppLogAggregatorImpl (AppLogAggregatorImpl.java:finishLogAggregation(254)) - Application just finished : application_1377459190746_0118 +2013-08-25 20:45:00,876 INFO logaggregation.AppLogAggregatorImpl (AppLogAggregatorImpl.java:uploadLogsForContainer(105)) - Starting aggregate log-file for app application_1377459190746_0118 at /app-logs/foo/logs/application_1377459190746_0118/<host>_45454.tmp +2013-08-25 20:45:00,876 INFO logaggregation.LogAggregationService (LogAggregationService.java:stopAggregators(151)) - Waiting for aggregation to complete for application_1377459190746_0118 +2013-08-25 20:45:00,891 INFO logaggregation.AppLogAggregatorImpl (AppLogAggregatorImpl.java:uploadLogsForContainer(122)) - Uploading logs for container container_1377459190746_0118_01_000004. Current good log dirs are /tmp/yarn/local +2013-08-25 20:45:00,915 INFO logaggregation.AppLogAggregatorImpl (AppLogAggregatorImpl.java:doAppLogAggregation(182)) - Finished aggregate log-file for app application_1377459190746_0118 +2013-08-25 20:45:00,925 WARN application.Application (ApplicationImpl.java:handle(427)) - Can't handle this event at current state +org.apache.hadoop.yarn.state.InvalidStateTransitonException: Invalid event: APPLICATION_LOG_HANDLING_FINISHED at RUNNING + at org.apache.hadoop.yarn.state.StateMachineFactory.doTransition(StateMachineFactory.java:305) + at org.apache.hadoop.yarn.state.StateMachineFactory.access$300(StateMachineFactory.java:46) + at org.apache.hadoop.yarn.state.StateMachineFactory$InternalStateMachine.doTransition(StateMachineFactory.java:448) + at org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationImpl.handle(ApplicationImpl.java:425) + at org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationImpl.handle(ApplicationImpl.java:59) + at org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl$ApplicationEventDispatcher.handle(ContainerManagerImpl.java:697) + at org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl$ApplicationEventDispatcher.handle(ContainerManagerImpl.java:689) + at org.apache.hadoop.yarn.event.AsyncDispatcher.dispatch(AsyncDispatcher.java:134) + at org.apache.hadoop.yarn.event.AsyncDispatcher$1.run(AsyncDispatcher.java:81) + at java.lang.Thread.run(Thread.java:662) +2013-08-25 20:45:00,926 INFO application.Application (ApplicationImpl.java:handle(430)) - Application application_1377459190746_0118 transitioned from RUNNING to null +2013-08-25 20:45:00,927 WARN monitor.ContainersMonitorImpl (ContainersMonitorImpl.java:run(463)) - org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl is interrupted. Exiting. +2013-08-25 20:45:00,938 INFO ipc.Server (Server.java:stop(2437)) - Stopping server on 8040 +{noformat} + +
    • +
    • YARN-1141. + Major bug reported by Zhijie Shen and fixed by Zhijie Shen
      + Updating resource requests should be decoupled with updating blacklist
      +
      Currently, in CapacityScheduler and FifoScheduler, blacklist is updated together with resource requests, only when the incoming resource requests are not empty. Therefore, when the incoming resource requests are empty, the blacklist will not be updated even when blacklist additions and removals are not empty.
    • +
    • YARN-1131. + Minor sub-task reported by Tassapol Athiapinya and fixed by Siddharth Seth (client)
      + $yarn logs command should return an appropriate error message if YARN application is still running
      +
      In the case when log aggregation is enabled, if a user submits MapReduce job and runs $ yarn logs -applicationId <app ID> while the YARN application is running, the command will return no message and return user back to shell. It is nice to tell the user that log aggregation is in progress. + +{code} +-bash-4.1$ /usr/bin/yarn logs -applicationId application_1377900193583_0002 +-bash-4.1$ +{code} + +At the same time, if invalid application ID is given, YARN CLI should say that the application ID is incorrect rather than throwing NoSuchElementException. +{code} +$ /usr/bin/yarn logs -applicationId application_00000 +Exception in thread "main" java.util.NoSuchElementException +at com.google.common.base.AbstractIterator.next(AbstractIterator.java:75) +at org.apache.hadoop.yarn.util.ConverterUtils.toApplicationId(ConverterUtils.java:124) +at org.apache.hadoop.yarn.util.ConverterUtils.toApplicationId(ConverterUtils.java:119) +at org.apache.hadoop.yarn.logaggregation.LogDumper.run(LogDumper.java:110) +at org.apache.hadoop.yarn.logaggregation.LogDumper.main(LogDumper.java:255) + +{code} +
    • +
    • YARN-1128. + Major bug reported by Sandy Ryza and fixed by Karthik Kambatla (scheduler)
      + FifoPolicy.computeShares throws NPE on empty list of Schedulables
      +
      FifoPolicy gives all of a queue's share to the earliest-scheduled application.

      {code}
      Schedulable earliest = null;
      for (Schedulable schedulable : schedulables) {
        if (earliest == null ||
            schedulable.getStartTime() < earliest.getStartTime()) {
          earliest = schedulable;
        }
      }
      earliest.setFairShare(Resources.clone(totalResources));
      {code}

      If the queue has no schedulables in it, earliest will be left null, leading to an NPE on the last line.
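      A minimal, self-contained sketch of a guard that would avoid this NPE (the FifoShareSketch class, its Schedulable stand-in, and the simplified computeShares signature are illustrative only, not the actual Fair Scheduler API): the whole share is assigned only when the list is non-empty.

      {code}
      import java.util.Collections;
      import java.util.List;

      public class FifoShareSketch {
        interface Schedulable {
          long getStartTime();
          void setFairShare(long share);
        }

        static void computeShares(List<? extends Schedulable> schedulables, long total) {
          Schedulable earliest = null;
          for (Schedulable s : schedulables) {
            if (earliest == null || s.getStartTime() < earliest.getStartTime()) {
              earliest = s;
            }
          }
          if (earliest != null) {        // guard: an empty queue leaves earliest null
            earliest.setFairShare(total);
          }
        }

        public static void main(String[] args) {
          computeShares(Collections.emptyList(), 1024); // no NPE on an empty queue
        }
      }
      {code}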
    • +
    • YARN-1090. + Major bug reported by Yesha Vora and fixed by Jian He
      + Job does not get into Pending State
      +
      When there is no resource available to run a job, the next job should go into the pending state. The RM UI should show the next job as a pending app and the pending-app counter should be incremented.

      Currently, however, the next job stays in the ACCEPTED state and no AM has been assigned to it, yet the Pending App count is not incremented.
      Running 'job status <nextjob>' shows job state=PREP.

      $ mapred job -status job_1377122233385_0002
      13/08/21 21:59:23 INFO client.RMProxy: Connecting to ResourceManager at host1/ip1

      Job: job_1377122233385_0002
      Job File: /ABC/.staging/job_1377122233385_0002/job.xml
      Job Tracking URL : http://host1:port1/application_1377122233385_0002/
      Uber job : false
      Number of maps: 0
      Number of reduces: 0
      map() completion: 0.0
      reduce() completion: 0.0
      Job state: PREP
      retired: false
      reason for failure:
    • +
    • YARN-1070. + Major sub-task reported by Hitesh Shah and fixed by Zhijie Shen (nodemanager)
      + ContainerImpl State Machine: Invalid event: CONTAINER_KILLED_ON_REQUEST at CONTAINER_CLEANEDUP_AFTER_KILL
      +
    • +
    • YARN-1032. + Critical bug reported by Lohit Vijayarenu and fixed by Lohit Vijayarenu
      + NPE in RackResolve
      +
      We found a case where our rack resolve script was not returning rack due to problem with resolving host address. This exception was see in RackResolver.java as NPE, ultimately caught in RMContainerAllocator. + +{noformat} +2013-08-01 07:11:37,708 ERROR [RMCommunicator Allocator] org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator: ERROR IN CONTACTING RM. +java.lang.NullPointerException + at org.apache.hadoop.yarn.util.RackResolver.coreResolve(RackResolver.java:99) + at org.apache.hadoop.yarn.util.RackResolver.resolve(RackResolver.java:92) + at org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator$ScheduledRequests.assignMapsWithLocality(RMContainerAllocator.java:1039) + at org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator$ScheduledRequests.assignContainers(RMContainerAllocator.java:925) + at org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator$ScheduledRequests.assign(RMContainerAllocator.java:861) + at org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator$ScheduledRequests.access$400(RMContainerAllocator.java:681) + at org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator.heartbeat(RMContainerAllocator.java:219) + at org.apache.hadoop.mapreduce.v2.app.rm.RMCommunicator$1.run(RMCommunicator.java:243) + at java.lang.Thread.run(Thread.java:722) + +{noformat}
    • +
    • YARN-899. + Major sub-task reported by Sandy Ryza and fixed by Xuan Gong (scheduler)
      + Get queue administration ACLs working
      +
      The Capacity Scheduler documents the yarn.scheduler.capacity.root.<queue-path>.acl_administer_queue config option for controlling who can administer a queue, but it is not hooked up to anything. The Fair Scheduler could make use of a similar option as well. This is a feature-parity regression from MR1.
    • +
    • YARN-890. + Major bug reported by Trupti Dhavle and fixed by Xuan Gong (resourcemanager)
      + The roundup for memory values on resource manager UI is misleading
      +
      From the yarn-site.xml, I see following values-

      <property>
        <name>yarn.nodemanager.resource.memory-mb</name>
        <value>4192</value>
      </property>
      <property>
        <name>yarn.scheduler.maximum-allocation-mb</name>
        <value>4192</value>
      </property>
      <property>
        <name>yarn.scheduler.minimum-allocation-mb</name>
        <value>1024</value>
      </property>

      However the resourcemanager UI shows total memory as 5MB
    • +
    • YARN-876. + Major bug reported by PengZhang and fixed by PengZhang (resourcemanager)
      + Node resource is added twice when node comes back from unhealthy to healthy
      +
      When an unhealthy node restarts, its resource may be added twice in the scheduler.
      The first time is at the node's reconnection, while the node's final state is still "UNHEALTHY".
      The second time is at the node's update, while the node's state changes from "UNHEALTHY" to "HEALTHY".
    • +
    • YARN-621. + Critical sub-task reported by Allen Wittenauer and fixed by Omkar Vinit Joshi (resourcemanager)
      + RM triggers web auth failure before first job
      +
      On a secure YARN setup, before the first job is executed, going to the web interface of the resource manager triggers authentication errors.
    • +
    • YARN-49. + Major sub-task reported by Hitesh Shah and fixed by Vinod Kumar Vavilapalli (applications/distributed-shell)
      + Improve distributed shell application to work on a secure cluster
      +
    • +
    • MAPREDUCE-5562. + Major sub-task reported by Zhijie Shen and fixed by Zhijie Shen
      + MR AM should exit when unregister() throws exception
      +
    • +
    • MAPREDUCE-5554. + Minor bug reported by Robert Kanter and fixed by Robert Kanter (test)
      + hdfs-site.xml included in hadoop-mapreduce-client-jobclient tests jar is breaking tests for downstream components
      +
    • +
    • MAPREDUCE-5551. + Blocker sub-task reported by Zhijie Shen and fixed by Zhijie Shen
      + Binary Incompatibility of O.A.H.U.mapred.SequenceFileAsBinaryOutputFormat.WritableValueBytes
      +
    • +
    • MAPREDUCE-5545. + Major bug reported by Robert Kanter and fixed by Robert Kanter
      + org.apache.hadoop.mapred.TestTaskAttemptListenerImpl.testCommitWindow times out
      +
    • +
    • MAPREDUCE-5544. + Major bug reported by Sandy Ryza and fixed by Sandy Ryza
      + JobClient#getJob loads job conf twice
      +
    • +
    • MAPREDUCE-5538. + Blocker sub-task reported by Zhijie Shen and fixed by Zhijie Shen
      + MRAppMaster#shutDownJob shouldn't send job end notification before checking isLastRetry
      +
    • +
    • MAPREDUCE-5536. + Blocker bug reported by Yesha Vora and fixed by Omkar Vinit Joshi
      + mapreduce.jobhistory.webapp.https.address property is not respected
      +
    • +
    • MAPREDUCE-5533. + Major bug reported by Tassapol Athiapinya and fixed by Xuan Gong (applicationmaster)
      + Speculative execution does not function for reduce
      +
    • +
    • MAPREDUCE-5531. + Blocker sub-task reported by Robert Kanter and fixed by Robert Kanter (mrv1 , mrv2)
      + Binary and source incompatibility in mapreduce.TaskID and mapreduce.TaskAttemptID between branch-1 and branch-2
      +
    • +
    • MAPREDUCE-5530. + Blocker sub-task reported by Robert Kanter and fixed by Robert Kanter (mrv1 , mrv2)
      + Binary and source incompatibility in mapred.lib.CombineFileInputFormat between branch-1 and branch-2
      +
    • +
    • MAPREDUCE-5529. + Blocker sub-task reported by Robert Kanter and fixed by Robert Kanter (mrv1 , mrv2)
      + Binary incompatibilities in mapred.lib.TotalOrderPartitioner between branch-1 and branch-2
      +
    • +
    • MAPREDUCE-5525. + Minor test reported by Chuan Liu and fixed by Chuan Liu (mrv2 , test)
      + Increase timeout of TestDFSIO.testAppend and TestMRJobsWithHistoryService.testJobHistoryData
      +
    • +
    • MAPREDUCE-5523. + Major bug reported by Omkar Vinit Joshi and fixed by Omkar Vinit Joshi
      + Need to add https port related property in Job history server
      +
    • +
    • MAPREDUCE-5515. + Major bug reported by Omkar Vinit Joshi and fixed by Omkar Vinit Joshi
      + Application Manager UI does not appear with Https enabled
      +
    • +
    • MAPREDUCE-5513. + Major bug reported by Jason Lowe and fixed by Robert Parker
      + ConcurrentModificationException in JobControl
      +
    • +
    • MAPREDUCE-5505. + Critical sub-task reported by Jian He and fixed by Zhijie Shen
      + Clients should be notified job finished only after job successfully unregistered
      +
    • +
    • MAPREDUCE-5503. + Blocker bug reported by Jason Lowe and fixed by Jian He (mrv2)
      + TestMRJobClient.testJobClient is failing
      +
    • +
    • MAPREDUCE-5489. + Critical bug reported by Yesha Vora and fixed by Zhijie Shen
      + MR jobs hangs as it does not use the node-blacklisting feature in RM requests
      +
    • +
    • MAPREDUCE-5488. + Major bug reported by Arpit Gupta and fixed by Jian He
      + Job recovery fails after killing all the running containers for the app
      +
    • +
    • MAPREDUCE-5459. + Major bug reported by Zhijie Shen and fixed by Zhijie Shen
      + Update the doc of running MRv1 examples jar on YARN
      +
    • +
    • MAPREDUCE-5442. + Major bug reported by Yingda Chen and fixed by Yingda Chen (client)
      + $HADOOP_MAPRED_HOME/$HADOOP_CONF_DIR setting not working on Windows
      +
    • +
    • MAPREDUCE-5170. + Trivial bug reported by Sangjin Lee and fixed by Sangjin Lee (mrv2)
      + incorrect exception message if min node size > min rack size
      +
    • +
    • HDFS-5308. + Major improvement reported by Haohui Mai and fixed by Haohui Mai
      + Replace HttpConfig#getSchemePrefix with implicit schemes in HDFS JSP
      +
    • +
    • HDFS-5306. + Major sub-task reported by Suresh Srinivas and fixed by Suresh Srinivas (datanode , namenode)
      + Datanode https port is not available at the namenode
      +
    • +
    • HDFS-5300. + Major bug reported by Vinay and fixed by Vinay (namenode)
      + FSNameSystem#deleteSnapshot() should not check owner in case of permissions disabled
      +
    • +
    • HDFS-5299. + Blocker bug reported by Vinay and fixed by Vinay (namenode)
      + DFS client hangs in updatePipeline RPC when failover happened
      +
    • +
    • HDFS-5289. + Major bug reported by Aaron T. Myers and fixed by Aaron T. Myers (test)
      + Race condition in TestRetryCacheWithHA#testCreateSymlink causes spurious test failure
      +
    • +
    • HDFS-5279. + Major bug reported by Chris Nauroth and fixed by Chris Nauroth (namenode)
      + Guard against NullPointerException in NameNode JSP pages before initialization of FSNamesystem.
      +
    • +
    • HDFS-5268. + Major bug reported by Brandon Li and fixed by Brandon Li (nfs)
      + NFS write commit verifier is not set in a few places
      +
    • +
    • HDFS-5265. + Major bug reported by Haohui Mai and fixed by Haohui Mai
      + Namenode fails to start when dfs.https.port is unspecified
      +
    • +
    • HDFS-5259. + Major sub-task reported by Yesha Vora and fixed by Brandon Li (nfs)
      + Support client which combines appended data with old data before sends it to NFS server
      +
    • +
    • HDFS-5258. + Minor bug reported by Chris Nauroth and fixed by Chuan Liu (test)
      + Skip tests in TestHDFSCLI that are not applicable on Windows.
      +
    • +
    • HDFS-5256. + Major improvement reported by Haohui Mai and fixed by Haohui Mai (nfs)
      + Use guava LoadingCache to implement DFSClientCache
      +
    • +
    • HDFS-5255. + Major bug reported by Yesha Vora and fixed by Arpit Agarwal
      + Distcp job fails with hsftp when https is enabled in insecure cluster
      +
    • +
    • HDFS-5251. + Major bug reported by Haohui Mai and fixed by Haohui Mai
      + Race between the initialization of NameNode and the http server
      +
    • +
    • HDFS-5246. + Major sub-task reported by Jinghui Wang and fixed by Jinghui Wang (nfs)
      + Make Hadoop nfs server port and mount daemon port configurable
      +
    • +
    • HDFS-5230. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai (nfs)
      + Introduce RpcInfo to decouple XDR classes from the RPC API
      +
    • +
    • HDFS-5228. + Blocker bug reported by Tsz Wo (Nicholas), SZE and fixed by Tsz Wo (Nicholas), SZE (hdfs-client)
      + The RemoteIterator returned by DistributedFileSystem.listFiles(..) may throw NPE
      +
    • +
    • HDFS-5186. + Minor test reported by Chuan Liu and fixed by Chuan Liu (namenode , test)
      + TestFileJournalManager fails on Windows due to file handle leaks
      +
    • +
    • HDFS-5139. + Major improvement reported by Arpit Agarwal and fixed by Arpit Agarwal (tools)
      + Remove redundant -R option from setrep
      +
    • +
    • HDFS-5031. + Blocker bug reported by Vinay and fixed by Vinay (datanode)
      + BlockScanner scans the block multiple times and on restart scans everything
      +
    • +
    • HDFS-4817. + Minor improvement reported by Colin Patrick McCabe and fixed by Colin Patrick McCabe (hdfs-client)
      + make HDFS advisory caching configurable on a per-file basis
      +
    • +
    • HADOOP-10020. + Blocker sub-task reported by Colin Patrick McCabe and fixed by Sanjay Radia (fs)
      + disable symlinks temporarily
      +
      During review of symbolic links, many issues were found related impact on semantics of existing APIs such FileSystem#listStatus, FileSystem#globStatus etc. There were also many issues brought up about symbolic links and the impact on security and functionality of HDFS. All these issues will be address in the upcoming release 2.3. Until then the feature is temporarily disabled.
    • +
    • HADOOP-10017. + Major sub-task reported by Jing Zhao and fixed by Haohui Mai
      + Fix NPE in DFSClient#getDelegationToken when doing Distcp from a secured cluster to an insecured cluster
      +
    • +
    • HADOOP-10012. + Blocker bug reported by Arpit Gupta and fixed by Suresh Srinivas (ha)
      + Secure Oozie jobs fail with delegation token renewal exception in Namenode HA setup
      +
    • +
    • HADOOP-10003. + Major bug reported by Jason Dere and fixed by (fs)
      + HarFileSystem.listLocatedStatus() fails
      +
    • +
    • HADOOP-9976. + Major bug reported by Karthik Kambatla and fixed by Karthik Kambatla
      + Different versions of avro and avro-maven-plugin
      +
    • +
    • HADOOP-9948. + Minor test reported by Chuan Liu and fixed by Chuan Liu (test)
      + Add a config value to CLITestHelper to skip tests on Windows
      +
    • +
    • HADOOP-9776. + Major bug reported by shanyu zhao and fixed by shanyu zhao (fs)
      + HarFileSystem.listStatus() returns invalid authority if port number is empty
      +
    • +
    • HADOOP-9761. + Blocker bug reported by Andrew Wang and fixed by Andrew Wang (viewfs)
      + ViewFileSystem#rename fails when using DistributedFileSystem
      +
    • +
    • HADOOP-9758. + Major improvement reported by Andrew Wang and fixed by Andrew Wang
      + Provide configuration option for FileSystem/FileContext symlink resolution
      +
    • +
    • HADOOP-8315. + Major improvement reported by Todd Lipcon and fixed by Todd Lipcon (auto-failover , ha)
      + Support SASL-authenticated ZooKeeper in ActiveStandbyElector
      +
    • +
    + + Hadoop 2.1.1-beta Release Notes