diff --git a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/client/KerberosAuthenticator.java b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/client/KerberosAuthenticator.java index c9b21d1c3a1..006cb35b1ea 100644 --- a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/client/KerberosAuthenticator.java +++ b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/client/KerberosAuthenticator.java @@ -185,7 +185,7 @@ public void authenticate(URL url, AuthenticatedURL.Token token) conn.setRequestMethod(AUTH_HTTP_METHOD); conn.connect(); - if (conn.getRequestProperty(AUTHORIZATION) != null && conn.getResponseCode() == HttpURLConnection.HTTP_OK) { + if (conn.getResponseCode() == HttpURLConnection.HTTP_OK) { LOG.debug("JDK performed authentication on our behalf."); // If the JDK already did the SPNEGO back-and-forth for // us, just pull out the token. diff --git a/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/client/AuthenticatorTestCase.java b/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/client/AuthenticatorTestCase.java index ba7b43343d6..4e4ecc483eb 100644 --- a/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/client/AuthenticatorTestCase.java +++ b/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/client/AuthenticatorTestCase.java @@ -136,7 +136,6 @@ protected void _testAuthentication(Authenticator authenticator, boolean doPost) TestConnectionConfigurator connConf = new TestConnectionConfigurator(); AuthenticatedURL aUrl = new AuthenticatedURL(authenticator, connConf); HttpURLConnection conn = aUrl.openConnection(url, token); - Assert.assertTrue(token.isSet()); Assert.assertTrue(connConf.invoked); String tokenStr = token.toString(); if (doPost) { diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 6fd5c4be0f9..8de10273445 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -284,7 +284,7 @@ Trunk (Unreleased) HADOOP-7761. Improve the performance of raw comparisons. (todd) - HADOOP-8589 ViewFs tests fail when tests and home dirs are nested (sanjay Radia) + HADOOP-8589. ViewFs tests fail when tests and home dirs are nested (sanjay Radia) Release 2.3.0 - UNRELEASED @@ -375,6 +375,14 @@ Release 2.3.0 - UNRELEASED HADOOP-10064. Upgrade to maven antrun plugin version 1.7 (Arpit Agarwal via jeagles) + HADOOP-9594. Update apache commons math dependency (Timothy St. Clair via + stevel) + + HADOOP-10095. In CodecPool, synchronize pool and codecList separately in + order to reduce lock contention. (Nicolas Liochon via szetszwo) + + HADOOP-10067. Missing POM dependency on jsr305 (Robert Rati via stevel) + OPTIMIZATIONS HADOOP-9748. Reduce blocking on UGI.ensureInitialized (daryn) @@ -426,6 +434,9 @@ Release 2.3.0 - UNRELEASED HADOOP-10093. hadoop-env.cmd sets HADOOP_CLIENT_OPTS with a max heap size that is too small. (Shanyu Zhao via cnauroth) + HADOOP-10094. NPE in GenericOptionsParser#preProcessForWindows(). + (Enis Soztutar via cnauroth) + Release 2.2.1 - UNRELEASED INCOMPATIBLE CHANGES @@ -476,6 +487,8 @@ Release 2.2.1 - UNRELEASED as [-Dkey, value] which breaks GenericsOptionParser. (Enis Soztutar via cnauroth) + HADOOP-10078. 
KerberosAuthenticator always does SPNEGO. (rkanter via tucu) + Release 2.2.0 - 2013-10-13 INCOMPATIBLE CHANGES @@ -2298,6 +2311,10 @@ Release 0.23.10 - UNRELEASED OPTIMIZATIONS + HADOOP-9956. RPC listener inefficiently assigns connections to readers (daryn) + + HADOOP-9955. RPC idle connection closing is extremely inefficient (daryn) + BUG FIXES Release 0.23.9 - 2013-07-08 diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index 939ce20094d..7ebf9b4cf08 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -55,7 +55,7 @@ org.apache.commons - commons-math + commons-math3 compile @@ -83,6 +83,11 @@ commons-net compile + + commons-collections + commons-collections + compile + javax.servlet servlet-api @@ -213,6 +218,11 @@ com.jcraft jsch + + com.google.code.findbugs + jsr305 + compile + org.apache.zookeeper diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java index dcab369e385..e52659dd6e7 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java @@ -65,6 +65,13 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic { /** Default value for IPC_SERVER_RPC_READ_THREADS_KEY */ public static final int IPC_SERVER_RPC_READ_THREADS_DEFAULT = 1; + /** Number of pending connections that may be queued per socket reader */ + public static final String IPC_SERVER_RPC_READ_CONNECTION_QUEUE_SIZE_KEY = + "ipc.server.read.connection-queue.size"; + /** Default value for IPC_SERVER_RPC_READ_CONNECTION_QUEUE_SIZE */ + public static final int IPC_SERVER_RPC_READ_CONNECTION_QUEUE_SIZE_DEFAULT = + 100; + public static final String IPC_MAXIMUM_DATA_LENGTH = "ipc.maximum.data.length"; @@ -219,4 +226,10 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic { public static final String IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_KEY = "ipc.client.fallback-to-simple-auth-allowed"; public static final boolean IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_DEFAULT = false; + /** How often the server scans for idle connections */ + public static final String IPC_CLIENT_CONNECTION_IDLESCANINTERVAL_KEY = + "ipc.client.connection.idle-scan-interval.ms"; + /** Default value for IPC_SERVER_CONNECTION_IDLE_SCAN_INTERVAL_KEY */ + public static final int IPC_CLIENT_CONNECTION_IDLESCANINTERVAL_DEFAULT = + 10000; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CodecPool.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CodecPool.java index 63e3a09c307..11d88f13cd4 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CodecPool.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CodecPool.java @@ -85,16 +85,15 @@ private static T borrow(Map, List> pool, T codec = null; // Check if an appropriate codec is available + List codecList; synchronized (pool) { - if (pool.containsKey(codecClass)) { - List codecList = pool.get(codecClass); - - if (codecList != null) { - synchronized (codecList) { - if (!codecList.isEmpty()) { - codec = codecList.remove(codecList.size()-1); - } - } + codecList = pool.get(codecClass); + 
} + + if (codecList != null) { + synchronized (codecList) { + if (!codecList.isEmpty()) { + codec = codecList.remove(codecList.size() - 1); } } } @@ -105,15 +104,17 @@ private static T borrow(Map, List> pool, private static void payback(Map, List> pool, T codec) { if (codec != null) { Class codecClass = ReflectionUtils.getClass(codec); + List codecList; synchronized (pool) { - if (!pool.containsKey(codecClass)) { - pool.put(codecClass, new ArrayList()); + codecList = pool.get(codecClass); + if (codecList == null) { + codecList = new ArrayList(); + pool.put(codecClass, codecList); } + } - List codecList = pool.get(codecClass); - synchronized (codecList) { - codecList.add(codec); - } + synchronized (codecList) { + codecList.add(codec); } } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/nativeio/NativeIO.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/nativeio/NativeIO.java index 820f1068977..8a77bede4f2 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/nativeio/NativeIO.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/nativeio/NativeIO.java @@ -114,7 +114,17 @@ public static class POSIX { public static interface CacheTracker { public void fadvise(String identifier, long offset, long len, int flags); } - + + public static CacheManipulator cacheManipulator = new CacheManipulator(); + + @VisibleForTesting + public static class CacheManipulator { + public void mlock(String identifier, ByteBuffer buffer, + long len) throws IOException { + POSIX.mlock(buffer, len); + } + } + static { if (NativeCodeLoader.isNativeCodeLoaded()) { try { @@ -249,7 +259,7 @@ static native void munlock_native( * * @throws NativeIOException */ - public static void mlock(ByteBuffer buffer, long len) + static void mlock(ByteBuffer buffer, long len) throws IOException { assertCodeLoaded(); if (!buffer.isDirect()) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java index 89f9501d980..b8dc754c74f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java @@ -51,11 +51,13 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.Random; import java.util.Set; +import java.util.Timer; +import java.util.TimerTask; import java.util.concurrent.BlockingQueue; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.atomic.AtomicInteger; import javax.security.sasl.Sasl; import javax.security.sasl.SaslException; @@ -345,17 +347,8 @@ public static boolean isRpcInvocation() { private int port; // port we listen on private int handlerCount; // number of handler threads private int readThreads; // number of read threads + private int readerPendingConnectionQueue; // number of connections to queue per read thread private Class rpcRequestClass; // class used for deserializing the rpc request - private int maxIdleTime; // the maximum idle time after - // which a client may be disconnected - private int thresholdIdleConnections; // the number of idle connections - // after which we will start - // cleaning up idle - // connections - int maxConnectionsToNuke; // the max number of - // connections to nuke - //during a cleanup - protected RpcMetrics 
rpcMetrics; protected RpcDetailedMetrics rpcDetailedMetrics; @@ -373,13 +366,10 @@ public static boolean isRpcInvocation() { volatile private boolean running = true; // true while server runs private BlockingQueue callQueue; // queued calls - private List connectionList = - Collections.synchronizedList(new LinkedList()); - //maintain a list - //of client connections + // maintains the set of client connections and handles idle timeouts + private ConnectionManager connectionManager; private Listener listener = null; private Responder responder = null; - private int numConnections = 0; private Handler[] handlers = null; /** @@ -449,8 +439,8 @@ Iterable getHandlers() { } @VisibleForTesting - List getConnections() { - return connectionList; + Connection[] getConnections() { + return connectionManager.toArray(); } /** @@ -518,11 +508,6 @@ private class Listener extends Thread { private Reader[] readers = null; private int currentReader = 0; private InetSocketAddress address; //the address we bind at - private Random rand = new Random(); - private long lastCleanupRunTime = 0; //the last time when a cleanup connec- - //-tion (for idle connections) ran - private long cleanupInterval = 10000; //the minimum interval between - //two cleanup runs private int backlogLength = conf.getInt( CommonConfigurationKeysPublic.IPC_SERVER_LISTEN_QUEUE_SIZE_KEY, CommonConfigurationKeysPublic.IPC_SERVER_LISTEN_QUEUE_SIZE_DEFAULT); @@ -553,12 +538,14 @@ public Listener() throws IOException { } private class Reader extends Thread { - private volatile boolean adding = false; + final private BlockingQueue pendingConnections; private final Selector readSelector; Reader(String name) throws IOException { super(name); + this.pendingConnections = + new LinkedBlockingQueue(readerPendingConnectionQueue); this.readSelector = Selector.open(); } @@ -580,10 +567,14 @@ private synchronized void doRunLoop() { while (running) { SelectionKey key = null; try { + // consume as many connections as currently queued to avoid + // unbridled acceptance of connections that starves the select + int size = pendingConnections.size(); + for (int i=size; i>0; i--) { + Connection conn = pendingConnections.take(); + conn.channel.register(readSelector, SelectionKey.OP_READ, conn); + } readSelector.select(); - while (adding) { - this.wait(1000); - } Iterator iter = readSelector.selectedKeys().iterator(); while (iter.hasNext()) { @@ -607,26 +598,14 @@ private synchronized void doRunLoop() { } /** - * This gets reader into the state that waits for the new channel - * to be registered with readSelector. If it was waiting in select() - * the thread will be woken up, otherwise whenever select() is called - * it will return even if there is nothing to read and wait - * in while(adding) for finishAdd call + * Updating the readSelector while it's being used is not thread-safe, + * so the connection must be queued. The reader will drain the queue + * and update its readSelector before performing the next select */ - public void startAdd() { - adding = true; + public void addConnection(Connection conn) throws InterruptedException { + pendingConnections.put(conn); readSelector.wakeup(); } - - public synchronized SelectionKey registerChannel(SocketChannel channel) - throws IOException { - return channel.register(readSelector, SelectionKey.OP_READ); - } - - public synchronized void finishAdd() { - adding = false; - this.notify(); - } void shutdown() { assert !running; @@ -638,58 +617,12 @@ void shutdown() { } } } - /** cleanup connections from connectionList. 
Choose a random range - * to scan and also have a limit on the number of the connections - * that will be cleanedup per run. The criteria for cleanup is the time - * for which the connection was idle. If 'force' is true then all - * connections will be looked at for the cleanup. - */ - private void cleanupConnections(boolean force) { - if (force || numConnections > thresholdIdleConnections) { - long currentTime = Time.now(); - if (!force && (currentTime - lastCleanupRunTime) < cleanupInterval) { - return; - } - int start = 0; - int end = numConnections - 1; - if (!force) { - start = rand.nextInt() % numConnections; - end = rand.nextInt() % numConnections; - int temp; - if (end < start) { - temp = start; - start = end; - end = temp; - } - } - int i = start; - int numNuked = 0; - while (i <= end) { - Connection c; - synchronized (connectionList) { - try { - c = connectionList.get(i); - } catch (Exception e) {return;} - } - if (c.timedOut(currentTime)) { - if (LOG.isDebugEnabled()) - LOG.debug(getName() + ": disconnecting client " + c.getHostAddress()); - closeConnection(c); - numNuked++; - end--; - c = null; - if (!force && numNuked == maxConnectionsToNuke) break; - } - else i++; - } - lastCleanupRunTime = Time.now(); - } - } @Override public void run() { LOG.info(getName() + ": starting"); SERVER.set(Server.this); + connectionManager.startIdleScan(); while (running) { SelectionKey key = null; try { @@ -713,12 +646,11 @@ public void run() { // some thread(s) a chance to finish LOG.warn("Out of Memory in server select", e); closeCurrentConnection(key, e); - cleanupConnections(true); + connectionManager.closeIdle(true); try { Thread.sleep(60000); } catch (Exception ie) {} } catch (Exception e) { closeCurrentConnection(key, e); } - cleanupConnections(false); } LOG.info("Stopping " + this.getName()); @@ -731,10 +663,9 @@ public void run() { selector= null; acceptChannel= null; - // clean up all connections - while (!connectionList.isEmpty()) { - closeConnection(connectionList.remove(0)); - } + // close all connections + connectionManager.stopIdleScan(); + connectionManager.closeAll(); } } @@ -742,8 +673,6 @@ private void closeCurrentConnection(SelectionKey key, Throwable e) { if (key != null) { Connection c = (Connection)key.attachment(); if (c != null) { - if (LOG.isDebugEnabled()) - LOG.debug(getName() + ": disconnecting client " + c.getHostAddress()); closeConnection(c); c = null; } @@ -754,8 +683,7 @@ InetSocketAddress getAddress() { return (InetSocketAddress)acceptChannel.socket().getLocalSocketAddress(); } - void doAccept(SelectionKey key) throws IOException, OutOfMemoryError { - Connection c = null; + void doAccept(SelectionKey key) throws InterruptedException, IOException, OutOfMemoryError { ServerSocketChannel server = (ServerSocketChannel) key.channel(); SocketChannel channel; while ((channel = server.accept()) != null) { @@ -765,22 +693,9 @@ void doAccept(SelectionKey key) throws IOException, OutOfMemoryError { channel.socket().setKeepAlive(true); Reader reader = getReader(); - try { - reader.startAdd(); - SelectionKey readKey = reader.registerChannel(channel); - c = new Connection(readKey, channel, Time.now()); - readKey.attach(c); - synchronized (connectionList) { - connectionList.add(numConnections, c); - numConnections++; - } - if (LOG.isDebugEnabled()) - LOG.debug("Server connection from " + c.toString() + - "; # active connections: " + numConnections + - "; # queued calls: " + callQueue.size()); - } finally { - reader.finishAdd(); - } + Connection c = 
connectionManager.register(channel); + key.attach(c); // so closeCurrentConnection can get the object + reader.addConnection(c); } } @@ -808,10 +723,6 @@ void doRead(SelectionKey key) throws InterruptedException { count = -1; //so that the (count < 0) block is executed } if (count < 0) { - if (LOG.isDebugEnabled()) - LOG.debug(getName() + ": disconnecting client " + - c + ". Number of active connections: "+ - numConnections); closeConnection(c); c = null; } @@ -1190,8 +1101,7 @@ public class Connection { private boolean sentNegotiate = false; private boolean useWrap = false; - public Connection(SelectionKey key, SocketChannel channel, - long lastContact) { + public Connection(SocketChannel channel, long lastContact) { this.channel = channel; this.lastContact = lastContact; this.data = null; @@ -1253,12 +1163,6 @@ private void incRpcCount() { rpcCount++; } - private boolean timedOut(long currentTime) { - if (isIdle() && currentTime - lastContact > maxIdleTime) - return true; - return false; - } - private UserGroupInformation getAuthorizedUgi(String authorizedId) throws InvalidToken, AccessControlException { if (authMethod == AuthMethod.TOKEN) { @@ -2189,16 +2093,10 @@ protected Server(String bindAddress, int port, CommonConfigurationKeys.IPC_SERVER_RPC_READ_THREADS_KEY, CommonConfigurationKeys.IPC_SERVER_RPC_READ_THREADS_DEFAULT); } + this.readerPendingConnectionQueue = conf.getInt( + CommonConfigurationKeys.IPC_SERVER_RPC_READ_CONNECTION_QUEUE_SIZE_KEY, + CommonConfigurationKeys.IPC_SERVER_RPC_READ_CONNECTION_QUEUE_SIZE_DEFAULT); this.callQueue = new LinkedBlockingQueue(maxQueueSize); - this.maxIdleTime = 2 * conf.getInt( - CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY, - CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_DEFAULT); - this.maxConnectionsToNuke = conf.getInt( - CommonConfigurationKeysPublic.IPC_CLIENT_KILL_MAX_KEY, - CommonConfigurationKeysPublic.IPC_CLIENT_KILL_MAX_DEFAULT); - this.thresholdIdleConnections = conf.getInt( - CommonConfigurationKeysPublic.IPC_CLIENT_IDLETHRESHOLD_KEY, - CommonConfigurationKeysPublic.IPC_CLIENT_IDLETHRESHOLD_DEFAULT); this.secretManager = (SecretManager) secretManager; this.authorize = conf.getBoolean(CommonConfigurationKeys.HADOOP_SECURITY_AUTHORIZATION, @@ -2219,6 +2117,7 @@ protected Server(String bindAddress, int port, // Create the responder here responder = new Responder(); + connectionManager = new ConnectionManager(); if (secretManager != null) { SaslRpcServer.init(conf); @@ -2277,11 +2176,7 @@ private List getAuthMethods(SecretManager secretManager, } private void closeConnection(Connection connection) { - synchronized (connectionList) { - if (connectionList.remove(connection)) - numConnections--; - } - connection.close(); + connectionManager.close(connection); } /** @@ -2536,7 +2431,7 @@ public int getPort() { * @return the number of open rpc connections */ public int getNumOpenConnections() { - return numConnections; + return connectionManager.size(); } /** @@ -2646,4 +2541,151 @@ private static int channelIO(ReadableByteChannel readCh, int nBytes = initialRemaining - buf.remaining(); return (nBytes > 0) ? 
nBytes : ret; } + + private class ConnectionManager { + final private AtomicInteger count = new AtomicInteger(); + final private Set connections; + + final private Timer idleScanTimer; + final private int idleScanThreshold; + final private int idleScanInterval; + final private int maxIdleTime; + final private int maxIdleToClose; + + ConnectionManager() { + this.idleScanTimer = new Timer( + "IPC Server idle connection scanner for port " + getPort(), true); + this.idleScanThreshold = conf.getInt( + CommonConfigurationKeysPublic.IPC_CLIENT_IDLETHRESHOLD_KEY, + CommonConfigurationKeysPublic.IPC_CLIENT_IDLETHRESHOLD_DEFAULT); + this.idleScanInterval = conf.getInt( + CommonConfigurationKeys.IPC_CLIENT_CONNECTION_IDLESCANINTERVAL_KEY, + CommonConfigurationKeys.IPC_CLIENT_CONNECTION_IDLESCANINTERVAL_DEFAULT); + this.maxIdleTime = 2 * conf.getInt( + CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY, + CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_DEFAULT); + this.maxIdleToClose = conf.getInt( + CommonConfigurationKeysPublic.IPC_CLIENT_KILL_MAX_KEY, + CommonConfigurationKeysPublic.IPC_CLIENT_KILL_MAX_DEFAULT); + // create a set with concurrency -and- a thread-safe iterator, add 2 + // for listener and idle closer threads + this.connections = Collections.newSetFromMap( + new ConcurrentHashMap( + maxQueueSize, 0.75f, readThreads+2)); + } + + private boolean add(Connection connection) { + boolean added = connections.add(connection); + if (added) { + count.getAndIncrement(); + } + return added; + } + + private boolean remove(Connection connection) { + boolean removed = connections.remove(connection); + if (removed) { + count.getAndDecrement(); + } + return removed; + } + + int size() { + return count.get(); + } + + Connection[] toArray() { + return connections.toArray(new Connection[0]); + } + + Connection register(SocketChannel channel) { + Connection connection = new Connection(channel, Time.now()); + add(connection); + if (LOG.isDebugEnabled()) { + LOG.debug("Server connection from " + connection + + "; # active connections: " + size() + + "; # queued calls: " + callQueue.size()); + } + return connection; + } + + boolean close(Connection connection) { + boolean exists = remove(connection); + if (exists) { + if (LOG.isDebugEnabled()) { + LOG.debug(Thread.currentThread().getName() + + ": disconnecting client " + connection + + ". 
Number of active connections: "+ size()); + } + // only close if actually removed to avoid double-closing due + // to possible races + connection.close(); + } + return exists; + } + + // synch'ed to avoid explicit invocation upon OOM from colliding with + // timer task firing + synchronized void closeIdle(boolean scanAll) { + long minLastContact = Time.now() - maxIdleTime; + // concurrent iterator might miss new connections added + // during the iteration, but that's ok because they won't + // be idle yet anyway and will be caught on next scan + int closed = 0; + for (Connection connection : connections) { + // stop if connections dropped below threshold unless scanning all + if (!scanAll && size() < idleScanThreshold) { + break; + } + // stop if not scanning all and max connections are closed + if (connection.isIdle() && + connection.getLastContact() < minLastContact && + close(connection) && + !scanAll && (++closed == maxIdleToClose)) { + break; + } + } + } + + void closeAll() { + // use a copy of the connections to be absolutely sure the concurrent + // iterator doesn't miss a connection + for (Connection connection : toArray()) { + close(connection); + } + } + + void startIdleScan() { + scheduleIdleScanTask(); + } + + void stopIdleScan() { + idleScanTimer.cancel(); + } + + private void scheduleIdleScanTask() { + if (!running) { + return; + } + TimerTask idleScanTask = new TimerTask(){ + @Override + public void run() { + if (!running) { + return; + } + if (LOG.isDebugEnabled()) { + LOG.debug(Thread.currentThread().getName()+": task running"); + } + try { + closeIdle(false); + } finally { + // explicitly reschedule so next execution occurs relative + // to the end of this scan, not the beginning + scheduleIdleScanTask(); + } + } + }; + idleScanTimer.schedule(idleScanTask, idleScanInterval); + } + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java index 259d329c4be..e386ce608f0 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java @@ -38,7 +38,7 @@ import org.apache.commons.configuration.PropertiesConfiguration; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.commons.math.util.MathUtils; +import org.apache.commons.math3.util.ArithmeticUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.metrics2.MetricsInfo; import org.apache.hadoop.metrics2.MetricsCollector; @@ -457,7 +457,7 @@ private synchronized void configureSinks() { MetricsConfig conf = entry.getValue(); int sinkPeriod = conf.getInt(PERIOD_KEY, PERIOD_DEFAULT); confPeriod = confPeriod == 0 ? 
sinkPeriod - : MathUtils.gcd(confPeriod, sinkPeriod); + : ArithmeticUtils.gcd(confPeriod, sinkPeriod); String clsName = conf.getClassName(""); if (clsName == null) continue; // sink can be registered later on String sinkName = entry.getKey(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GenericOptionsParser.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GenericOptionsParser.java index 6048f8e8185..67818555393 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GenericOptionsParser.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GenericOptionsParser.java @@ -431,6 +431,9 @@ private String[] preProcessForWindows(String[] args) { if (!Shell.WINDOWS) { return args; } + if (args == null) { + return null; + } List newArgs = new ArrayList(args.length); for (int i=0; i < args.length; i++) { String prop = null; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ZKUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ZKUtil.java index bd08efb5cda..d23df79d27f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ZKUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ZKUtil.java @@ -71,6 +71,20 @@ private static int getPermFromString(String permString) { return perm; } + /** + * Helper method to remove a subset of permissions (remove) from a + * given set (perms). + * @param perms The permissions flag to remove from. Should be an OR of a + * some combination of {@link ZooDefs.Perms} + * @param remove The permissions to be removed. Should be an OR of a + * some combination of {@link ZooDefs.Perms} + * @return A permissions flag that is an OR of {@link ZooDefs.Perms} + * present in perms and not present in remove + */ + public static int removeSpecificPerms(int perms, int remove) { + return perms ^ remove; + } + /** * Parse comma separated list of ACL entries to secure generated nodes, e.g. 
* sasl:hdfs/host1@MY.DOMAIN:cdrwa,sasl:hdfs/host2@MY.DOMAIN:cdrwa diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java index 33fb799c0c8..02516a183aa 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java @@ -44,12 +44,19 @@ import java.util.Collections; import java.util.List; import java.util.Random; +import java.util.concurrent.BrokenBarrierException; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.CyclicBarrier; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; import javax.net.SocketFactory; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.IntWritable; @@ -64,8 +71,10 @@ import org.apache.hadoop.net.ConnectTimeoutException; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.util.StringUtils; +import org.apache.log4j.Level; import org.junit.Assert; import org.junit.Assume; +import org.junit.Before; import org.junit.Test; import org.mockito.Mockito; import org.mockito.invocation.InvocationOnMock; @@ -79,7 +88,7 @@ public class TestIPC { public static final Log LOG = LogFactory.getLog(TestIPC.class); - final private static Configuration conf = new Configuration(); + private static Configuration conf; final static private int PING_INTERVAL = 1000; final static private int MIN_SLEEP_TIME = 1000; /** @@ -89,7 +98,9 @@ public class TestIPC { static boolean WRITABLE_FAULTS_ENABLED = true; static int WRITABLE_FAULTS_SLEEP = 0; - static { + @Before + public void setupConf() { + conf = new Configuration(); Client.setPingInterval(conf, PING_INTERVAL); } @@ -613,6 +624,255 @@ public void testIpcWithServiceClass() throws IOException { server.stop(); } + private static class TestServerQueue extends Server { + final CountDownLatch firstCallLatch = new CountDownLatch(1); + final CountDownLatch callBlockLatch = new CountDownLatch(1); + + TestServerQueue(int expectedCalls, int readers, int callQ, int handlers, + Configuration conf) throws IOException { + super(ADDRESS, 0, LongWritable.class, handlers, readers, callQ, conf, null, null); + } + + @Override + public Writable call(RPC.RpcKind rpcKind, String protocol, Writable param, + long receiveTime) throws IOException { + firstCallLatch.countDown(); + try { + callBlockLatch.await(); + } catch (InterruptedException e) { + throw new IOException(e); + } + return param; + } + } + + /** + * Check that reader queueing works + * @throws BrokenBarrierException + * @throws InterruptedException + */ + @Test(timeout=60000) + public void testIpcWithReaderQueuing() throws Exception { + // 1 reader, 1 connectionQ slot, 1 callq + for (int i=0; i < 10; i++) { + checkBlocking(1, 1, 1); + } + // 4 readers, 5 connectionQ slots, 2 callq + for (int i=0; i < 10; i++) { + checkBlocking(4, 5, 2); + } + } + + // goal is to jam a handler with a connection, fill the callq with + // connections, in turn jamming the readers - then flood the server and + // ensure that the listener blocks when the reader connection queues fill + 
private void checkBlocking(int readers, int readerQ, int callQ) throws Exception { + int handlers = 1; // makes it easier + + final Configuration conf = new Configuration(); + conf.setInt(CommonConfigurationKeys.IPC_SERVER_RPC_READ_CONNECTION_QUEUE_SIZE_KEY, readerQ); + + // send in enough clients to block up the handlers, callq, and readers + int initialClients = readers + callQ + handlers; + // max connections we should ever end up accepting at once + int maxAccept = initialClients + readers*readerQ + 1; // 1 = listener + // stress it with 2X the max + int clients = maxAccept*2; + + final AtomicInteger failures = new AtomicInteger(0); + final CountDownLatch callFinishedLatch = new CountDownLatch(clients); + + // start server + final TestServerQueue server = + new TestServerQueue(clients, readers, callQ, handlers, conf); + final InetSocketAddress addr = NetUtils.getConnectAddress(server); + server.start(); + + Client.setConnectTimeout(conf, 10000); + + // instantiate the threads, will start in batches + Thread[] threads = new Thread[clients]; + for (int i=0; i 1; i -= killMax) { + Thread.sleep(cleanupInterval); + assertFalse(error.get()); + assertEquals(i, server.getNumOpenConnections()); + } + + // connection for the first blocked call should still be open + Thread.sleep(cleanupInterval); + assertFalse(error.get()); + assertEquals(1, server.getNumOpenConnections()); + + // wake up call and ensure connection times out + firstCallBarrier.await(); + Thread.sleep(maxIdle*2); + assertFalse(error.get()); + assertEquals(0, server.getNumOpenConnections()); + } finally { + for (Thread t : threads) { + if (t != null) { + t.interrupt(); + t.join(); + } + server.stop(); + } + } + } + /** * Make a call from a client and verify if header info is changed in server side */ @@ -622,7 +882,7 @@ private void callAndVerify(Server server, InetSocketAddress addr, client.call(new LongWritable(RANDOM.nextLong()), addr, null, null, MIN_SLEEP_TIME, serviceClass, conf); - Connection connection = server.getConnections().get(0); + Connection connection = server.getConnections()[0]; int serviceClass2 = connection.getServiceClass(); assertFalse(noChanged ^ serviceClass == serviceClass2); client.stop(); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestGenericOptionsParser.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestGenericOptionsParser.java index 87bc83b4192..48a419b3a52 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestGenericOptionsParser.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestGenericOptionsParser.java @@ -282,4 +282,12 @@ private void assertDOptionParsing(String[] args, Arrays.toString(remainingArgs) + Arrays.toString(expectedRemainingArgs), expectedRemainingArgs, remainingArgs); } + + /** Test passing null as args. Some classes still call + * Tool interface from java passing null. 
+ */ + public void testNullArgs() throws IOException { + GenericOptionsParser parser = new GenericOptionsParser(conf, null); + parser.getRemainingArgs(); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestZKUtil.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestZKUtil.java index 1d14326d2ab..52d10ca2fca 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestZKUtil.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestZKUtil.java @@ -24,7 +24,6 @@ import java.io.IOException; import java.util.List; -import org.apache.hadoop.util.ZKUtil; import org.apache.hadoop.util.ZKUtil.BadAclFormatException; import org.apache.hadoop.util.ZKUtil.ZKAuthInfo; import org.apache.zookeeper.ZooDefs.Perms; @@ -75,6 +74,14 @@ private static void badAcl(String acls, String expectedErr) { } } + @Test + public void testRemoveSpecificPerms() { + int perms = Perms.ALL; + int remove = Perms.CREATE; + int newPerms = ZKUtil.removeSpecificPerms(perms, remove); + assertEquals("Removal failed", 0, newPerms & Perms.CREATE); + } + @Test public void testGoodACLs() { List result = ZKUtil.parseACLs( diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/libexec/httpfs-config.sh b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/libexec/httpfs-config.sh index 02e1a7172c7..621a2fa96f5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/libexec/httpfs-config.sh +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/libexec/httpfs-config.sh @@ -55,7 +55,7 @@ print "Setting HTTPFS_HOME: ${HTTPFS_HOME}" # if [ -e "${HTTPFS_HOME}/bin/httpfs-env.sh" ]; then print "Sourcing: ${HTTPFS_HOME}/bin/httpfs-env.sh" - source ${HTTPFS_HOME}/bin/HTTPFS-env.sh + source ${HTTPFS_HOME}/bin/httpfs-env.sh grep "^ *export " ${HTTPFS_HOME}/bin/httpfs-env.sh | sed 's/ *export/ setting/' fi diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 7586f950a56..2eef17a5056 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -13,6 +13,9 @@ Trunk (Unreleased) HDFS-3125. Add JournalService to enable Journal Daemon. (suresh) + HDFS-5444. Choose default web UI based on browser capabilities. (Haohui Mai + via jing9) + IMPROVEMENTS HDFS-4665. Move TestNetworkTopologyWithNodeGroup to common. @@ -194,6 +197,8 @@ Trunk (Unreleased) HDFS-5485. Add command-line support for modifyDirective. (cmccabe) + HDFS-5366. recaching improvements (cmccabe) + OPTIMIZATIONS HDFS-5349. DNA_CACHE and DNA_UNCACHE should be by blockId only. (cmccabe) @@ -475,6 +480,20 @@ Release 2.3.0 - UNRELEASED HDFS-5495. Remove further JUnit3 usages from HDFS. (Jarek Jarcec Cecho via wang) + HDFS-5325. Remove WebHdfsFileSystem#ConnRunner. (Haohui Mai via jing9) + + HDFS-5488. Clean up TestHftpURLTimeout. (Haohui Mai via jing9) + + HDFS-5440. Extract the logic of handling delegation tokens in HftpFileSystem + to the TokenAspect class. (Haohui Mai via jing9) + + HDFS-5487. Introduce unit test for TokenAspect. (Haohui Mai via jing9) + + HDFS-4995. Make getContentSummary less expensive. (kihwal) + + HDFS-5506. Use URLConnectionFactory in DelegationTokenFetcher. (Haohui Mai + via jing9) + OPTIMIZATIONS HDFS-5239. Allow FSNamesystem lock fairness to be configurable (daryn) @@ -529,13 +548,20 @@ Release 2.3.0 - UNRELEASED HDFS-5476. Snapshot: clean the blocks/files/directories under a renamed file/directory while deletion. 
(jing9) - HDFS-5325. Remove WebHdfsFileSystem#ConnRunner. (Haohui Mai via jing9) - - HDFS-5488. Clean up TestHftpURLTimeout. (Haohui Mai via jing9) - HDFS-5425. Renaming underconstruction file with snapshots can make NN failure on restart. (jing9 and Vinay) + HDFS-5474. Deletesnapshot can make Namenode in safemode on NN restarts. + (Sathish via jing9) + + HDFS-5075. httpfs-config.sh calls out incorrect env script name + (Timothy St. Clair via stevel) + + HDFS-5504. In HA mode, OP_DELETE_SNAPSHOT is not decrementing the safemode threshold, + leads to NN safemode. (Vinay via jing9) + + HDFS-5438. Flaws in block report processing can cause data loss. (kihwal) + Release 2.2.1 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClientFaultInjector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClientFaultInjector.java index 1738a4c1bc7..c5c6d5c5492 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClientFaultInjector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClientFaultInjector.java @@ -42,4 +42,8 @@ public boolean corruptPacket() { public boolean uncorruptPacket() { return false; } + + public boolean failPacket() { + return false; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index 0483ce8de0b..f3489361240 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -198,6 +198,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_LIST_LIMIT = "dfs.ls.limit"; public static final int DFS_LIST_LIMIT_DEFAULT = 1000; + public static final String DFS_CONTENT_SUMMARY_LIMIT_KEY = "dfs.content-summary.limit"; + public static final int DFS_CONTENT_SUMMARY_LIMIT_DEFAULT = 0; public static final String DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY = "dfs.datanode.failed.volumes.tolerated"; public static final int DFS_DATANODE_FAILED_VOLUMES_TOLERATED_DEFAULT = 0; public static final String DFS_DATANODE_SYNCONCLOSE_KEY = "dfs.datanode.synconclose"; @@ -342,6 +344,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final boolean DFS_DATANODE_TRANSFERTO_ALLOWED_DEFAULT = true; public static final String DFS_HEARTBEAT_INTERVAL_KEY = "dfs.heartbeat.interval"; public static final long DFS_HEARTBEAT_INTERVAL_DEFAULT = 3; + public static final String DFS_NAMENODE_PATH_BASED_CACHE_RETRY_INTERVAL_MS = "dfs.namenode.path.based.cache.retry.interval.ms"; + public static final long DFS_NAMENODE_PATH_BASED_CACHE_RETRY_INTERVAL_MS_DEFAULT = 60000L; public static final String DFS_NAMENODE_DECOMMISSION_INTERVAL_KEY = "dfs.namenode.decommission.interval"; public static final int DFS_NAMENODE_DECOMMISSION_INTERVAL_DEFAULT = 30; public static final String DFS_NAMENODE_DECOMMISSION_NODES_PER_INTERVAL_KEY = "dfs.namenode.decommission.nodes.per.interval"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java index 7f5b6ed5323..c8bbf8fd841 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java @@ -151,6 +151,7 @@ public class DFSOutputStream extends FSOutputSummer private final short blockReplication; // replication factor of file private boolean shouldSyncBlock = false; // force blocks to disk upon close private CachingStrategy cachingStrategy; + private boolean failPacket = false; private static class Packet { private static final long HEART_BEAT_SEQNO = -1L; @@ -752,6 +753,16 @@ public void run() { one.seqno + " but received " + seqno); } isLastPacketInBlock = one.lastPacketInBlock; + + // Fail the packet write for testing in order to force a + // pipeline recovery. + if (DFSClientFaultInjector.get().failPacket() && + isLastPacketInBlock) { + failPacket = true; + throw new IOException( + "Failing the last packet for testing."); + } + // update bytesAcked block.setNumBytes(one.getLastByteOffsetBlock()); @@ -1044,7 +1055,18 @@ private boolean setupPipelineForAppendOrRecovery() throws IOException { accessToken = lb.getBlockToken(); // set up the pipeline again with the remaining nodes - success = createBlockOutputStream(nodes, newGS, isRecovery); + if (failPacket) { // for testing + success = createBlockOutputStream(nodes, newGS-1, isRecovery); + failPacket = false; + try { + // Give DNs time to send in bad reports. In real situations, + // good reports should follow bad ones, if client committed + // with those nodes. + Thread.sleep(2000); + } catch (InterruptedException ie) {} + } else { + success = createBlockOutputStream(nodes, newGS, isRecovery); + } } if (success) { @@ -1904,7 +1926,9 @@ public synchronized void close() throws IOException { // be called during unit tests private void completeFile(ExtendedBlock last) throws IOException { long localstart = Time.now(); + long localTimeout = 400; boolean fileComplete = false; + int retries = dfsClient.getConf().nBlockWriteLocateFollowingRetry; while (!fileComplete) { fileComplete = dfsClient.namenode.complete(src, dfsClient.clientName, last, fileId); @@ -1920,7 +1944,13 @@ private void completeFile(ExtendedBlock last) throws IOException { throw new IOException(msg); } try { - Thread.sleep(400); + Thread.sleep(localTimeout); + if (retries == 0) { + throw new IOException("Unable to close file because the last block" + + " does not have enough number of replicas."); + } + retries--; + localTimeout *= 2; if (Time.now() - localstart > 5000) { DFSClient.LOG.info("Could not complete " + src + " retrying..."); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoUnderConstruction.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoUnderConstruction.java index a2202feefba..b13d7e3252d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoUnderConstruction.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoUnderConstruction.java @@ -229,6 +229,29 @@ public long getBlockRecoveryId() { return blockRecoveryId; } + /** + * Process the recorded replicas. When about to commit or finish the + * pipeline recovery sort out bad replicas. + * @param genStamp The final generation stamp for the block. + */ + public void setGenerationStampAndVerifyReplicas(long genStamp) { + if (replicas == null) + return; + + // Remove the replicas with wrong gen stamp. + // The replica list is unchanged. 
+ for (ReplicaUnderConstruction r : replicas) { + if (genStamp != r.getGenerationStamp()) { + r.getExpectedStorageLocation().removeBlock(this); + NameNode.blockStateChangeLog.info("BLOCK* Removing stale replica " + + "from location: " + r); + } + } + + // Set the generation stamp for the block. + setGenerationStamp(genStamp); + } + /** * Commit block's length and generation stamp as reported by the client. * Set block state to {@link BlockUCState#COMMITTED}. @@ -301,6 +324,8 @@ void addReplicaIfNotPresent(DatanodeStorageInfo storage, while (it.hasNext()) { ReplicaUnderConstruction r = it.next(); if(r.getExpectedStorageLocation() == storage) { + // Record the gen stamp from the report + r.setGenerationStamp(block.getGenerationStamp()); return; } else if (r.getExpectedStorageLocation().getDatanodeDescriptor() == storage.getDatanodeDescriptor()) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index dc540d4e675..af21b7c44cb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -59,6 +59,7 @@ import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager.AccessMode; import org.apache.hadoop.hdfs.security.token.block.DataEncryptionKey; import org.apache.hadoop.hdfs.security.token.block.ExportedBlockKeys; +import org.apache.hadoop.hdfs.server.blockmanagement.CorruptReplicasMap.Reason; import org.apache.hadoop.hdfs.server.blockmanagement.PendingDataNodeMessages.ReportedBlockInfo; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState; @@ -1051,7 +1052,8 @@ public void findAndMarkBlockAsCorrupt(final ExtendedBlock blk, + blk + " not found"); return; } - markBlockAsCorrupt(new BlockToMarkCorrupt(storedBlock, reason), dn, storageID); + markBlockAsCorrupt(new BlockToMarkCorrupt(storedBlock, reason, + Reason.CORRUPTION_REPORTED), dn, storageID); } private void markBlockAsCorrupt(BlockToMarkCorrupt b, @@ -1074,7 +1076,8 @@ private void markBlockAsCorrupt(BlockToMarkCorrupt b, node.addBlock(storageID, b.stored); // Add this replica to corruptReplicas Map - corruptReplicas.addToCorruptReplicasMap(b.corrupted, node, b.reason); + corruptReplicas.addToCorruptReplicasMap(b.corrupted, node, b.reason, + b.reasonCode); if (countNodes(b.stored).liveReplicas() >= bc.getBlockReplication()) { // the block is over-replicated so invalidate the replicas immediately invalidateBlock(b, node); @@ -1574,22 +1577,27 @@ private static class BlockToMarkCorrupt { final BlockInfo stored; /** The reason to mark corrupt. 
*/ final String reason; - - BlockToMarkCorrupt(BlockInfo corrupted, BlockInfo stored, String reason) { + /** The reason code to be stored */ + final Reason reasonCode; + + BlockToMarkCorrupt(BlockInfo corrupted, BlockInfo stored, String reason, + Reason reasonCode) { Preconditions.checkNotNull(corrupted, "corrupted is null"); Preconditions.checkNotNull(stored, "stored is null"); this.corrupted = corrupted; this.stored = stored; this.reason = reason; + this.reasonCode = reasonCode; } - BlockToMarkCorrupt(BlockInfo stored, String reason) { - this(stored, stored, reason); + BlockToMarkCorrupt(BlockInfo stored, String reason, Reason reasonCode) { + this(stored, stored, reason, reasonCode); } - BlockToMarkCorrupt(BlockInfo stored, long gs, String reason) { - this(new BlockInfo(stored), stored, reason); + BlockToMarkCorrupt(BlockInfo stored, long gs, String reason, + Reason reasonCode) { + this(new BlockInfo(stored), stored, reason, reasonCode); //the corrupted block in datanode has a different generation stamp corrupted.setGenerationStamp(gs); } @@ -1946,9 +1954,11 @@ private BlockInfo processReportedBlock(final DatanodeDescriptor dn, return storedBlock; } - //add replica if appropriate + // Add replica if appropriate. If the replica was previously corrupt + // but now okay, it might need to be updated. if (reportedState == ReplicaState.FINALIZED - && storedBlock.findDatanode(dn) < 0) { + && (storedBlock.findDatanode(dn) < 0 + || corruptReplicas.isReplicaCorrupt(storedBlock, dn))) { toAdd.add(storedBlock); } return storedBlock; @@ -2039,12 +2049,13 @@ private BlockToMarkCorrupt checkReplicaCorrupt( return new BlockToMarkCorrupt(storedBlock, reportedGS, "block is " + ucState + " and reported genstamp " + reportedGS + " does not match genstamp in block map " - + storedBlock.getGenerationStamp()); + + storedBlock.getGenerationStamp(), Reason.GENSTAMP_MISMATCH); } else if (storedBlock.getNumBytes() != reported.getNumBytes()) { return new BlockToMarkCorrupt(storedBlock, "block is " + ucState + " and reported length " + reported.getNumBytes() + " does not match " + - "length in block map " + storedBlock.getNumBytes()); + "length in block map " + storedBlock.getNumBytes(), + Reason.SIZE_MISMATCH); } else { return null; // not corrupt } @@ -2060,7 +2071,7 @@ private BlockToMarkCorrupt checkReplicaCorrupt( return new BlockToMarkCorrupt(storedBlock, reportedGS, "reported " + reportedState + " replica with genstamp " + reportedGS + " does not match COMPLETE block's genstamp in block map " - + storedBlock.getGenerationStamp()); + + storedBlock.getGenerationStamp(), Reason.GENSTAMP_MISMATCH); } else { // COMPLETE block, same genstamp if (reportedState == ReplicaState.RBW) { // If it's a RBW report for a COMPLETE block, it may just be that @@ -2073,7 +2084,8 @@ private BlockToMarkCorrupt checkReplicaCorrupt( return null; } else { return new BlockToMarkCorrupt(storedBlock, - "reported replica has invalid state " + reportedState); + "reported replica has invalid state " + reportedState, + Reason.INVALID_STATE); } } case RUR: // should not be reported @@ -2084,7 +2096,7 @@ private BlockToMarkCorrupt checkReplicaCorrupt( " on " + dn + " size " + storedBlock.getNumBytes(); // log here at WARN level since this is really a broken HDFS invariant LOG.warn(msg); - return new BlockToMarkCorrupt(storedBlock, msg); + return new BlockToMarkCorrupt(storedBlock, msg, Reason.INVALID_STATE); } } @@ -2201,6 +2213,11 @@ private Block addStoredBlock(final BlockInfo block, logAddStoredBlock(storedBlock, node); } } else { + // if 
the same block is added again and the replica was corrupt + // previously because of a wrong gen stamp, remove it from the + // corrupt block list. + corruptReplicas.removeFromCorruptReplicasMap(block, node, + Reason.GENSTAMP_MISMATCH); curReplicaDelta = 0; blockLog.warn("BLOCK* addStoredBlock: " + "Redundant addStoredBlock request received for " + storedBlock @@ -2297,7 +2314,8 @@ private void invalidateCorruptReplicas(BlockInfo blk) { DatanodeDescriptor[] nodesCopy = nodes.toArray(new DatanodeDescriptor[0]); for (DatanodeDescriptor node : nodesCopy) { try { - if (!invalidateBlock(new BlockToMarkCorrupt(blk, null), node)) { + if (!invalidateBlock(new BlockToMarkCorrupt(blk, null, + Reason.ANY), node)) { removedFromBlocksMap = false; } } catch (IOException e) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationMonitor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationMonitor.java index 5d9c39c16a0..1cef3ee9251 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationMonitor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationMonitor.java @@ -204,6 +204,7 @@ private void rescan() { namesystem.writeLock(); try { rescanCachedBlockMap(); + blockManager.getDatanodeManager().resetLastCachingDirectiveSentTime(); } finally { namesystem.writeUnlock(); } @@ -316,17 +317,21 @@ private void rescanCachedBlockMap() { int numCached = cached.size(); if (numCached >= neededCached) { // If we have enough replicas, drop all pending cached. - for (DatanodeDescriptor datanode : pendingCached) { + for (Iterator iter = pendingCached.iterator(); + iter.hasNext(); ) { + DatanodeDescriptor datanode = iter.next(); datanode.getPendingCached().remove(cblock); + iter.remove(); } - pendingCached.clear(); } if (numCached < neededCached) { // If we don't have enough replicas, drop all pending uncached. - for (DatanodeDescriptor datanode : pendingUncached) { + for (Iterator iter = pendingUncached.iterator(); + iter.hasNext(); ) { + DatanodeDescriptor datanode = iter.next(); datanode.getPendingUncached().remove(cblock); + iter.remove(); } - pendingUncached.clear(); } int neededUncached = numCached - (pendingUncached.size() + neededCached); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CorruptReplicasMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CorruptReplicasMap.java index 4613199ee6e..cb9f79ab448 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CorruptReplicasMap.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CorruptReplicasMap.java @@ -36,8 +36,18 @@ @InterfaceAudience.Private public class CorruptReplicasMap{ - private SortedMap> corruptReplicasMap = - new TreeMap>(); + /** The corruption reason code */ + public static enum Reason { + NONE, // not specified. + ANY, // wildcard reason + GENSTAMP_MISMATCH, // mismatch in generation stamps + SIZE_MISMATCH, // mismatch in sizes + INVALID_STATE, // invalid state + CORRUPTION_REPORTED // client or datanode reported the corruption + } + + private SortedMap> corruptReplicasMap = + new TreeMap>(); /** * Mark the block belonging to datanode as corrupt. 
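[Reviewer note] The CorruptReplicasMap.java hunks here and just below change the per-block bookkeeping from a Set of DatanodeDescriptors to a Map from DatanodeDescriptor to Reason, so an entry can later be removed selectively: removal proceeds only when the caller passes Reason.ANY or a reason matching the one recorded. A minimal standalone sketch of that matching rule (simplified types and a hypothetical class name, not the actual Hadoop implementation):

    import java.util.HashMap;
    import java.util.Map;

    // Sketch of the reason-aware corrupt-replica bookkeeping introduced by the patch.
    // Block and DatanodeDescriptor are reduced to String keys for brevity.
    class ReasonTrackingMapSketch {
      enum Reason { NONE, ANY, GENSTAMP_MISMATCH, SIZE_MISMATCH, INVALID_STATE, CORRUPTION_REPORTED }

      private final Map<String, Map<String, Reason>> corrupt = new HashMap<>();

      void add(String block, String node, Reason reason) {
        // Re-adding the same node simply updates the stored reason.
        corrupt.computeIfAbsent(block, b -> new HashMap<>()).put(node, reason);
      }

      boolean remove(String block, String node, Reason reason) {
        Map<String, Reason> nodes = corrupt.get(block);
        if (nodes == null) {
          return false;
        }
        Reason stored = nodes.get(node);
        // If reasons can be compared but do not match, leave the entry in place.
        if (reason != Reason.ANY && stored != null && reason != stored) {
          return false;
        }
        if (nodes.remove(node) != null) {
          if (nodes.isEmpty()) {
            corrupt.remove(block);   // no corrupt replicas left for this block
          }
          return true;
        }
        return false;
      }
    }

This is what lets BlockManager#addStoredBlock clear only GENSTAMP_MISMATCH entries when a replica reappears with a corrected generation stamp, without discarding corruption that a client or datanode explicitly reported.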
@@ -48,9 +58,22 @@ public class CorruptReplicasMap{ */ public void addToCorruptReplicasMap(Block blk, DatanodeDescriptor dn, String reason) { - Collection nodes = getNodes(blk); + addToCorruptReplicasMap(blk, dn, reason, Reason.NONE); + } + + /** + * Mark the block belonging to datanode as corrupt. + * + * @param blk Block to be added to CorruptReplicasMap + * @param dn DatanodeDescriptor which holds the corrupt replica + * @param reason a textual reason (for logging purposes) + * @param reasonCode the enum representation of the reason + */ + public void addToCorruptReplicasMap(Block blk, DatanodeDescriptor dn, + String reason, Reason reasonCode) { + Map nodes = corruptReplicasMap.get(blk); if (nodes == null) { - nodes = new TreeSet(); + nodes = new HashMap(); corruptReplicasMap.put(blk, nodes); } @@ -61,8 +84,7 @@ public void addToCorruptReplicasMap(Block blk, DatanodeDescriptor dn, reasonText = ""; } - if (!nodes.contains(dn)) { - nodes.add(dn); + if (!nodes.keySet().contains(dn)) { NameNode.blockStateChangeLog.info("BLOCK NameSystem.addToCorruptReplicasMap: "+ blk.getBlockName() + " added as corrupt on " + dn + @@ -76,6 +98,8 @@ public void addToCorruptReplicasMap(Block blk, DatanodeDescriptor dn, " by " + Server.getRemoteIp() + reasonText); } + // Add the node or update the reason. + nodes.put(dn, reasonCode); } /** @@ -97,10 +121,24 @@ void removeFromCorruptReplicasMap(Block blk) { false if the replica is not in the map */ boolean removeFromCorruptReplicasMap(Block blk, DatanodeDescriptor datanode) { - Collection datanodes = corruptReplicasMap.get(blk); + return removeFromCorruptReplicasMap(blk, datanode, Reason.ANY); + } + + boolean removeFromCorruptReplicasMap(Block blk, DatanodeDescriptor datanode, + Reason reason) { + Map datanodes = corruptReplicasMap.get(blk); + boolean removed = false; if (datanodes==null) return false; - if (datanodes.remove(datanode)) { // remove the replicas + + // if reasons can be compared but don't match, return false. + Reason storedReason = datanodes.get(datanode); + if (reason != Reason.ANY && storedReason != null && + reason != storedReason) { + return false; + } + + if (datanodes.remove(datanode) != null) { // remove the replicas if (datanodes.isEmpty()) { // remove the block if there is no more corrupted replicas corruptReplicasMap.remove(blk); @@ -118,7 +156,10 @@ boolean removeFromCorruptReplicasMap(Block blk, DatanodeDescriptor datanode) { * @return collection of nodes. Null if does not exists */ Collection getNodes(Block blk) { - return corruptReplicasMap.get(blk); + Map nodes = corruptReplicasMap.get(blk); + if (nodes == null) + return null; + return nodes.keySet(); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java index d0aef08cb4b..5d64e0ca235 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java @@ -170,6 +170,21 @@ public CachedBlocksList getPendingUncached() { return pendingUncached; } + /** + * The time when the last batch of caching directives was sent, in + * monotonic milliseconds. 
+ */ + private long lastCachingDirectiveSentTimeMs; + + /** + * Head of the list of blocks on the datanode + */ + private volatile BlockInfo blockList = null; + /** + * Number of blocks on the datanode + */ + private int numBlocks = 0; + // isAlive == heartbeats.contains(this) // This is an optimization, because contains takes O(n) time on Arraylist public boolean isAlive = false; @@ -661,5 +676,21 @@ public DatanodeStorageInfo updateStorage(DatanodeStorage s) { return storage; } } + + /** + * @return The time at which we last sent caching directives to this + * DataNode, in monotonic milliseconds. + */ + public long getLastCachingDirectiveSentTimeMs() { + return this.lastCachingDirectiveSentTimeMs; + } + + /** + * @param time The time at which we last sent caching directives to this + * DataNode, in monotonic milliseconds. + */ + public void setLastCachingDirectiveSentTimeMs(long time) { + this.lastCachingDirectiveSentTimeMs = time; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java index f5560b680bc..3bb1f150bcb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java @@ -149,7 +149,7 @@ public class DatanodeManager { * Whether we should tell datanodes what to cache in replies to * heartbeat messages. */ - private boolean sendCachingCommands = false; + private boolean shouldSendCachingCommands = false; /** * The number of datanodes for each software version. This list should change @@ -159,6 +159,16 @@ public class DatanodeManager { private HashMap datanodesSoftwareVersions = new HashMap(4, 0.75f); + /** + * The minimum time between resending caching directives to Datanodes, + * in milliseconds. + * + * Note that when a rescan happens, we will send the new directives + * as soon as possible. This timeout only applies to resending + * directives that we've already sent. + */ + private final long timeBetweenResendingCachingDirectivesMs; + DatanodeManager(final BlockManager blockManager, final Namesystem namesystem, final Configuration conf) throws IOException { this.namesystem = namesystem; @@ -241,6 +251,9 @@ public class DatanodeManager { DFSConfigKeys.DFS_NAMENODE_USE_STALE_DATANODE_FOR_WRITE_RATIO_KEY + " = '" + ratioUseStaleDataNodesForWrite + "' is invalid. 
" + "It should be a positive non-zero float value, not greater than 1.0f."); + this.timeBetweenResendingCachingDirectivesMs = conf.getLong( + DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_RETRY_INTERVAL_MS, + DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_RETRY_INTERVAL_MS_DEFAULT); } private static long getStaleIntervalFromConf(Configuration conf, @@ -1297,17 +1310,28 @@ public DatanodeCommand[] handleHeartbeat(DatanodeRegistration nodeReg, cmds.add(new BlockCommand(DatanodeProtocol.DNA_INVALIDATE, blockPoolId, blks)); } - DatanodeCommand pendingCacheCommand = - getCacheCommand(nodeinfo.getPendingCached(), nodeinfo, - DatanodeProtocol.DNA_CACHE, blockPoolId); - if (pendingCacheCommand != null) { - cmds.add(pendingCacheCommand); - } - DatanodeCommand pendingUncacheCommand = - getCacheCommand(nodeinfo.getPendingUncached(), nodeinfo, - DatanodeProtocol.DNA_UNCACHE, blockPoolId); - if (pendingUncacheCommand != null) { - cmds.add(pendingUncacheCommand); + boolean sendingCachingCommands = false; + long nowMs = Time.monotonicNow(); + if (shouldSendCachingCommands && + ((nowMs - nodeinfo.getLastCachingDirectiveSentTimeMs()) >= + timeBetweenResendingCachingDirectivesMs)) { + DatanodeCommand pendingCacheCommand = + getCacheCommand(nodeinfo.getPendingCached(), nodeinfo, + DatanodeProtocol.DNA_CACHE, blockPoolId); + if (pendingCacheCommand != null) { + cmds.add(pendingCacheCommand); + sendingCachingCommands = true; + } + DatanodeCommand pendingUncacheCommand = + getCacheCommand(nodeinfo.getPendingUncached(), nodeinfo, + DatanodeProtocol.DNA_UNCACHE, blockPoolId); + if (pendingUncacheCommand != null) { + cmds.add(pendingUncacheCommand); + sendingCachingCommands = true; + } + if (sendingCachingCommands) { + nodeinfo.setLastCachingDirectiveSentTimeMs(nowMs); + } } blockManager.addKeyUpdateCommand(cmds, nodeinfo); @@ -1345,19 +1369,13 @@ private DatanodeCommand getCacheCommand(CachedBlocksList list, if (length == 0) { return null; } - // Read and clear the existing cache commands. + // Read the existing cache commands. long[] blockIds = new long[length]; int i = 0; for (Iterator iter = list.iterator(); iter.hasNext(); ) { CachedBlock cachedBlock = iter.next(); blockIds[i++] = cachedBlock.getBlockId(); - iter.remove(); - } - if (!sendCachingCommands) { - // Do not send caching commands unless the FSNamesystem told us we - // should. - return null; } return new BlockIdCommand(action, poolId, blockIds); } @@ -1408,13 +1426,25 @@ public void clearPendingQueues() { } } + /** + * Reset the lastCachingDirectiveSentTimeMs field of all the DataNodes we + * know about. 
+ */ + public void resetLastCachingDirectiveSentTime() { + synchronized (datanodeMap) { + for (DatanodeDescriptor dn : datanodeMap.values()) { + dn.setLastCachingDirectiveSentTimeMs(0L); + } + } + } + @Override public String toString() { return getClass().getSimpleName() + ": " + host2DatanodeMap; } - public void setSendCachingCommands(boolean sendCachingCommands) { - this.sendCachingCommands = sendCachingCommands; + public void setShouldSendCachingCommands(boolean shouldSendCachingCommands) { + this.shouldSendCachingCommands = shouldSendCachingCommands; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetCache.java index 3e12168e22d..48ac9507b17 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetCache.java @@ -289,6 +289,10 @@ synchronized void cacheBlock(long blockId, String bpid, mappableBlockMap.put(key, new Value(null, State.CACHING)); volumeExecutor.execute( new CachingTask(key, blockFileName, length, genstamp)); + if (LOG.isDebugEnabled()) { + LOG.debug("Initiating caching for Block with id " + blockId + + ", pool " + bpid); + } } synchronized void uncacheBlock(String bpid, long blockId) { @@ -427,6 +431,10 @@ public void run() { mappableBlock.close(); } numBlocksFailedToCache.incrementAndGet(); + + synchronized (FsDatasetCache.this) { + mappableBlockMap.remove(key); + } } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/MappableBlock.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/MappableBlock.java index 09d2ed6d5e4..29d472335a4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/MappableBlock.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/MappableBlock.java @@ -44,20 +44,6 @@ @InterfaceAudience.Private @InterfaceStability.Unstable public class MappableBlock implements Closeable { - public static interface Mlocker { - void mlock(MappedByteBuffer mmap, long length) throws IOException; - } - - private static class PosixMlocker implements Mlocker { - public void mlock(MappedByteBuffer mmap, long length) - throws IOException { - NativeIO.POSIX.mlock(mmap, length); - } - } - - @VisibleForTesting - public static Mlocker mlocker = new PosixMlocker(); - private MappedByteBuffer mmap; private final long length; @@ -96,7 +82,7 @@ public static MappableBlock load(long length, throw new IOException("Block InputStream has no FileChannel."); } mmap = blockChannel.map(MapMode.READ_ONLY, 0, length); - mlocker.mlock(mmap, length); + NativeIO.POSIX.cacheManipulator.mlock(blockFileName, mmap, length); verifyChecksum(length, metaIn, blockChannel, blockFileName); mappableBlock = new MappableBlock(mmap, length); } finally { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ContentSummaryComputationContext.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ContentSummaryComputationContext.java new file mode 100644 index 00000000000..dab64ec769b --- /dev/null +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ContentSummaryComputationContext.java @@ -0,0 +1,119 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class ContentSummaryComputationContext { + private FSDirectory dir = null; + private FSNamesystem fsn = null; + private Content.Counts counts = null; + private long nextCountLimit = 0; + private long limitPerRun = 0; + private long yieldCount = 0; + + /** + * Constructor + * + * @param dir The FSDirectory instance + * @param fsn The FSNamesystem instance + * @param limitPerRun allowed number of operations in one + * locking period. 0 or a negative number means + * no limit (i.e. no yielding) + */ + public ContentSummaryComputationContext(FSDirectory dir, + FSNamesystem fsn, long limitPerRun) { + this.dir = dir; + this.fsn = fsn; + this.limitPerRun = limitPerRun; + this.nextCountLimit = limitPerRun; + this.counts = Content.Counts.newInstance(); + } + + /** Constructor for blocking computation. */ + public ContentSummaryComputationContext() { + this(null, null, 0); + } + + /** Return current yield count */ + public long getYieldCount() { + return yieldCount; + } + + /** + * Relinquish locks held during computation for a short while + * and reacquire them. This will give other threads a chance + * to acquire the contended locks and run. + * + * @return true if locks were released and reacquired. + */ + public boolean yield() { + // Are we set up to do this? + if (limitPerRun <= 0 || dir == null || fsn == null) { + return false; + } + + // Have we reached the limit? + long currentCount = counts.get(Content.FILE) + + counts.get(Content.SYMLINK) + + counts.get(Content.DIRECTORY) + + counts.get(Content.SNAPSHOTTABLE_DIRECTORY); + if (currentCount <= nextCountLimit) { + return false; + } + + // Update the next limit + nextCountLimit = currentCount + limitPerRun; + + boolean hadDirReadLock = dir.hasReadLock(); + boolean hadDirWriteLock = dir.hasWriteLock(); + boolean hadFsnReadLock = fsn.hasReadLock(); + boolean hadFsnWriteLock = fsn.hasWriteLock(); + + // sanity check. 
+ if (!hadDirReadLock || !hadFsnReadLock || hadDirWriteLock || + hadFsnWriteLock || dir.getReadHoldCount() != 1 || + fsn.getReadHoldCount() != 1) { + // cannot relinquish + return false; + } + + // unlock + dir.readUnlock(); + fsn.readUnlock(); + + try { + Thread.sleep(1); + } catch (InterruptedException ie) { + } finally { + // reacquire + fsn.readLock(); + dir.readLock(); + } + yieldCount++; + return true; + } + + /** Get the content counts */ + public Content.Counts getCounts() { + return counts; + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java index 5402b0478a9..bce528fe7af 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java @@ -114,7 +114,9 @@ private static INodeDirectoryWithQuota createRoot(FSNamesystem namesystem) { private final int maxComponentLength; private final int maxDirItems; private final int lsLimit; // max list limit + private final int contentCountLimit; // max content summary counts per run private final INodeMap inodeMap; // Synchronized by dirLock + private long yieldCount = 0; // keep track of lock yield count. // lock to protect the directory and BlockMap private ReentrantReadWriteLock dirLock; @@ -145,6 +147,14 @@ boolean hasReadLock() { return this.dirLock.getReadHoldCount() > 0; } + public int getReadHoldCount() { + return this.dirLock.getReadHoldCount(); + } + + public int getWriteHoldCount() { + return this.dirLock.getWriteHoldCount(); + } + /** * Caches frequently used file names used in {@link INode} to reuse * byte[] objects and reduce heap usage. @@ -161,6 +171,10 @@ boolean hasReadLock() { DFSConfigKeys.DFS_LIST_LIMIT, DFSConfigKeys.DFS_LIST_LIMIT_DEFAULT); this.lsLimit = configuredLimit>0 ? configuredLimit : DFSConfigKeys.DFS_LIST_LIMIT_DEFAULT; + + this.contentCountLimit = conf.getInt( + DFSConfigKeys.DFS_CONTENT_SUMMARY_LIMIT_KEY, + DFSConfigKeys.DFS_CONTENT_SUMMARY_LIMIT_DEFAULT); // filesystem limits this.maxComponentLength = conf.getInt( @@ -2296,13 +2310,26 @@ ContentSummary getContentSummary(String src) throw new FileNotFoundException("File does not exist: " + srcs); } else { - return targetNode.computeContentSummary(); + // Make it relinquish locks everytime contentCountLimit entries are + // processed. 0 means disabled. I.e. blocking for the entire duration. 
+ ContentSummaryComputationContext cscc = + + new ContentSummaryComputationContext(this, getFSNamesystem(), + contentCountLimit); + ContentSummary cs = targetNode.computeAndConvertContentSummary(cscc); + yieldCount += cscc.getYieldCount(); + return cs; } } finally { readUnlock(); } } + @VisibleForTesting + public long getYieldCount() { + return yieldCount; + } + public INodeMap getINodeMap() { return inodeMap; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java index aaa82cfa870..ec11e5d8f61 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java @@ -593,7 +593,7 @@ private long applyEditLogOp(FSEditLogOp op, FSDirectory fsDir, fsNamesys.getSnapshotManager().deleteSnapshot( deleteSnapshotOp.snapshotRoot, deleteSnapshotOp.snapshotName, collectedBlocks, removedINodes); - fsNamesys.removeBlocks(collectedBlocks); + fsNamesys.removeBlocksAndUpdateSafemodeTotal(collectedBlocks); collectedBlocks.clear(); fsNamesys.dir.removeFromInodeMap(removedINodes); removedINodes.clear(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index fddeb6f5ec3..160b8cc2d63 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -1009,7 +1009,7 @@ void startActiveServices() throws IOException { nnEditLogRoller.start(); cacheManager.activate(); - blockManager.getDatanodeManager().setSendCachingCommands(true); + blockManager.getDatanodeManager().setShouldSendCachingCommands(true); } finally { writeUnlock(); startingActiveService = false; @@ -1060,7 +1060,7 @@ void stopActiveServices() { dir.fsImage.updateLastAppliedTxIdFromWritten(); } cacheManager.deactivate(); - blockManager.getDatanodeManager().setSendCachingCommands(false); + blockManager.getDatanodeManager().setShouldSendCachingCommands(false); } finally { writeUnlock(); } @@ -1297,6 +1297,14 @@ public boolean hasReadOrWriteLock() { return hasReadLock() || hasWriteLock(); } + public int getReadHoldCount() { + return this.fsLock.getReadHoldCount(); + } + + public int getWriteHoldCount() { + return this.fsLock.getWriteHoldCount(); + } + NamespaceInfo getNamespaceInfo() { readLock(); try { @@ -3305,6 +3313,18 @@ void removePathAndBlocks(String src, BlocksMapUpdateInfo blocks, return; } + removeBlocksAndUpdateSafemodeTotal(blocks); + } + + /** + * Removes the blocks from blocksmap and updates the safemode blocks total + * + * @param blocks + * An instance of {@link BlocksMapUpdateInfo} which contains a list + * of blocks that need to be removed from blocksMap + */ + void removeBlocksAndUpdateSafemodeTotal(BlocksMapUpdateInfo blocks) { + assert hasWriteLock(); // In the case that we are a Standby tailing edits from the // active while in safe-mode, we need to track the total number // of blocks and safe blocks in the system. 
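[Illustrative note — not part of the patch] getContentSummary() above now threads a ContentSummaryComputationContext through the traversal; once roughly contentCountLimit inodes have been counted, yield() releases the FSDirectory and FSNamesystem read locks, sleeps briefly, and reacquires them, but only when the thread holds exactly one read lock and no write lock. A condensed sketch of the same yield pattern, assuming a single ReentrantReadWriteLock in place of the two namenode locks; the limit and sleep are illustrative:

```java
import java.util.concurrent.locks.ReentrantReadWriteLock;

public class YieldingTraversalSketch {
  private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
  private final long limitPerRun;
  private long processed = 0;
  private long nextYieldAt;
  private long yieldCount = 0;

  YieldingTraversalSketch(long limitPerRun) {
    this.limitPerRun = limitPerRun;
    this.nextYieldAt = limitPerRun;
  }

  /** Called once per item; occasionally drops and reacquires the read lock. */
  void processOne() throws InterruptedException {
    processed++;
    if (limitPerRun <= 0 || processed <= nextYieldAt) {
      return;                               // limit not reached yet
    }
    nextYieldAt = processed + limitPerRun;
    if (lock.getReadHoldCount() != 1) {
      return;                               // nested hold: not safe to relinquish
    }
    lock.readLock().unlock();               // let writers and other readers in
    try {
      Thread.sleep(1);
    } finally {
      lock.readLock().lock();               // reacquire before continuing
    }
    yieldCount++;
  }

  public static void main(String[] args) throws InterruptedException {
    YieldingTraversalSketch t = new YieldingTraversalSketch(1000);
    t.lock.readLock().lock();
    try {
      for (int i = 0; i < 5000; i++) {
        t.processOne();
      }
    } finally {
      t.lock.readLock().unlock();
    }
    System.out.println("yields: " + t.yieldCount);   // 4
  }
}
```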
@@ -3325,9 +3345,9 @@ void removePathAndBlocks(String src, BlocksMapUpdateInfo blocks, } if (trackBlockCounts) { if (LOG.isDebugEnabled()) { - LOG.debug("Adjusting safe-mode totals for deletion of " + src + ":" + - "decreasing safeBlocks by " + numRemovedSafe + - ", totalBlocks by " + numRemovedComplete); + LOG.debug("Adjusting safe-mode totals for deletion." + + "decreasing safeBlocks by " + numRemovedSafe + + ", totalBlocks by " + numRemovedComplete); } adjustSafeModeBlockTotals(-numRemovedSafe, -numRemovedComplete); } @@ -5883,8 +5903,8 @@ private void updatePipelineInternal(String clientName, ExtendedBlock oldBlock, } // Update old block with the new generation stamp and new length - blockinfo.setGenerationStamp(newBlock.getGenerationStamp()); blockinfo.setNumBytes(newBlock.getNumBytes()); + blockinfo.setGenerationStampAndVerifyReplicas(newBlock.getGenerationStamp()); // find the DatanodeDescriptor objects final DatanodeStorageInfo[] storages = blockManager.getDatanodeManager() @@ -6953,6 +6973,7 @@ void deleteSnapshot(String snapshotRoot, String snapshotName) return; // Return previous response } boolean success = false; + BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo(); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -6961,7 +6982,6 @@ void deleteSnapshot(String snapshotRoot, String snapshotName) checkOwner(pc, snapshotRoot); } - BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo(); List removedINodes = new ChunkedArrayList(); dir.writeLock(); try { @@ -6972,8 +6992,6 @@ void deleteSnapshot(String snapshotRoot, String snapshotName) dir.writeUnlock(); } removedINodes.clear(); - this.removeBlocks(collectedBlocks); - collectedBlocks.clear(); getEditLog().logDeleteSnapshot(snapshotRoot, snapshotName, cacheEntry != null); success = true; @@ -6982,7 +7000,10 @@ void deleteSnapshot(String snapshotRoot, String snapshotName) RetryCache.setState(cacheEntry, success); } getEditLog().logSync(); - + + removeBlocks(collectedBlocks); + collectedBlocks.clear(); + if (auditLog.isInfoEnabled() && isExternalInvocation()) { String rootPath = Snapshot.getSnapshotPath(snapshotRoot, snapshotName); logAuditEvent(true, "deleteSnapshot", rootPath, null, null); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java index 977c801013e..1aff9784aba 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java @@ -371,10 +371,18 @@ public abstract Quota.Counts cleanSubtree(final Snapshot snapshot, public abstract void destroyAndCollectBlocks( BlocksMapUpdateInfo collectedBlocks, List removedINodes); - /** Compute {@link ContentSummary}. */ + /** Compute {@link ContentSummary}. Blocking call */ public final ContentSummary computeContentSummary() { - final Content.Counts counts = computeContentSummary( - Content.Counts.newInstance()); + return computeAndConvertContentSummary( + new ContentSummaryComputationContext()); + } + + /** + * Compute {@link ContentSummary}. 
+ */ + public final ContentSummary computeAndConvertContentSummary( + ContentSummaryComputationContext summary) { + Content.Counts counts = computeContentSummary(summary).getCounts(); return new ContentSummary(counts.get(Content.LENGTH), counts.get(Content.FILE) + counts.get(Content.SYMLINK), counts.get(Content.DIRECTORY), getNsQuota(), @@ -384,10 +392,12 @@ public final ContentSummary computeContentSummary() { /** * Count subtree content summary with a {@link Content.Counts}. * - * @param counts The subtree counts for returning. - * @return The same objects as the counts parameter. + * @param summary the context object holding counts for the subtree. + * @return The same objects as summary. */ - public abstract Content.Counts computeContentSummary(Content.Counts counts); + public abstract ContentSummaryComputationContext computeContentSummary( + ContentSummaryComputationContext summary); + /** * Check and add namespace/diskspace consumed to itself and the ancestors. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java index 2460b687b26..cf9c232f7b9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java @@ -466,12 +466,45 @@ public Quota.Counts computeQuotaUsage4CurrentDirectory(Quota.Counts counts) { } @Override - public Content.Counts computeContentSummary(final Content.Counts counts) { - for (INode child : getChildrenList(null)) { - child.computeContentSummary(counts); + public ContentSummaryComputationContext computeContentSummary( + ContentSummaryComputationContext summary) { + ReadOnlyList childrenList = getChildrenList(null); + // Explicit traversing is done to enable repositioning after relinquishing + // and reacquiring locks. + for (int i = 0; i < childrenList.size(); i++) { + INode child = childrenList.get(i); + byte[] childName = child.getLocalNameBytes(); + + long lastYieldCount = summary.getYieldCount(); + child.computeContentSummary(summary); + + // Check whether the computation was paused in the subtree. + // The counts may be off, but traversing the rest of children + // should be made safe. + if (lastYieldCount == summary.getYieldCount()) { + continue; + } + + // The locks were released and reacquired. Check parent first. + if (getParent() == null) { + // Stop further counting and return whatever we have so far. + break; + } + + // Obtain the children list again since it may have been modified. + childrenList = getChildrenList(null); + // Reposition in case the children list is changed. Decrement by 1 + // since it will be incremented when loops. + i = nextChild(childrenList, childName) - 1; } - counts.add(Content.DIRECTORY, 1); - return counts; + + // Increment the directory count for this directory. + summary.getCounts().add(Content.DIRECTORY, 1); + + // Relinquish and reacquire locks if necessary. 
+ summary.yield(); + + return summary; } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectoryWithQuota.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectoryWithQuota.java index f2b607ccaa7..e18420df4fb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectoryWithQuota.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectoryWithQuota.java @@ -107,12 +107,16 @@ public Quota.Counts computeQuotaUsage(Quota.Counts counts, boolean useCache, } @Override - public Content.Counts computeContentSummary( - final Content.Counts counts) { - final long original = counts.get(Content.DISKSPACE); - super.computeContentSummary(counts); - checkDiskspace(counts.get(Content.DISKSPACE) - original); - return counts; + public ContentSummaryComputationContext computeContentSummary( + final ContentSummaryComputationContext summary) { + final long original = summary.getCounts().get(Content.DISKSPACE); + long oldYieldCount = summary.getYieldCount(); + super.computeContentSummary(summary); + // Check only when the content has not changed in the middle. + if (oldYieldCount == summary.getYieldCount()) { + checkDiskspace(summary.getCounts().get(Content.DISKSPACE) - original); + } + return summary; } private void checkDiskspace(final long computed) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java index 455d808a37f..1b6478d922f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java @@ -342,11 +342,11 @@ public final Quota.Counts computeQuotaUsage(Quota.Counts counts, } @Override - public final Content.Counts computeContentSummary( - final Content.Counts counts) { - computeContentSummary4Snapshot(counts); - computeContentSummary4Current(counts); - return counts; + public final ContentSummaryComputationContext computeContentSummary( + final ContentSummaryComputationContext summary) { + computeContentSummary4Snapshot(summary.getCounts()); + computeContentSummary4Current(summary.getCounts()); + return summary; } private void computeContentSummary4Snapshot(final Content.Counts counts) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeMap.java index 3b7b4b6e42b..6b5b0f529a2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeMap.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeMap.java @@ -107,7 +107,8 @@ public Counts computeQuotaUsage(Counts counts, boolean useCache, } @Override - public Content.Counts computeContentSummary(Content.Counts counts) { + public ContentSummaryComputationContext computeContentSummary( + ContentSummaryComputationContext summary) { return null; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeReference.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeReference.java index 1f4ff7de18d..a049c012917 
100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeReference.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeReference.java @@ -278,8 +278,9 @@ public void destroyAndCollectBlocks( } @Override - public Content.Counts computeContentSummary(Content.Counts counts) { - return referred.computeContentSummary(counts); + public ContentSummaryComputationContext computeContentSummary( + ContentSummaryComputationContext summary) { + return referred.computeContentSummary(summary); } @Override @@ -444,12 +445,13 @@ public int getLastSnapshotId() { } @Override - public final Content.Counts computeContentSummary(Content.Counts counts) { + public final ContentSummaryComputationContext computeContentSummary( + ContentSummaryComputationContext summary) { //only count diskspace for WithName final Quota.Counts q = Quota.Counts.newInstance(); computeQuotaUsage(q, false, lastSnapshotId); - counts.add(Content.DISKSPACE, q.get(Quota.DISKSPACE)); - return counts; + summary.getCounts().add(Content.DISKSPACE, q.get(Quota.DISKSPACE)); + return summary; } @Override @@ -688,4 +690,4 @@ private Snapshot getSelfSnapshot(final Snapshot prior) { } } } -} \ No newline at end of file +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeSymlink.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeSymlink.java index ba41a2ecdb0..21a04a3b1ab 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeSymlink.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeSymlink.java @@ -98,9 +98,10 @@ public Quota.Counts computeQuotaUsage(Quota.Counts counts, } @Override - public Content.Counts computeContentSummary(final Content.Counts counts) { - counts.add(Content.SYMLINK, 1); - return counts; + public ContentSummaryComputationContext computeContentSummary( + final ContentSummaryComputationContext summary) { + summary.getCounts().add(Content.SYMLINK, 1); + return summary; } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/INodeDirectorySnapshottable.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/INodeDirectorySnapshottable.java index 75331615c37..1e2c5dd3db2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/INodeDirectorySnapshottable.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/INodeDirectorySnapshottable.java @@ -38,6 +38,7 @@ import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport.DiffReportEntry; import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport.DiffType; import org.apache.hadoop.hdfs.server.namenode.Content; +import org.apache.hadoop.hdfs.server.namenode.ContentSummaryComputationContext; import org.apache.hadoop.hdfs.server.namenode.INode; import org.apache.hadoop.hdfs.server.namenode.INodeDirectory; import org.apache.hadoop.hdfs.server.namenode.INodeFile; @@ -342,11 +343,12 @@ Snapshot removeSnapshot(String snapshotName, } @Override - public Content.Counts computeContentSummary(final Content.Counts counts) { - super.computeContentSummary(counts); - counts.add(Content.SNAPSHOT, snapshotsByNames.size()); - counts.add(Content.SNAPSHOTTABLE_DIRECTORY, 1); - return counts; + public 
ContentSummaryComputationContext computeContentSummary( + final ContentSummaryComputationContext summary) { + super.computeContentSummary(summary); + summary.getCounts().add(Content.SNAPSHOT, snapshotsByNames.size()); + summary.getCounts().add(Content.SNAPSHOTTABLE_DIRECTORY, 1); + return summary; } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/INodeDirectoryWithSnapshot.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/INodeDirectoryWithSnapshot.java index dfbf22c7c05..296cd4c7698 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/INodeDirectoryWithSnapshot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/INodeDirectoryWithSnapshot.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport.DiffReportEntry; import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport.DiffType; import org.apache.hadoop.hdfs.server.namenode.Content; +import org.apache.hadoop.hdfs.server.namenode.ContentSummaryComputationContext; import org.apache.hadoop.hdfs.server.namenode.FSImageSerialization; import org.apache.hadoop.hdfs.server.namenode.INode; import org.apache.hadoop.hdfs.server.namenode.INodeDirectory; @@ -883,18 +884,27 @@ public Quota.Counts computeQuotaUsage4CurrentDirectory(Quota.Counts counts) { } @Override - public Content.Counts computeContentSummary(final Content.Counts counts) { - super.computeContentSummary(counts); - computeContentSummary4Snapshot(counts); - return counts; + public ContentSummaryComputationContext computeContentSummary( + final ContentSummaryComputationContext summary) { + // Snapshot summary calc won't be relinquishing locks in the middle. + // Do this first and handover to parent. + computeContentSummary4Snapshot(summary.getCounts()); + super.computeContentSummary(summary); + return summary; } private void computeContentSummary4Snapshot(final Content.Counts counts) { + // Create a new blank summary context for blocking processing of subtree. + ContentSummaryComputationContext summary = + new ContentSummaryComputationContext(); for(DirectoryDiff d : diffs) { for(INode deleted : d.getChildrenDiff().getList(ListType.DELETED)) { - deleted.computeContentSummary(counts); + deleted.computeContentSummary(summary); } } + // Add the counts from deleted trees. + counts.add(summary.getCounts()); + // Add the deleted directory count. 
counts.add(Content.DIRECTORY, diffs.asList().size()); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DelegationTokenFetcher.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DelegationTokenFetcher.java index 0685d860dd6..c420c551680 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DelegationTokenFetcher.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DelegationTokenFetcher.java @@ -26,8 +26,8 @@ import java.io.PrintStream; import java.net.HttpURLConnection; import java.net.InetSocketAddress; +import java.net.URI; import java.net.URL; -import java.net.URLConnection; import java.security.PrivilegedExceptionAction; import java.util.Collection; import java.util.Date; @@ -47,11 +47,14 @@ import org.apache.hadoop.hdfs.server.namenode.GetDelegationTokenServlet; import org.apache.hadoop.hdfs.server.namenode.RenewDelegationTokenServlet; import org.apache.hadoop.hdfs.web.HftpFileSystem; +import org.apache.hadoop.hdfs.web.HsftpFileSystem; +import org.apache.hadoop.hdfs.web.URLConnectionFactory; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.authentication.client.AuthenticationException; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.ExitUtil; import org.apache.hadoop.util.GenericOptionsParser; @@ -142,11 +145,11 @@ public static void main(final String[] args) throws Exception { // default to using the local file system FileSystem local = FileSystem.getLocal(conf); final Path tokenFile = new Path(local.getWorkingDirectory(), remaining[0]); + final URLConnectionFactory connectionFactory = URLConnectionFactory.DEFAULT_CONNECTION_FACTORY; // Login the current user UserGroupInformation.getCurrentUser().doAs( new PrivilegedExceptionAction() { - @SuppressWarnings("unchecked") @Override public Object run() throws Exception { @@ -182,7 +185,8 @@ public Object run() throws Exception { } else { // otherwise we are fetching if (webUrl != null) { - Credentials creds = getDTfromRemote(webUrl, renewer); + Credentials creds = getDTfromRemote(connectionFactory, new URI(webUrl), + renewer); creds.writeTokenStorageFile(tokenFile, conf); for (Token token : creds.getAllTokens()) { if(LOG.isDebugEnabled()) { @@ -208,32 +212,31 @@ public Object run() throws Exception { }); } - static public Credentials getDTfromRemote(String nnAddr, - String renewer) throws IOException { + static public Credentials getDTfromRemote(URLConnectionFactory factory, + URI nnUri, String renewer) throws IOException { + StringBuilder buf = new StringBuilder(nnUri.toString()) + .append(GetDelegationTokenServlet.PATH_SPEC); + if (renewer != null) { + buf.append("?").append(GetDelegationTokenServlet.RENEWER).append("=") + .append(renewer); + } + + HttpURLConnection conn = null; DataInputStream dis = null; - InetSocketAddress serviceAddr = NetUtils.createSocketAddr(nnAddr); - + InetSocketAddress serviceAddr = NetUtils.createSocketAddr(nnUri + .getAuthority()); + try { - StringBuffer url = new StringBuffer(); - if (renewer != null) { - url.append(nnAddr).append(GetDelegationTokenServlet.PATH_SPEC) - .append("?").append(GetDelegationTokenServlet.RENEWER).append("=") - .append(renewer); - } else { - url.append(nnAddr).append(GetDelegationTokenServlet.PATH_SPEC); - } 
- if(LOG.isDebugEnabled()) { - LOG.debug("Retrieving token from: " + url); + LOG.debug("Retrieving token from: " + buf); } - - URL remoteURL = new URL(url.toString()); - URLConnection connection = SecurityUtil.openSecureHttpConnection(remoteURL); - InputStream in = connection.getInputStream(); + + conn = run(factory, new URL(buf.toString())); + InputStream in = conn.getInputStream(); Credentials ts = new Credentials(); dis = new DataInputStream(in); ts.readFields(dis); - for(Token token: ts.getAllTokens()) { + for (Token token : ts.getAllTokens()) { token.setKind(HftpFileSystem.TOKEN_KIND); SecurityUtil.setTokenService(token, serviceAddr); } @@ -241,53 +244,70 @@ static public Credentials getDTfromRemote(String nnAddr, } catch (Exception e) { throw new IOException("Unable to obtain remote token", e); } finally { - if(dis != null) dis.close(); + IOUtils.cleanup(LOG, dis); + if (conn != null) { + conn.disconnect(); + } } } + /** + * Cancel a Delegation Token. + * @param nnAddr the NameNode's address + * @param tok the token to cancel + * @throws IOException + * @throws AuthenticationException + */ + static public void cancelDelegationToken(URLConnectionFactory factory, + URI nnAddr, Token tok) throws IOException, + AuthenticationException { + StringBuilder buf = new StringBuilder(nnAddr.toString()) + .append(CancelDelegationTokenServlet.PATH_SPEC).append("?") + .append(CancelDelegationTokenServlet.TOKEN).append("=") + .append(tok.encodeToUrlString()); + HttpURLConnection conn = run(factory, new URL(buf.toString())); + conn.disconnect(); + } + /** * Renew a Delegation Token. * @param nnAddr the NameNode's address * @param tok the token to renew * @return the Date that the token will expire next. * @throws IOException + * @throws AuthenticationException */ - static public long renewDelegationToken(String nnAddr, - Token tok - ) throws IOException { - StringBuilder buf = new StringBuilder(); - buf.append(nnAddr); - buf.append(RenewDelegationTokenServlet.PATH_SPEC); - buf.append("?"); - buf.append(RenewDelegationTokenServlet.TOKEN); - buf.append("="); - buf.append(tok.encodeToUrlString()); - BufferedReader in = null; + static public long renewDelegationToken(URLConnectionFactory factory, + URI nnAddr, Token tok) throws IOException, + AuthenticationException { + StringBuilder buf = new StringBuilder(nnAddr.toString()) + .append(RenewDelegationTokenServlet.PATH_SPEC).append("?") + .append(RenewDelegationTokenServlet.TOKEN).append("=") + .append(tok.encodeToUrlString()); + HttpURLConnection connection = null; - + BufferedReader in = null; try { - URL url = new URL(buf.toString()); - connection = (HttpURLConnection) SecurityUtil.openSecureHttpConnection(url); - if (connection.getResponseCode() != HttpURLConnection.HTTP_OK) { - throw new IOException("Error renewing token: " + - connection.getResponseMessage()); - } - in = new BufferedReader( - new InputStreamReader(connection.getInputStream(), Charsets.UTF_8)); + connection = run(factory, new URL(buf.toString())); + in = new BufferedReader(new InputStreamReader( + connection.getInputStream(), Charsets.UTF_8)); long result = Long.parseLong(in.readLine()); - in.close(); return result; } catch (IOException ie) { LOG.info("error in renew over HTTP", ie); IOException e = getExceptionFromResponse(connection); - IOUtils.cleanup(LOG, in); - if(e!=null) { - LOG.info("rethrowing exception from HTTP request: " + - e.getLocalizedMessage()); + if (e != null) { + LOG.info("rethrowing exception from HTTP request: " + + e.getLocalizedMessage()); throw e; } throw ie; 
+ } finally { + IOUtils.cleanup(LOG, in); + if (connection != null) { + connection.disconnect(); + } } } @@ -339,43 +359,28 @@ static private IOException getExceptionFromResponse(HttpURLConnection con) { return e; } - - /** - * Cancel a Delegation Token. - * @param nnAddr the NameNode's address - * @param tok the token to cancel - * @throws IOException - */ - static public void cancelDelegationToken(String nnAddr, - Token tok - ) throws IOException { - StringBuilder buf = new StringBuilder(); - buf.append(nnAddr); - buf.append(CancelDelegationTokenServlet.PATH_SPEC); - buf.append("?"); - buf.append(CancelDelegationTokenServlet.TOKEN); - buf.append("="); - buf.append(tok.encodeToUrlString()); - BufferedReader in = null; - HttpURLConnection connection=null; + private static HttpURLConnection run(URLConnectionFactory factory, URL url) + throws IOException, AuthenticationException { + HttpURLConnection conn = null; + try { - URL url = new URL(buf.toString()); - connection = (HttpURLConnection) SecurityUtil.openSecureHttpConnection(url); - if (connection.getResponseCode() != HttpURLConnection.HTTP_OK) { - throw new IOException("Error cancelling token: " + - connection.getResponseMessage()); + conn = (HttpURLConnection) factory.openConnection(url, true); + if (conn.getResponseCode() != HttpURLConnection.HTTP_OK) { + String msg = conn.getResponseMessage(); + + throw new IOException("Error when dealing remote token: " + msg); } } catch (IOException ie) { - LOG.info("error in cancel over HTTP", ie); - IOException e = getExceptionFromResponse(connection); + LOG.info("Error when dealing remote token:", ie); + IOException e = getExceptionFromResponse(conn); - IOUtils.cleanup(LOG, in); - if(e!=null) { - LOG.info("rethrowing exception from HTTP request: " + - e.getLocalizedMessage()); + if (e != null) { + LOG.info("rethrowing exception from HTTP request: " + + e.getLocalizedMessage()); throw e; } throw ie; } + return conn; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/HftpFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/HftpFileSystem.java index b3336a5202d..8909c9d8f64 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/HftpFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/HftpFileSystem.java @@ -31,7 +31,6 @@ import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; -import java.util.Collection; import java.util.TimeZone; import org.apache.hadoop.classification.InterfaceAudience; @@ -50,20 +49,17 @@ import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; -import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSelector; import org.apache.hadoop.hdfs.server.common.JspHelper; import org.apache.hadoop.hdfs.tools.DelegationTokenFetcher; -import org.apache.hadoop.hdfs.web.ByteRangeInputStream.URLOpener; import org.apache.hadoop.io.Text; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.authentication.client.AuthenticationException; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; -import 
org.apache.hadoop.security.token.TokenRenewer; -import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSelector; import org.apache.hadoop.util.Progressable; import org.apache.hadoop.util.ServletUtil; import org.xml.sax.Attributes; @@ -83,7 +79,9 @@ @InterfaceAudience.Private @InterfaceStability.Evolving public class HftpFileSystem extends FileSystem - implements DelegationTokenRenewer.Renewable { + implements DelegationTokenRenewer.Renewable, TokenAspect.TokenManagementDelegator { + public static final String SCHEME = "hftp"; + static { HttpURLConnection.setFollowRedirects(true); } @@ -100,19 +98,13 @@ public class HftpFileSystem extends FileSystem public static final String HFTP_TIMEZONE = "UTC"; public static final String HFTP_DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ssZ"; + private TokenAspect tokenAspect = new TokenAspect(this, TOKEN_KIND); private Token delegationToken; private Token renewToken; - private static final HftpDelegationTokenSelector hftpTokenSelector = - new HftpDelegationTokenSelector(); - private DelegationTokenRenewer dtRenewer = null; - - private synchronized void addRenewAction(final HftpFileSystem hftpFs) { - if (dtRenewer == null) { - dtRenewer = DelegationTokenRenewer.getInstance(); - } - - dtRenewer.addRenewAction(hftpFs); + @Override + public URI getCanonicalUri() { + return super.getCanonicalUri(); } public static final SimpleDateFormat getDateFormat() { @@ -177,7 +169,7 @@ protected URI canonicalizeUri(URI uri) { */ @Override public String getScheme() { - return "hftp"; + return SCHEME; } @Override @@ -195,39 +187,10 @@ public void initialize(final URI name, final Configuration conf) } if (UserGroupInformation.isSecurityEnabled()) { - initDelegationToken(); + tokenAspect.initDelegationToken(ugi); } } - protected void initDelegationToken() throws IOException { - // look for hftp token, then try hdfs - Token token = selectDelegationToken(ugi); - - // if we don't already have a token, go get one over https - boolean createdToken = false; - if (token == null) { - token = getDelegationToken(null); - createdToken = (token != null); - } - - // we already had a token or getDelegationToken() didn't fail. - if (token != null) { - setDelegationToken(token); - if (createdToken) { - addRenewAction(this); - LOG.debug("Created new DT for " + token.getService()); - } else { - LOG.debug("Found existing DT for " + token.getService()); - } - } - } - - protected Token selectDelegationToken( - UserGroupInformation ugi) { - return hftpTokenSelector.selectToken(nnUri, ugi.getTokens(), getConf()); - } - - @Override public Token getRenewToken() { return renewToken; @@ -242,16 +205,19 @@ protected String getUnderlyingProtocol() { @Override public synchronized void setDelegationToken(Token token) { + /** + * XXX The kind of the token has been changed by DelegationTokenFetcher. We + * use the token for renewal, since the reflection utilities needs the value + * of the kind field to correctly renew the token. + * + * For other operations, however, the client has to send a + * HDFS_DELEGATION_KIND token over the wire so that it can talk to Hadoop + * 0.20.3 clusters. Later releases fix this problem. See HDFS-5440 for more + * details. 
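[Illustrative note — not part of the patch] The comment above explains why HftpFileSystem keeps two references to the same credential: renewToken keeps the kind that DelegationTokenFetcher assigned, so renewal is dispatched to the right TokenRenewer, while delegationToken is a copy re-kinded to HDFS_DELEGATION_KIND so that wire operations still work against 0.20.x servers. The essential pattern, using only types that already appear in this patch:

```java
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
import org.apache.hadoop.security.token.Token;

public class DualKindTokenSketch {
  private Token<DelegationTokenIdentifier> renewToken;       // original kind, used for renewal
  private Token<DelegationTokenIdentifier> delegationToken;  // re-kinded copy, sent on the wire

  synchronized void setDelegationToken(Token<DelegationTokenIdentifier> token) {
    renewToken = token;
    delegationToken = new Token<DelegationTokenIdentifier>(token);  // copy, then mutate only the copy
    delegationToken.setKind(DelegationTokenIdentifier.HDFS_DELEGATION_KIND);
  }
}
```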
+ */ renewToken = token; - // emulate the 203 usage of the tokens - // by setting the kind and service as if they were hdfs tokens delegationToken = new Token(token); - // NOTE: the remote nn must be configured to use hdfs delegationToken.setKind(DelegationTokenIdentifier.HDFS_DELEGATION_KIND); - // no need to change service because we aren't exactly sure what it - // should be. we can guess, but it might be wrong if the local conf - // value is incorrect. the service is a client side field, so the remote - // end does not care about the value } @Override @@ -266,7 +232,7 @@ public Token run() throws IOException { final String nnHttpUrl = nnUri.toString(); Credentials c; try { - c = DelegationTokenFetcher.getDTfromRemote(nnHttpUrl, renewer); + c = DelegationTokenFetcher.getDTfromRemote(connectionFactory, nnUri, renewer); } catch (IOException e) { if (e.getCause() instanceof ConnectException) { LOG.warn("Couldn't connect to " + nnHttpUrl + @@ -350,6 +316,7 @@ protected String addDelegationTokenParam(String query) throws IOException { String tokenString = null; if (UserGroupInformation.isSecurityEnabled()) { synchronized (this) { + tokenAspect.ensureTokenInitialized(); if (delegationToken != null) { tokenString = delegationToken.encodeToUrlString(); return (query + JspHelper.getDelegationTokenUrlParam(tokenString)); @@ -419,9 +386,7 @@ public FSDataInputStream open(Path f, int buffersize) throws IOException { @Override public void close() throws IOException { super.close(); - if (dtRenewer != null) { - dtRenewer.removeRenewAction(this); // blocks - } + tokenAspect.removeRenewAction(); } /** Class to parse and store a listing reply from the server. */ @@ -696,67 +661,33 @@ public ContentSummary getContentSummary(Path f) throws IOException { return cs != null? 
cs: super.getContentSummary(f); } - @InterfaceAudience.Private - public static class TokenManager extends TokenRenewer { - - @Override - public boolean handleKind(Text kind) { - return kind.equals(TOKEN_KIND); - } - - @Override - public boolean isManaged(Token token) throws IOException { - return true; - } - - protected String getUnderlyingProtocol() { - return "http"; - } - - @SuppressWarnings("unchecked") - @Override - public long renew(Token token, - Configuration conf) throws IOException { - // update the kerberos credentials, if they are coming from a keytab - UserGroupInformation.getLoginUser().checkTGTAndReloginFromKeytab(); - InetSocketAddress serviceAddr = SecurityUtil.getTokenServiceAddr(token); - return - DelegationTokenFetcher.renewDelegationToken - (DFSUtil.createUri(getUnderlyingProtocol(), serviceAddr).toString(), - (Token) token); - } - - @SuppressWarnings("unchecked") - @Override - public void cancel(Token token, - Configuration conf) throws IOException { - // update the kerberos credentials, if they are coming from a keytab - UserGroupInformation.getLoginUser().checkTGTAndReloginFromKeytab(); - InetSocketAddress serviceAddr = SecurityUtil.getTokenServiceAddr(token); - DelegationTokenFetcher.cancelDelegationToken - (DFSUtil.createUri(getUnderlyingProtocol(), serviceAddr).toString(), - (Token) token); + @SuppressWarnings("unchecked") + @Override + public long renewDelegationToken(Token token) throws IOException { + // update the kerberos credentials, if they are coming from a keytab + UserGroupInformation.getLoginUser().checkTGTAndReloginFromKeytab(); + InetSocketAddress serviceAddr = SecurityUtil.getTokenServiceAddr(token); + try { + return DelegationTokenFetcher.renewDelegationToken(connectionFactory, + DFSUtil.createUri(getUnderlyingProtocol(), serviceAddr), + (Token) token); + } catch (AuthenticationException e) { + throw new IOException(e); } } - private static class HftpDelegationTokenSelector - extends AbstractDelegationTokenSelector { - private static final DelegationTokenSelector hdfsTokenSelector = - new DelegationTokenSelector(); - - public HftpDelegationTokenSelector() { - super(TOKEN_KIND); - } - - Token selectToken(URI nnUri, - Collection> tokens, Configuration conf) { - Token token = - selectToken(SecurityUtil.buildTokenService(nnUri), tokens); - if (token == null) { - // try to get a HDFS token - token = hdfsTokenSelector.selectToken(nnUri, tokens, conf); - } - return token; + @SuppressWarnings("unchecked") + @Override + public void cancelDelegationToken(Token token) throws IOException { + // update the kerberos credentials, if they are coming from a keytab + UserGroupInformation.getLoginUser().checkTGTAndReloginFromKeytab(); + InetSocketAddress serviceAddr = SecurityUtil.getTokenServiceAddr(token); + try { + DelegationTokenFetcher.cancelDelegationToken(connectionFactory, DFSUtil + .createUri(getUnderlyingProtocol(), serviceAddr), + (Token) token); + } catch (AuthenticationException e) { + throw new IOException(e); } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/TokenAspect.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/TokenAspect.java new file mode 100644 index 00000000000..5464ba0c753 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/TokenAspect.java @@ -0,0 +1,177 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.web; + +import java.io.IOException; +import java.net.InetSocketAddress; +import java.net.URI; +import java.util.Collection; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.DelegationTokenRenewer; +import org.apache.hadoop.fs.DelegationTokenRenewer.Renewable; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; +import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSelector; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.security.SecurityUtil; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.security.token.TokenRenewer; +import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSelector; + +import com.google.common.annotations.VisibleForTesting; + +/** + * This class implements the aspects that relate to delegation tokens for all + * HTTP-based file system. 
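[Illustrative note — not part of the patch] TokenAspect gathers the delegation-token plumbing that HftpFileSystem and WebHdfsFileSystem previously duplicated: selecting an existing token by kind at initialize time, lazily fetching one on first authenticated use, and registering with DelegationTokenRenewer. A much-reduced sketch of just the lazy-initialization part, with a hypothetical callback interface standing in for the real FileSystem and TokenManagementDelegator types:

```java
import java.io.IOException;
import java.net.URI;

public class TokenAspectSketch {
  /** Stand-in for the callbacks the owning file system provides. */
  interface TokenOwner {
    String fetchDelegationToken(String renewer) throws IOException;  // stand-in for Token<?>
    void setDelegationToken(String token);
    URI getCanonicalUri();
  }

  private final TokenOwner fs;
  private boolean hasInitedToken;

  TokenAspectSketch(TokenOwner fs) {
    this.fs = fs;
  }

  /** Fetch a token the first time an authenticated request is built. */
  synchronized void ensureTokenInitialized() throws IOException {
    if (hasInitedToken) {
      return;
    }
    String token = fs.fetchDelegationToken(null);
    if (token != null) {                 // security may be disabled, so null is fine
      fs.setDelegationToken(token);
    }
    hasInitedToken = true;
  }
}
```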
+ */ +final class TokenAspect { + @InterfaceAudience.Private + public static class TokenManager extends TokenRenewer { + + @Override + public void cancel(Token token, Configuration conf) throws IOException { + getInstance(token, conf).cancelDelegationToken(token); + } + + @Override + public boolean handleKind(Text kind) { + return kind.equals(HftpFileSystem.TOKEN_KIND) + || kind.equals(WebHdfsFileSystem.TOKEN_KIND); + } + + @Override + public boolean isManaged(Token token) throws IOException { + return true; + } + + @Override + public long renew(Token token, Configuration conf) throws IOException { + return getInstance(token, conf).renewDelegationToken(token); + } + + private TokenManagementDelegator getInstance(Token token, + Configuration conf) throws IOException { + final InetSocketAddress address = SecurityUtil.getTokenServiceAddr(token); + Text kind = token.getKind(); + final URI uri; + if (kind.equals(HftpFileSystem.TOKEN_KIND)) { + uri = DFSUtil.createUri(HftpFileSystem.SCHEME, address); + } else if (kind.equals(WebHdfsFileSystem.TOKEN_KIND)) { + uri = DFSUtil.createUri(WebHdfsFileSystem.SCHEME, address); + } else { + throw new IllegalArgumentException("Unsupported scheme"); + } + return (TokenManagementDelegator) FileSystem.get(uri, conf); + } + } + + static class DTSelecorByKind extends + AbstractDelegationTokenSelector { + private static final DelegationTokenSelector selector = new DelegationTokenSelector(); + + public DTSelecorByKind(final Text kind) { + super(kind); + } + + Token selectToken(URI nnUri, + Collection> tokens, Configuration conf) { + Token token = selectToken( + SecurityUtil.buildTokenService(nnUri), tokens); + if (token == null) { + token = selector.selectToken(nnUri, tokens, conf); + } + return token; + } + } + + /** + * Callbacks for token management + */ + interface TokenManagementDelegator { + void cancelDelegationToken(final Token token) throws IOException; + + URI getCanonicalUri(); + + long renewDelegationToken(final Token token) throws IOException; + } + + private DelegationTokenRenewer.RenewAction action; + private DelegationTokenRenewer dtRenewer = null; + private final DTSelecorByKind dtSelector; + private final T fs; + private boolean hasInitedToken; + private final Log LOG; + + TokenAspect(T fs, final Text kind) { + this.LOG = LogFactory.getLog(fs.getClass()); + this.fs = fs; + this.dtSelector = new DTSelecorByKind(kind); + } + + synchronized void ensureTokenInitialized() throws IOException { + // we haven't inited yet, or we used to have a token but it expired + if (!hasInitedToken || (action != null && !action.isValid())) { + //since we don't already have a token, go get one + Token token = fs.getDelegationToken(null); + // security might be disabled + if (token != null) { + fs.setDelegationToken(token); + addRenewAction(fs); + LOG.debug("Created new DT for " + token.getService()); + } + hasInitedToken = true; + } + } + + synchronized void initDelegationToken(UserGroupInformation ugi) { + Token token = selectDelegationToken(ugi); + if (token != null) { + LOG.debug("Found existing DT for " + token.getService()); + fs.setDelegationToken(token); + hasInitedToken = true; + } + } + + synchronized void removeRenewAction() throws IOException { + if (dtRenewer != null) { + dtRenewer.removeRenewAction(fs); + } + } + + @VisibleForTesting + Token selectDelegationToken( + UserGroupInformation ugi) { + return dtSelector.selectToken( + ((TokenManagementDelegator)fs).getCanonicalUri(), ugi.getTokens(), + fs.getConf()); + } + + private synchronized void 
addRenewAction(final T webhdfs) { + if (dtRenewer == null) { + dtRenewer = DelegationTokenRenewer.getInstance(); + } + + action = dtRenewer.addRenewAction(webhdfs); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/URLConnectionFactory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/URLConnectionFactory.java index 25227db074c..0ac7d651e95 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/URLConnectionFactory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/URLConnectionFactory.java @@ -27,7 +27,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.hdfs.web.resources.HttpOpParam; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authentication.client.AuthenticatedURL; import org.apache.hadoop.security.authentication.client.AuthenticationException; @@ -77,24 +77,28 @@ public URLConnectionFactory(int socketTimeout) { * @throws IOException */ public URLConnection openConnection(URL url) throws IOException { - URLConnection connection = url.openConnection(); - if (connection instanceof HttpURLConnection) { - connConfigurator.configure((HttpURLConnection) connection); + try { + return openConnection(url, false); + } catch (AuthenticationException e) { + // Unreachable + return null; } - return connection; } /** * Opens a url with read and connect timeouts * - * @param url URL to open + * @param url + * URL to open + * @param isSpnego + * whether the url should be authenticated via SPNEGO * @return URLConnection * @throws IOException * @throws AuthenticationException */ - public URLConnection openConnection(HttpOpParam.Op op, URL url) + public URLConnection openConnection(URL url, boolean isSpnego) throws IOException, AuthenticationException { - if (op.getRequireAuth()) { + if (isSpnego) { if (LOG.isDebugEnabled()) { LOG.debug("open AuthenticatedURL connection" + url); } @@ -106,7 +110,11 @@ public URLConnection openConnection(HttpOpParam.Op op, URL url) if (LOG.isDebugEnabled()) { LOG.debug("open URL connection"); } - return openConnection(url); + URLConnection connection = url.openConnection(); + if (connection instanceof HttpURLConnection) { + connConfigurator.configure((HttpURLConnection) connection); + } + return connection; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java index 9b623c056f1..f293e71e743 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java @@ -30,7 +30,6 @@ import java.net.URISyntaxException; import java.net.URL; import java.security.PrivilegedExceptionAction; -import java.util.Collection; import java.util.List; import java.util.Map; import java.util.StringTokenizer; @@ -56,8 +55,8 @@ import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; -import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSelector; import 
org.apache.hadoop.hdfs.server.namenode.SafeModeException; +import org.apache.hadoop.hdfs.web.TokenAspect.DTSelecorByKind; import org.apache.hadoop.hdfs.web.resources.AccessTimeParam; import org.apache.hadoop.hdfs.web.resources.BlockSizeParam; import org.apache.hadoop.hdfs.web.resources.BufferSizeParam; @@ -96,8 +95,6 @@ import org.apache.hadoop.security.authentication.client.AuthenticationException; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; -import org.apache.hadoop.security.token.TokenRenewer; -import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSelector; import org.apache.hadoop.util.Progressable; import org.mortbay.util.ajax.JSON; @@ -107,7 +104,7 @@ /** A FileSystem for HDFS over the web. */ public class WebHdfsFileSystem extends FileSystem - implements DelegationTokenRenewer.Renewable { + implements DelegationTokenRenewer.Renewable, TokenAspect.TokenManagementDelegator { public static final Log LOG = LogFactory.getLog(WebHdfsFileSystem.class); /** File System URI: {SCHEME}://namenode:port/path/to/file */ public static final String SCHEME = "webhdfs"; @@ -122,13 +119,18 @@ public class WebHdfsFileSystem extends FileSystem /** Delegation token kind */ public static final Text TOKEN_KIND = new Text("WEBHDFS delegation"); /** Token selector */ - public static final WebHdfsDelegationTokenSelector DT_SELECTOR - = new WebHdfsDelegationTokenSelector(); + public static final DTSelecorByKind DT_SELECTOR + = new DTSelecorByKind(TOKEN_KIND); private DelegationTokenRenewer dtRenewer = null; @VisibleForTesting DelegationTokenRenewer.RenewAction action; + @Override + public URI getCanonicalUri() { + return super.getCanonicalUri(); + } + @VisibleForTesting protected synchronized void addRenewAction(final WebHdfsFileSystem webhdfs) { if (dtRenewer == null) { @@ -142,7 +144,6 @@ protected synchronized void addRenewAction(final WebHdfsFileSystem webhdfs) { public static boolean isEnabled(final Configuration conf, final Log log) { final boolean b = conf.getBoolean(DFSConfigKeys.DFS_WEBHDFS_ENABLED_KEY, DFSConfigKeys.DFS_WEBHDFS_ENABLED_DEFAULT); - log.info(DFSConfigKeys.DFS_WEBHDFS_ENABLED_KEY + " = " + b); return b; } @@ -490,7 +491,8 @@ private HttpURLConnection openHttpUrlConnection(final URL url) throws IOException { final HttpURLConnection conn; try { - conn = (HttpURLConnection) connectionFactory.openConnection(op, url); + conn = (HttpURLConnection) connectionFactory.openConnection(url, + op.getRequireAuth()); } catch (AuthenticationException e) { throw new IOException(e); } @@ -986,7 +988,8 @@ public void setDelegationToken( } } - private synchronized long renewDelegationToken(final Token token + @Override + public synchronized long renewDelegationToken(final Token token ) throws IOException { final HttpOpParam.Op op = PutOpParam.Op.RENEWDELEGATIONTOKEN; TokenArgumentParam dtargParam = new TokenArgumentParam( @@ -995,7 +998,8 @@ private synchronized long renewDelegationToken(final Token token return (Long) m.get("long"); } - private synchronized void cancelDelegationToken(final Token token + @Override + public synchronized void cancelDelegationToken(final Token token ) throws IOException { final HttpOpParam.Op op = PutOpParam.Op.CANCELDELEGATIONTOKEN; TokenArgumentParam dtargParam = new TokenArgumentParam( @@ -1041,57 +1045,4 @@ public MD5MD5CRC32FileChecksum getFileChecksum(final Path p final Map m = run(op, p); return JsonUtil.toMD5MD5CRC32FileChecksum(m); } - - /** Delegation token renewer. 
*/ - public static class DtRenewer extends TokenRenewer { - @Override - public boolean handleKind(Text kind) { - return kind.equals(TOKEN_KIND); - } - - @Override - public boolean isManaged(Token token) throws IOException { - return true; - } - - private static WebHdfsFileSystem getWebHdfs( - final Token token, final Configuration conf) throws IOException { - - final InetSocketAddress nnAddr = SecurityUtil.getTokenServiceAddr(token); - final URI uri = DFSUtil.createUri(WebHdfsFileSystem.SCHEME, nnAddr); - return (WebHdfsFileSystem)FileSystem.get(uri, conf); - } - - @Override - public long renew(final Token token, final Configuration conf - ) throws IOException, InterruptedException { - return getWebHdfs(token, conf).renewDelegationToken(token); - } - - @Override - public void cancel(final Token token, final Configuration conf - ) throws IOException, InterruptedException { - getWebHdfs(token, conf).cancelDelegationToken(token); - } - } - - private static class WebHdfsDelegationTokenSelector - extends AbstractDelegationTokenSelector { - private static final DelegationTokenSelector hdfsTokenSelector = - new DelegationTokenSelector(); - - public WebHdfsDelegationTokenSelector() { - super(TOKEN_KIND); - } - - Token selectToken(URI nnUri, - Collection> tokens, Configuration conf) { - Token token = - selectToken(SecurityUtil.buildTokenService(nnUri), tokens); - if (token == null) { - token = hdfsTokenSelector.selectToken(nnUri, tokens, conf); - } - return token; - } - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/META-INF/services/org.apache.hadoop.security.token.TokenRenewer b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/META-INF/services/org.apache.hadoop.security.token.TokenRenewer index dee43fcd08c..7efd684d696 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/META-INF/services/org.apache.hadoop.security.token.TokenRenewer +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/META-INF/services/org.apache.hadoop.security.token.TokenRenewer @@ -13,5 +13,4 @@ # org.apache.hadoop.hdfs.DFSClient$Renewer org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier$Renewer -org.apache.hadoop.hdfs.web.HftpFileSystem$TokenManager -org.apache.hadoop.hdfs.web.WebHdfsFileSystem$DtRenewer +org.apache.hadoop.hdfs.web.TokenAspect$TokenManager diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index 45133d09967..dbf4f4433cb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -1516,6 +1516,18 @@ + + dfs.namenode.path.based.cache.retry.interval.ms + 60000 + + When the NameNode needs to uncache something that is cached, or cache + something that is not cached, it must direct the DataNodes to do so by + sending a DNA_CACHE or DNA_UNCACHE command in response to a DataNode + heartbeat. This parameter controls how frequently the NameNode will + resend these commands. + + + dfs.datanode.fsdatasetcache.max.threads.per.volume 4 diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.html b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.html index 6a7b57dcc47..1bda48e78a8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.html +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.html @@ -30,6 +30,12 @@
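A note on the dfs.namenode.path.based.cache.retry.interval.ms property introduced in hdfs-default.xml above: a configuration used to bring up the NameNode can shorten the interval when cache/uncache commands need to be re-sent quickly, as the TestFsDatasetCache change further below does with a 500 ms value. The following is only a minimal, hypothetical sketch (the class name is illustrative; the key constant is the one this patch already uses):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration;

public class CacheRetryIntervalSketch {
  public static void main(String[] args) {
    Configuration conf = new HdfsConfiguration();
    // Configure the NameNode (when started with this conf) to re-send
    // DNA_CACHE/DNA_UNCACHE commands every 500 ms instead of the 60000 ms
    // default declared in hdfs-default.xml.
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_RETRY_INTERVAL_MS, 500);
    System.out.println("retry interval = " + conf.getLong(
        DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_RETRY_INTERVAL_MS, 60000) + " ms");
  }
}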
+
+
+

Hadoop, 2013.

+ +
+ -
-

Hadoop, 2013.

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.jsp b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.jsp index 10872a7af09..2afba913088 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.jsp +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.jsp @@ -70,6 +70,6 @@

Startup Progress

<% healthjsp.generateStartupProgress(out, nn.getStartupProgress()); %>
-<%
-out.println(ServletUtil.htmlFooter());
-%>
+

Hadoop, 2013. New UI

+ + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/index.html b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/index.html index 7fc136b0c85..99bb13b326c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/index.html +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/index.html @@ -14,22 +14,22 @@ See the License for the specific language governing permissions and limitations under the License. --> - - + + + Hadoop Administration - - +

Hadoop Administration

- - - diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestClientProtocolForPipelineRecovery.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestClientProtocolForPipelineRecovery.java index ee39cfe5332..0a8ed3eb7f5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestClientProtocolForPipelineRecovery.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestClientProtocolForPipelineRecovery.java @@ -26,10 +26,14 @@ import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.server.namenode.LeaseExpiredException; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; +import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.io.IOUtils; import org.junit.Assert; import org.junit.Test; +import org.mockito.Mockito; +import org.mockito.stubbing.Answer; + /** * This tests pipeline recovery related client protocol works correct or not. */ @@ -112,4 +116,55 @@ public class TestClientProtocolForPipelineRecovery { cluster.shutdown(); } } + + /** Test whether corrupt replicas are detected correctly during pipeline + * recoveries. + */ + @Test + public void testPipelineRecoveryForLastBlock() throws IOException { + DFSClientFaultInjector faultInjector + = Mockito.mock(DFSClientFaultInjector.class); + DFSClientFaultInjector oldInjector = DFSClientFaultInjector.instance; + DFSClientFaultInjector.instance = faultInjector; + Configuration conf = new HdfsConfiguration(); + + conf.setInt(DFSConfigKeys.DFS_CLIENT_BLOCK_WRITE_LOCATEFOLLOWINGBLOCK_RETRIES_KEY, 3); + MiniDFSCluster cluster = null; + + try { + int numDataNodes = 3; + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDataNodes).build(); + cluster.waitActive(); + FileSystem fileSys = cluster.getFileSystem(); + + Path file = new Path("dataprotocol1.dat"); + Mockito.when(faultInjector.failPacket()).thenReturn(true); + try { + DFSTestUtil.createFile(fileSys, file, 1L, (short)numDataNodes, 0L); + } catch (IOException e) { + // completeFile() should fail. + Assert.assertTrue(e.getMessage().startsWith("Unable to close file")); + return; + } + + // At this point, NN let data corruption to happen. + // Before failing test, try reading the file. It should fail. + FSDataInputStream in = fileSys.open(file); + try { + int c = in.read(); + // Test will fail with BlockMissingException if NN does not update the + // replica state based on the latest report. + } catch (org.apache.hadoop.hdfs.BlockMissingException bme) { + Assert.fail("Block is missing because the file was closed with" + + " corrupt replicas."); + } + Assert.fail("The file was closed with corrupt replicas, but read still" + + " works!"); + } finally { + DFSClientFaultInjector.instance = oldInjector; + if (cluster != null) { + cluster.shutdown(); + } + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestQuota.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestQuota.java index b9702ae60b9..13d54a3c995 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestQuota.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestQuota.java @@ -86,6 +86,9 @@ public void testQuotaCommands() throws Exception { // Space quotas final int DEFAULT_BLOCK_SIZE = 512; conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, DEFAULT_BLOCK_SIZE); + // Make it relinquish locks. When run serially, the result should + // be identical. 
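// Note: with dfs.content-summary.limit set to 2, the content summary
// computation appears to yield and re-acquire the namesystem lock after every
// couple of items, which is what keeps the getYieldCount() assertions added
// below deterministic when the test runs serially.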
+ conf.setInt(DFSConfigKeys.DFS_CONTENT_SUMMARY_LIMIT_KEY, 2); final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build(); final FileSystem fs = cluster.getFileSystem(); assertTrue("Not a HDFS: "+fs.getUri(), @@ -350,6 +353,7 @@ public Object run() throws Exception { } assertTrue(hasException); + assertEquals(4, cluster.getNamesystem().getFSDirectory().getYieldCount()); } finally { cluster.shutdown(); } @@ -360,6 +364,9 @@ public Object run() throws Exception { @Test public void testNamespaceCommands() throws Exception { final Configuration conf = new HdfsConfiguration(); + // Make it relinquish locks. When run serially, the result should + // be identical. + conf.setInt(DFSConfigKeys.DFS_CONTENT_SUMMARY_LIMIT_KEY, 2); final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build(); final FileSystem fs = cluster.getFileSystem(); assertTrue("Not a HDFS: "+fs.getUri(), @@ -515,6 +522,7 @@ public void testNamespaceCommands() throws Exception { c = dfs.getContentSummary(quotaDir1); assertEquals(c.getDirectoryCount(), 6); assertEquals(c.getQuota(), 6); + assertEquals(14, cluster.getNamesystem().getFSDirectory().getYieldCount()); } finally { cluster.shutdown(); } @@ -532,6 +540,9 @@ public void testSpaceCommands() throws Exception { // set a smaller block size so that we can test with smaller // diskspace quotas conf.set(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, "512"); + // Make it relinquish locks. When run serially, the result should + // be identical. + conf.setInt(DFSConfigKeys.DFS_CONTENT_SUMMARY_LIMIT_KEY, 2); final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build(); final FileSystem fs = cluster.getFileSystem(); assertTrue("Not a HDFS: "+fs.getUri(), @@ -764,6 +775,7 @@ public void testSpaceCommands() throws Exception { assertEquals(c.getSpaceConsumed(), (sizeFactorA + sizeFactorB + sizeFactorC) * fileSpace); + assertEquals(20, cluster.getNamesystem().getFSDirectory().getYieldCount()); } finally { cluster.shutdown(); } @@ -905,6 +917,9 @@ public void testMultipleFilesSmallerThanOneBlock() throws Exception { final int BLOCK_SIZE = 6 * 1024; conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE); conf.setBoolean(DFSConfigKeys.DFS_WEBHDFS_ENABLED_KEY, true); + // Make it relinquish locks. When run serially, the result should + // be identical. 
+ conf.setInt(DFSConfigKeys.DFS_CONTENT_SUMMARY_LIMIT_KEY, 2); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build(); cluster.waitActive(); @@ -971,6 +986,7 @@ public void testMultipleFilesSmallerThanOneBlock() throws Exception { exceededQuota = true; } assertTrue("Quota not exceeded", exceededQuota); + assertEquals(2, cluster.getNamesystem().getFSDirectory().getYieldCount()); } finally { cluster.shutdown(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestFsDatasetCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestFsDatasetCache.java index b0e907fc1f7..a6909cb4788 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestFsDatasetCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestFsDatasetCache.java @@ -28,8 +28,10 @@ import java.io.FileInputStream; import java.io.IOException; -import java.nio.MappedByteBuffer; +import java.nio.ByteBuffer; import java.nio.channels.FileChannel; +import java.util.HashSet; +import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -49,7 +51,6 @@ import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi; import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetCache.PageRounder; import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.MappableBlock; -import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.MappableBlock.Mlocker; import org.apache.hadoop.hdfs.server.namenode.FSImage; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.protocol.BlockIdCommand; @@ -60,6 +61,7 @@ import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat; import org.apache.hadoop.hdfs.server.protocol.StorageReport; import org.apache.hadoop.io.nativeio.NativeIO; +import org.apache.hadoop.io.nativeio.NativeIO.POSIX.CacheManipulator; import org.apache.hadoop.metrics2.MetricsRecordBuilder; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.MetricsAsserts; @@ -87,8 +89,7 @@ public class TestFsDatasetCache { private static FsDatasetSpi fsd; private static DatanodeProtocolClientSideTranslatorPB spyNN; private static PageRounder rounder = new PageRounder(); - - private Mlocker mlocker; + private static CacheManipulator prevCacheManipulator; @Before public void setUp() throws Exception { @@ -96,6 +97,8 @@ public void setUp() throws Exception { assumeTrue(NativeIO.getMemlockLimit() >= CACHE_CAPACITY); conf = new HdfsConfiguration(); conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_CACHING_ENABLED_KEY, true); + conf.setLong(DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_RETRY_INTERVAL_MS, + 500); conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE); conf.setLong(DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY, CACHE_CAPACITY); @@ -113,8 +116,19 @@ public void setUp() throws Exception { fsd = dn.getFSDataset(); spyNN = DataNodeTestUtils.spyOnBposToNN(dn, nn); - // Save the current mlocker and replace it at the end of the test - mlocker = MappableBlock.mlocker; + + prevCacheManipulator = NativeIO.POSIX.cacheManipulator; + + // Save the current CacheManipulator and replace it at the end of the test + // Stub out mlock calls to avoid failing when not enough memory is lockable + // by the operating system. 
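// The swap goes through the static NativeIO.POSIX.cacheManipulator hook; the
// manipulator saved in prevCacheManipulator above is put back in tearDown(),
// so later tests in the same JVM see the real mlock behaviour again. The
// testCacheAndUncacheBlockWithRetries case below uses the same hook to throw
// an IOException on the first mlock of each block and succeed on the retry.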
+ NativeIO.POSIX.cacheManipulator = new CacheManipulator() { + @Override + public void mlock(String identifier, + ByteBuffer mmap, long length) throws IOException { + LOG.info("mlocking " + identifier); + } + }; } @After @@ -125,8 +139,8 @@ public void tearDown() throws Exception { if (cluster != null) { cluster.shutdown(); } - // Restore the original mlocker - MappableBlock.mlocker = mlocker; + // Restore the original CacheManipulator + NativeIO.POSIX.cacheManipulator = prevCacheManipulator; } private static void setHeartbeatResponse(DatanodeCommand[] cmds) @@ -214,8 +228,7 @@ public Boolean get() { return expected; } - @Test(timeout=600000) - public void testCacheAndUncacheBlock() throws Exception { + private void testCacheAndUncacheBlock() throws Exception { LOG.info("beginning testCacheAndUncacheBlock"); final int NUM_BLOCKS = 5; @@ -268,6 +281,42 @@ public void testCacheAndUncacheBlock() throws Exception { LOG.info("finishing testCacheAndUncacheBlock"); } + @Test(timeout=600000) + public void testCacheAndUncacheBlockSimple() throws Exception { + testCacheAndUncacheBlock(); + } + + /** + * Run testCacheAndUncacheBlock with some failures injected into the mlock + * call. This tests the ability of the NameNode to resend commands. + */ + @Test(timeout=600000) + public void testCacheAndUncacheBlockWithRetries() throws Exception { + CacheManipulator prevCacheManipulator = NativeIO.POSIX.cacheManipulator; + + try { + NativeIO.POSIX.cacheManipulator = new CacheManipulator() { + private final Set seenIdentifiers = new HashSet(); + + @Override + public void mlock(String identifier, + ByteBuffer mmap, long length) throws IOException { + if (seenIdentifiers.contains(identifier)) { + // mlock succeeds the second time. + LOG.info("mlocking " + identifier); + return; + } + seenIdentifiers.add(identifier); + throw new IOException("injecting IOException during mlock of " + + identifier); + } + }; + testCacheAndUncacheBlock(); + } finally { + NativeIO.POSIX.cacheManipulator = prevCacheManipulator; + } + } + @Test(timeout=600000) public void testFilesExceedMaxLockedMemory() throws Exception { LOG.info("beginning testFilesExceedMaxLockedMemory"); @@ -357,10 +406,11 @@ public void testUncachingBlocksBeforeCachingFinishes() throws Exception { assertEquals("Unexpected cache capacity", CACHE_CAPACITY, cacheCapacity); assertEquals("Unexpected amount of cache used", current, cacheUsed); - MappableBlock.mlocker = new MappableBlock.Mlocker() { + NativeIO.POSIX.cacheManipulator = new NativeIO.POSIX.CacheManipulator() { @Override - public void mlock(MappedByteBuffer mmap, long length) throws IOException { - LOG.info("An mlock operation is starting."); + public void mlock(String identifier, + ByteBuffer mmap, long length) throws IOException { + LOG.info("An mlock operation is starting on " + identifier); try { Thread.sleep(3000); } catch (InterruptedException e) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCorruptFilesJsp.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCorruptFilesJsp.java index d399ddf856a..181161430d6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCorruptFilesJsp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCorruptFilesJsp.java @@ -93,6 +93,10 @@ public void testCorruptFilesJsp() throws Exception { in.close(); } + try { + Thread.sleep(3000); // Wait for block reports. 
They shouldn't matter. + } catch (InterruptedException ie) {} + // verify if all corrupt files were reported to NN badFiles = namenode.getNamesystem().listCorruptFileBlocks("/", null); assertTrue("Expecting 3 corrupt files, but got " + badFiles.size(), diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestPathBasedCacheRequests.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestPathBasedCacheRequests.java index 890ccfb525c..7182aad7043 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestPathBasedCacheRequests.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestPathBasedCacheRequests.java @@ -33,6 +33,7 @@ import java.io.IOException; import java.nio.MappedByteBuffer; import java.security.PrivilegedExceptionAction; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Iterator; import java.util.LinkedList; @@ -60,6 +61,7 @@ import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.MappableBlock; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; import org.apache.hadoop.io.nativeio.NativeIO; +import org.apache.hadoop.io.nativeio.NativeIO.POSIX.CacheManipulator; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.test.GenericTestUtils; @@ -81,15 +83,7 @@ public class TestPathBasedCacheRequests { static private MiniDFSCluster cluster; static private DistributedFileSystem dfs; static private NamenodeProtocols proto; - - static { - MappableBlock.mlocker = new MappableBlock.Mlocker() { - @Override - public void mlock(MappedByteBuffer mmap, long length) throws IOException { - // Stubbed out for testing - } - }; - } + static private CacheManipulator prevCacheManipulator; @Before public void setup() throws Exception { @@ -101,6 +95,18 @@ public void setup() throws Exception { cluster.waitActive(); dfs = cluster.getFileSystem(); proto = cluster.getNameNodeRpc(); + prevCacheManipulator = NativeIO.POSIX.cacheManipulator; + + // Save the current CacheManipulator and replace it at the end of the test + // Stub out mlock calls to avoid failing when not enough memory is lockable + // by the operating system. 
+ NativeIO.POSIX.cacheManipulator = new CacheManipulator() { + @Override + public void mlock(String identifier, + ByteBuffer mmap, long length) throws IOException { + LOG.info("mlocking " + identifier); + } + }; } @After @@ -108,6 +114,8 @@ public void teardown() throws Exception { if (cluster != null) { cluster.shutdown(); } + // Restore the original CacheManipulator + NativeIO.POSIX.cacheManipulator = prevCacheManipulator; } @Test(timeout=60000) @@ -552,8 +560,8 @@ public void testCacheManagerRestart() throws Exception { * @throws Exception */ private static void waitForCachedBlocks(NameNode nn, - final int expectedCachedBlocks, final int expectedCachedReplicas) - throws Exception { + final int expectedCachedBlocks, final int expectedCachedReplicas, + final String logString) throws Exception { final FSNamesystem namesystem = nn.getNamesystem(); final CacheManager cacheManager = namesystem.getCacheManager(); LOG.info("Waiting for " + expectedCachedBlocks + " blocks with " + @@ -581,9 +589,9 @@ public Boolean get() { (numCachedReplicas == expectedCachedReplicas)) { return true; } else { - LOG.info("cached blocks: have " + numCachedBlocks + - " / " + expectedCachedBlocks); - LOG.info("cached replicas: have " + numCachedReplicas + + LOG.info(logString + " cached blocks: have " + numCachedBlocks + + " / " + expectedCachedBlocks + ". " + + "cached replicas: have " + numCachedReplicas + " / " + expectedCachedReplicas); return false; } @@ -681,7 +689,7 @@ public void testWaitForCachedReplicas() throws Exception { paths.add(p.toUri().getPath()); } // Check the initial statistics at the namenode - waitForCachedBlocks(namenode, 0, 0); + waitForCachedBlocks(namenode, 0, 0, "testWaitForCachedReplicas:0"); // Cache and check each path in sequence int expected = 0; for (int i=0; i entries = @@ -701,7 +710,8 @@ public void testWaitForCachedReplicas() throws Exception { PathBasedCacheDirective directive = entries.next(); nnRpc.removePathBasedCacheDirective(directive.getId()); expected -= numBlocksPerFile; - waitForCachedBlocks(namenode, expected, expected); + waitForCachedBlocks(namenode, expected, expected, + "testWaitForCachedReplicas:2"); } } finally { cluster.shutdown(); @@ -735,7 +745,8 @@ public void testAddingPathBasedCacheDirectivesWhenCachingIsDisabled() paths.add(p.toUri().getPath()); } // Check the initial statistics at the namenode - waitForCachedBlocks(namenode, 0, 0); + waitForCachedBlocks(namenode, 0, 0, + "testAddingPathBasedCacheDirectivesWhenCachingIsDisabled:0"); // Cache and check each path in sequence int expected = 0; for (int i=0; i aspect = (TokenAspect) Whitebox.getInternalState(fs, "tokenAspect"); + // use ip-based tokens SecurityUtilTestHelper.setTokenServiceUseIp(true); @@ -208,7 +210,7 @@ private void checkTokenSelection(HftpFileSystem fs, ugi.addToken(hdfsToken); // test fallback to hdfs token - Token token = fs.selectDelegationToken(ugi); + Token token = aspect.selectDelegationToken(ugi); assertNotNull(token); assertEquals(hdfsToken, token); @@ -217,13 +219,13 @@ private void checkTokenSelection(HftpFileSystem fs, new byte[0], new byte[0], HftpFileSystem.TOKEN_KIND, new Text("127.0.0.1:"+port)); ugi.addToken(hftpToken); - token = fs.selectDelegationToken(ugi); + token = aspect.selectDelegationToken(ugi); assertNotNull(token); assertEquals(hftpToken, token); // switch to using host-based tokens, no token should match SecurityUtilTestHelper.setTokenServiceUseIp(false); - token = fs.selectDelegationToken(ugi); + token = aspect.selectDelegationToken(ugi); assertNull(token); // 
test fallback to hdfs token @@ -232,7 +234,7 @@ private void checkTokenSelection(HftpFileSystem fs, DelegationTokenIdentifier.HDFS_DELEGATION_KIND, new Text("localhost:8020")); ugi.addToken(hdfsToken); - token = fs.selectDelegationToken(ugi); + token = aspect.selectDelegationToken(ugi); assertNotNull(token); assertEquals(hdfsToken, token); @@ -241,36 +243,8 @@ private void checkTokenSelection(HftpFileSystem fs, new byte[0], new byte[0], HftpFileSystem.TOKEN_KIND, new Text("localhost:"+port)); ugi.addToken(hftpToken); - token = fs.selectDelegationToken(ugi); + token = aspect.selectDelegationToken(ugi); assertNotNull(token); assertEquals(hftpToken, token); } - - static class MyHftpFileSystem extends HftpFileSystem { - @Override - public URI getCanonicalUri() { - return super.getCanonicalUri(); - } - @Override - public int getDefaultPort() { - return super.getDefaultPort(); - } - // don't automatically get a token - @Override - protected void initDelegationToken() throws IOException {} - } - - static class MyHsftpFileSystem extends HsftpFileSystem { - @Override - public URI getCanonicalUri() { - return super.getCanonicalUri(); - } - @Override - public int getDefaultPort() { - return super.getDefaultPort(); - } - // don't automatically get a token - @Override - protected void initDelegationToken() throws IOException {} - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestTokenAspect.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestTokenAspect.java new file mode 100644 index 00000000000..abee3a25566 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestTokenAspect.java @@ -0,0 +1,298 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hdfs.web; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.mockito.Matchers.anyString; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.verify; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.DelegationTokenRenewer; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.security.SecurityUtilTestHelper; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.security.token.TokenIdentifier; +import org.apache.hadoop.util.Progressable; +import org.junit.Test; +import org.mockito.Mockito; +import org.mockito.internal.util.reflection.Whitebox; + +public class TestTokenAspect { + + private static class DummyFs extends FileSystem implements + DelegationTokenRenewer.Renewable, TokenAspect.TokenManagementDelegator { + + private static final Text TOKEN_KIND = new Text("DummyFS Token"); + private boolean emulateSecurityEnabled; + private TokenAspect tokenAspect; + private UserGroupInformation ugi = UserGroupInformation + .createUserForTesting("foo", new String[] { "bar" }); + private URI uri; + + @Override + public FSDataOutputStream append(Path f, int bufferSize, + Progressable progress) throws IOException { + return null; + } + + @Override + public void cancelDelegationToken(Token token) throws IOException { + } + + @Override + public FSDataOutputStream create(Path f, FsPermission permission, + boolean overwrite, int bufferSize, short replication, long blockSize, + Progressable progress) throws IOException { + return null; + } + + @Override + public boolean delete(Path f, boolean recursive) throws IOException { + return false; + } + + @Override + public URI getCanonicalUri() { + return super.getCanonicalUri(); + } + + @Override + public FileStatus getFileStatus(Path f) throws IOException { + return null; + } + + @Override + public Token getRenewToken() { + return null; + } + + @Override + public URI getUri() { + return uri; + } + + @Override + public Path getWorkingDirectory() { + return null; + } + + @Override + public void initialize(URI name, Configuration conf) throws IOException { + super.initialize(name, conf); + setConf(conf); + try { + this.uri = new URI(name.getScheme(), name.getAuthority(), null, null, + null); + } catch (URISyntaxException e) { + throw new IllegalArgumentException(e); + } + + tokenAspect = new TokenAspect(this, DummyFs.TOKEN_KIND); + if (emulateSecurityEnabled || UserGroupInformation.isSecurityEnabled()) { + tokenAspect.initDelegationToken(ugi); + } + } + + @Override + public FileStatus[] listStatus(Path f) throws FileNotFoundException, + IOException { + return null; + } + + @Override + public boolean mkdirs(Path f, FsPermission permission) throws IOException { + return false; + } + + @Override + 
public FSDataInputStream open(Path f, int bufferSize) throws IOException { + return null; + } + + @Override + public boolean rename(Path src, Path dst) throws IOException { + return false; + } + + @Override + public long renewDelegationToken(Token token) throws IOException { + return 0; + } + + @Override + public void setDelegationToken(Token token) { + } + + @Override + public void setWorkingDirectory(Path new_dir) { + } + } + + @Test + public void testGetRemoteToken() throws IOException, URISyntaxException { + Configuration conf = new Configuration(); + UserGroupInformation.setConfiguration(conf); + DummyFs fs = spy(new DummyFs()); + Token token = new Token(new byte[0], + new byte[0], DummyFs.TOKEN_KIND, new Text("127.0.0.1:1234")); + + doReturn(token).when(fs).getDelegationToken(anyString()); + fs.initialize(new URI("dummyfs://127.0.0.1:1234"), conf); + + fs.tokenAspect.ensureTokenInitialized(); + + // Select a token, store and renew it + verify(fs).setDelegationToken(token); + assertNotNull(Whitebox.getInternalState(fs.tokenAspect, "dtRenewer")); + assertNotNull(Whitebox.getInternalState(fs.tokenAspect, "action")); + } + + @Test + public void testGetRemoteTokenFailure() throws IOException, + URISyntaxException { + Configuration conf = new Configuration(); + UserGroupInformation.setConfiguration(conf); + DummyFs fs = spy(new DummyFs()); + IOException e = new IOException(); + doThrow(e).when(fs).getDelegationToken(anyString()); + + fs.emulateSecurityEnabled = true; + fs.initialize(new URI("dummyfs://127.0.0.1:1234"), conf); + try { + fs.tokenAspect.ensureTokenInitialized(); + } catch (IOException exc) { + assertEquals(e, exc); + } + } + + @Test + public void testInitWithNoTokens() throws IOException, URISyntaxException { + Configuration conf = new Configuration(); + UserGroupInformation.setConfiguration(conf); + DummyFs fs = spy(new DummyFs()); + doReturn(null).when(fs).getDelegationToken(anyString()); + fs.initialize(new URI("dummyfs://127.0.0.1:1234"), conf); + + fs.tokenAspect.ensureTokenInitialized(); + + // No token will be selected. 
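// getDelegationToken() is stubbed to return null here, so ensureTokenInitialized()
// only flips hasInitedToken; it never stores a token or schedules a renew action.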
+ verify(fs, never()).setDelegationToken( + Mockito.> any()); + } + + @Test + public void testInitWithUGIToken() throws IOException, URISyntaxException { + Configuration conf = new Configuration(); + UserGroupInformation.setConfiguration(conf); + DummyFs fs = spy(new DummyFs()); + doReturn(null).when(fs).getDelegationToken(anyString()); + + Token token = new Token(new byte[0], + new byte[0], DummyFs.TOKEN_KIND, new Text("127.0.0.1:1234")); + fs.ugi.addToken(token); + fs.ugi.addToken(new Token(new byte[0], new byte[0], + new Text("Other token"), new Text("127.0.0.1:8021"))); + assertEquals("wrong tokens in user", 2, fs.ugi.getTokens().size()); + + fs.emulateSecurityEnabled = true; + fs.initialize(new URI("dummyfs://127.0.0.1:1234"), conf); + fs.tokenAspect.ensureTokenInitialized(); + + // Select a token from ugi (not from the remote host), store it but don't + // renew it + verify(fs).setDelegationToken(token); + verify(fs, never()).getDelegationToken(anyString()); + assertNull(Whitebox.getInternalState(fs.tokenAspect, "dtRenewer")); + assertNull(Whitebox.getInternalState(fs.tokenAspect, "action")); + } + + @Test + public void testTokenSelectionPreferences() throws IOException, + URISyntaxException { + Configuration conf = new Configuration(); + DummyFs fs = spy(new DummyFs()); + doReturn(null).when(fs).getDelegationToken(anyString()); + fs.initialize(new URI("dummyfs://localhost:1234"), conf); + TokenAspect aspect = new TokenAspect(fs, + DummyFs.TOKEN_KIND); + UserGroupInformation ugi = UserGroupInformation.createUserForTesting("foo", + new String[] { "bar" }); + UserGroupInformation.setConfiguration(conf); + + // use ip-based tokens + SecurityUtilTestHelper.setTokenServiceUseIp(true); + + // test fallback to hdfs token + Token hdfsToken = new Token(new byte[0], + new byte[0], DelegationTokenIdentifier.HDFS_DELEGATION_KIND, new Text( + "127.0.0.1:8020")); + ugi.addToken(hdfsToken); + + // test fallback to hdfs token + Token token = aspect.selectDelegationToken(ugi); + assertEquals(hdfsToken, token); + + // test dummyfs is favored over hdfs + Token dummyFsToken = new Token( + new byte[0], new byte[0], DummyFs.TOKEN_KIND, + new Text("127.0.0.1:1234")); + ugi.addToken(dummyFsToken); + token = aspect.selectDelegationToken(ugi); + assertEquals(dummyFsToken, token); + + // switch to using host-based tokens, no token should match + SecurityUtilTestHelper.setTokenServiceUseIp(false); + token = aspect.selectDelegationToken(ugi); + assertNull(token); + + // test fallback to hdfs token + hdfsToken = new Token(new byte[0], new byte[0], + DelegationTokenIdentifier.HDFS_DELEGATION_KIND, new Text( + "localhost:8020")); + ugi.addToken(hdfsToken); + token = aspect.selectDelegationToken(ugi); + assertEquals(hdfsToken, token); + + // test dummyfs is favored over hdfs + dummyFsToken = new Token(new byte[0], new byte[0], + DummyFs.TOKEN_KIND, new Text("localhost:1234")); + ugi.addToken(dummyFsToken); + token = aspect.selectDelegationToken(ugi); + assertEquals(dummyFsToken, token); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/tools/TestDelegationTokenRemoteFetcher.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/tools/TestDelegationTokenRemoteFetcher.java index 259fdc7fcf2..2b090d27170 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/tools/TestDelegationTokenRemoteFetcher.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/tools/TestDelegationTokenRemoteFetcher.java @@ -26,6 +26,8 @@ import 
java.io.IOException; import java.net.InetSocketAddress; +import java.net.URI; +import java.net.URISyntaxException; import java.nio.charset.Charset; import java.util.Iterator; import java.util.Map; @@ -37,10 +39,12 @@ import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.hdfs.tools.DelegationTokenFetcher; import org.apache.hadoop.hdfs.web.HftpFileSystem; +import org.apache.hadoop.hdfs.web.URLConnectionFactory; import org.apache.hadoop.io.DataOutputBuffer; import org.apache.hadoop.io.Text; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.Credentials; +import org.apache.hadoop.security.authentication.client.AuthenticationException; import org.apache.hadoop.security.token.Token; import org.apache.log4j.Logger; import org.jboss.netty.bootstrap.ServerBootstrap; @@ -59,6 +63,7 @@ import org.jboss.netty.handler.codec.http.DefaultHttpResponse; import org.jboss.netty.handler.codec.http.HttpChunkAggregator; import org.jboss.netty.handler.codec.http.HttpHeaders; +import org.jboss.netty.handler.codec.http.HttpMethod; import org.jboss.netty.handler.codec.http.HttpRequest; import org.jboss.netty.handler.codec.http.HttpRequestDecoder; import org.jboss.netty.handler.codec.http.HttpResponse; @@ -78,9 +83,10 @@ public class TestDelegationTokenRemoteFetcher { private static final String EXP_DATE = "124123512361236"; private static final String tokenFile = "http.file.dta"; + private static final URLConnectionFactory connectionFactory = URLConnectionFactory.DEFAULT_CONNECTION_FACTORY; private int httpPort; - private String serviceUrl; + private URI serviceUrl; private FileSystem fileSys; private Configuration conf; private ServerBootstrap bootstrap; @@ -92,7 +98,7 @@ public void init() throws Exception { conf = new Configuration(); fileSys = FileSystem.getLocal(conf); httpPort = NetUtils.getFreeSocketPort(); - serviceUrl = "http://localhost:" + httpPort; + serviceUrl = new URI("http://localhost:" + httpPort); testToken = createToken(serviceUrl); } @@ -121,9 +127,9 @@ public void testTokenFetchFail() throws Exception { * try to fetch token without http server with IOException */ @Test - public void testTokenRenewFail() { + public void testTokenRenewFail() throws AuthenticationException { try { - DelegationTokenFetcher.renewDelegationToken(serviceUrl, testToken); + DelegationTokenFetcher.renewDelegationToken(connectionFactory, serviceUrl, testToken); fail("Token fetcher shouldn't be able to renew tokens in absense of NN"); } catch (IOException ex) { } @@ -133,9 +139,9 @@ public void testTokenRenewFail() { * try cancel token without http server with IOException */ @Test - public void expectedTokenCancelFail() { + public void expectedTokenCancelFail() throws AuthenticationException { try { - DelegationTokenFetcher.cancelDelegationToken(serviceUrl, testToken); + DelegationTokenFetcher.cancelDelegationToken(connectionFactory, serviceUrl, testToken); fail("Token fetcher shouldn't be able to cancel tokens in absense of NN"); } catch (IOException ex) { } @@ -145,11 +151,12 @@ public void expectedTokenCancelFail() { * try fetch token and get http response with error */ @Test - public void expectedTokenRenewErrorHttpResponse() { + public void expectedTokenRenewErrorHttpResponse() + throws AuthenticationException, URISyntaxException { bootstrap = startHttpServer(httpPort, testToken, serviceUrl); try { - DelegationTokenFetcher.renewDelegationToken(serviceUrl + "/exception", - createToken(serviceUrl)); + 
DelegationTokenFetcher.renewDelegationToken(connectionFactory, new URI( + serviceUrl.toString() + "/exception"), createToken(serviceUrl)); fail("Token fetcher shouldn't be able to renew tokens using an invalid" + " NN URL"); } catch (IOException ex) { @@ -159,13 +166,14 @@ public void expectedTokenRenewErrorHttpResponse() { } /** - * * */ @Test - public void testCancelTokenFromHttp() throws IOException { + public void testCancelTokenFromHttp() throws IOException, + AuthenticationException { bootstrap = startHttpServer(httpPort, testToken, serviceUrl); - DelegationTokenFetcher.cancelDelegationToken(serviceUrl, testToken); + DelegationTokenFetcher.cancelDelegationToken(connectionFactory, serviceUrl, + testToken); if (assertionError != null) throw assertionError; } @@ -174,11 +182,12 @@ public void testCancelTokenFromHttp() throws IOException { * Call renew token using http server return new expiration time */ @Test - public void testRenewTokenFromHttp() throws IOException { + public void testRenewTokenFromHttp() throws IOException, + NumberFormatException, AuthenticationException { bootstrap = startHttpServer(httpPort, testToken, serviceUrl); assertTrue("testRenewTokenFromHttp error", Long.valueOf(EXP_DATE) == DelegationTokenFetcher.renewDelegationToken( - serviceUrl, testToken)); + connectionFactory, serviceUrl, testToken)); if (assertionError != null) throw assertionError; } @@ -204,11 +213,11 @@ public void expectedTokenIsRetrievedFromHttp() throws Exception { throw assertionError; } - private static Token createToken(String serviceUri) { + private static Token createToken(URI serviceUri) { byte[] pw = "hadoop".getBytes(); byte[] ident = new DelegationTokenIdentifier(new Text("owner"), new Text( "renewer"), new Text("realuser")).getBytes(); - Text service = new Text(serviceUri); + Text service = new Text(serviceUri.toString()); return new Token(ident, pw, HftpFileSystem.TOKEN_KIND, service); } @@ -301,8 +310,15 @@ public CredentialsLogicHandler(Token token, public void messageReceived(ChannelHandlerContext ctx, final MessageEvent e) throws Exception { HttpRequest request = (HttpRequest) e.getMessage(); - if (request.getMethod() != GET) { - return; + + if (request.getMethod() == HttpMethod.OPTIONS) { + // Mimic SPNEGO authentication + HttpResponse response = new DefaultHttpResponse(HTTP_1_1, + HttpResponseStatus.OK); + response.addHeader("Set-Cookie", "hadoop-auth=1234"); + e.getChannel().write(response).addListener(ChannelFutureListener.CLOSE); + } else if (request.getMethod() != GET) { + e.getChannel().close(); } UnmodifiableIterator> iter = routes.entrySet() .iterator(); @@ -338,7 +354,7 @@ public void exceptionCaught(ChannelHandlerContext ctx, ExceptionEvent e) } private ServerBootstrap startHttpServer(int port, - final Token token, final String url) { + final Token token, final URI url) { ServerBootstrap bootstrap = new ServerBootstrap( new NioServerSocketChannelFactory(Executors.newCachedThreadPool(), Executors.newCachedThreadPool())); @@ -348,7 +364,7 @@ private ServerBootstrap startHttpServer(int port, public ChannelPipeline getPipeline() throws Exception { return Channels.pipeline(new HttpRequestDecoder(), new HttpChunkAggregator(65536), new HttpResponseEncoder(), - new CredentialsLogicHandler(token, url)); + new CredentialsLogicHandler(token, url.toString())); } }); bootstrap.bind(new InetSocketAddress("localhost", port)); diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index ca1e4483b22..b7aeb591e76 100644 --- 
a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -170,6 +170,14 @@ Release 2.3.0 - UNRELEASED MAPREDUCE-5613. DefaultSpeculator holds and checks hashmap that is always empty (Gera Shegalov via Sandy Ryza) + MAPREDUCE-5431. Missing pom dependency in MR-client (Timothy St. Clair + via stevel) + + MAPREDUCE-5624 Move grizzly-test and junit dependencies to test scope + (Ted Yu via stevel) + + MAPREDUCE-5481. Enable uber jobs to have multiple reducers (Sandy Ryza) + OPTIMIZATIONS MAPREDUCE-5484. YarnChild unnecessarily loads job conf twice (Sandy Ryza) @@ -209,6 +217,9 @@ Release 2.3.0 - UNRELEASED MAPREDUCE-5610. TestSleepJob fails in jdk7 (Jonathan Eagles via jlowe) + MAPREDUCE-5616. MR Client-AppMaster RPC max retries on socket timeout is too + high. (cnauroth) + Release 2.2.1 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/LocalContainerLauncher.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/LocalContainerLauncher.java index dd157282ae5..578cdcdb91a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/LocalContainerLauncher.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/LocalContainerLauncher.java @@ -20,7 +20,9 @@ import java.io.File; import java.io.IOException; +import java.util.HashMap; import java.util.HashSet; +import java.util.Map; import java.util.concurrent.BlockingQueue; import java.util.concurrent.LinkedBlockingQueue; @@ -35,6 +37,7 @@ import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.JobCounter; import org.apache.hadoop.mapreduce.MRConfig; +import org.apache.hadoop.mapreduce.TaskID; import org.apache.hadoop.mapreduce.TypeConverter; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; import org.apache.hadoop.mapreduce.v2.api.records.TaskType; @@ -168,6 +171,10 @@ private class SubtaskRunner implements Runnable { public void run() { ContainerLauncherEvent event = null; + // Collect locations of map outputs to give to reduces + Map localMapFiles = + new HashMap(); + // _must_ either run subtasks sequentially or accept expense of new JVMs // (i.e., fork()), else will get weird failures when maps try to create/ // write same dirname or filename: no chdir() in Java @@ -223,7 +230,7 @@ public void run() { context.getEventHandler().handle(jce); } runSubtask(remoteTask, ytask.getType(), attemptID, numMapTasks, - (numReduceTasks > 0)); + (numReduceTasks > 0), localMapFiles); } catch (RuntimeException re) { JobCounterUpdateEvent jce = new JobCounterUpdateEvent(attemptID.getTaskId().getJobId()); @@ -265,7 +272,8 @@ private void runSubtask(org.apache.hadoop.mapred.Task task, final TaskType taskType, TaskAttemptId attemptID, final int numMapTasks, - boolean renameOutputs) + boolean renameOutputs, + Map localMapFiles) throws RuntimeException, IOException { org.apache.hadoop.mapred.TaskAttemptID classicAttemptID = TypeConverter.fromYarn(attemptID); @@ -309,7 +317,9 @@ private void runSubtask(org.apache.hadoop.mapred.Task task, map.run(conf, umbilical); if (renameOutputs) { - renameMapOutputForReduce(conf, attemptID, map.getMapOutputFile()); + MapOutputFile renamed = renameMapOutputForReduce(conf, attemptID, + map.getMapOutputFile()); + localMapFiles.put(classicAttemptID, 
renamed); } relocalize(); @@ -335,10 +345,11 @@ private void runSubtask(org.apache.hadoop.mapred.Task task, conf.set(MRConfig.MASTER_ADDRESS, "local"); // bypass shuffle ReduceTask reduce = (ReduceTask)task; + reduce.setLocalMapFiles(localMapFiles); reduce.setConf(conf); reduce.run(conf, umbilical); - //relocalize(); // needed only if more than one reducer supported (is MAPREDUCE-434 fixed yet?) + relocalize(); } } catch (FSError e) { @@ -387,15 +398,16 @@ private void runSubtask(org.apache.hadoop.mapred.Task task, * so there are no particular compatibility issues.) */ @SuppressWarnings("deprecation") - private void renameMapOutputForReduce(JobConf conf, TaskAttemptId mapId, - MapOutputFile subMapOutputFile) - throws IOException { + private MapOutputFile renameMapOutputForReduce(JobConf conf, + TaskAttemptId mapId, MapOutputFile subMapOutputFile) throws IOException { FileSystem localFs = FileSystem.getLocal(conf); // move map output to reduce input Path mapOut = subMapOutputFile.getOutputFile(); FileStatus mStatus = localFs.getFileStatus(mapOut); Path reduceIn = subMapOutputFile.getInputFileForWrite( TypeConverter.fromYarn(mapId).getTaskID(), mStatus.getLen()); + Path mapOutIndex = new Path(mapOut.toString() + ".index"); + Path reduceInIndex = new Path(reduceIn.toString() + ".index"); if (LOG.isDebugEnabled()) { LOG.debug("Renaming map output file for task attempt " + mapId.toString() + " from original location " + mapOut.toString() @@ -407,6 +419,10 @@ private void renameMapOutputForReduce(JobConf conf, TaskAttemptId mapId, } if (!localFs.rename(mapOut, reduceIn)) throw new IOException("Couldn't rename " + mapOut); + if (!localFs.rename(mapOutIndex, reduceInIndex)) + throw new IOException("Couldn't rename " + mapOutIndex); + + return new RenamedMapOutputFile(reduceIn); } /** @@ -441,5 +457,70 @@ private void relocalize() { } } // end SubtaskRunner + + private static class RenamedMapOutputFile extends MapOutputFile { + private Path path; + + public RenamedMapOutputFile(Path path) { + this.path = path; + } + + @Override + public Path getOutputFile() throws IOException { + return path; + } + + @Override + public Path getOutputFileForWrite(long size) throws IOException { + throw new UnsupportedOperationException(); + } + @Override + public Path getOutputFileForWriteInVolume(Path existing) { + throw new UnsupportedOperationException(); + } + @Override + public Path getOutputIndexFile() throws IOException { + throw new UnsupportedOperationException(); + } + @Override + public Path getOutputIndexFileForWrite(long size) throws IOException { + throw new UnsupportedOperationException(); + } + @Override + public Path getOutputIndexFileForWriteInVolume(Path existing) { + throw new UnsupportedOperationException(); + } + @Override + public Path getSpillFile(int spillNumber) throws IOException { + throw new UnsupportedOperationException(); + } + @Override + public Path getSpillFileForWrite(int spillNumber, long size) + throws IOException { + throw new UnsupportedOperationException(); + } + @Override + public Path getSpillIndexFile(int spillNumber) throws IOException { + throw new UnsupportedOperationException(); + } + @Override + public Path getSpillIndexFileForWrite(int spillNumber, long size) + throws IOException { + throw new UnsupportedOperationException(); + } + @Override + public Path getInputFile(int mapId) throws IOException { + throw new UnsupportedOperationException(); + } + @Override + public Path getInputFileForWrite(TaskID mapId, long size) + throws IOException { + throw new 
UnsupportedOperationException(); + } + @Override + public void removeAll() throws IOException { + throw new UnsupportedOperationException(); + } + } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java index c884a51cbf9..5f08303b36f 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java @@ -1173,11 +1173,7 @@ private void makeUberDecision(long dataInputLength) { // these are no longer "system" settings, necessarily; user may override int sysMaxMaps = conf.getInt(MRJobConfig.JOB_UBERTASK_MAXMAPS, 9); - //FIXME: handling multiple reduces within a single AM does not seem to - //work. int sysMaxReduces = conf.getInt(MRJobConfig.JOB_UBERTASK_MAXREDUCES, 1); - boolean isValidUberMaxReduces = (sysMaxReduces == 0) - || (sysMaxReduces == 1); long sysMaxBytes = conf.getLong(MRJobConfig.JOB_UBERTASK_MAXBYTES, fs.getDefaultBlockSize(this.remoteJobSubmitDir)); // FIXME: this is wrong; get FS from @@ -1225,7 +1221,7 @@ private void makeUberDecision(long dataInputLength) { // and thus requires sequential execution. isUber = uberEnabled && smallNumMapTasks && smallNumReduceTasks && smallInput && smallMemory && smallCpu - && notChainJob && isValidUberMaxReduces; + && notChainJob; if (isUber) { LOG.info("Uberizing job " + jobId + ": " + numMapTasks + "m+" @@ -1259,8 +1255,6 @@ private void makeUberDecision(long dataInputLength) { msg.append(" too much RAM;"); if (!notChainJob) msg.append(" chainjob;"); - if (!isValidUberMaxReduces) - msg.append(" not supported uber max reduces"); LOG.info(msg.toString()); } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java index 76097de896f..2622ec5da66 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java @@ -357,13 +357,21 @@ public interface MRJobConfig { public static final String MR_AM_PREFIX = MR_PREFIX + "am."; - /** The number of client retires to the AM - before reconnecting to the RM + /** The number of client retries to the AM - before reconnecting to the RM * to fetch Application State. */ public static final String MR_CLIENT_TO_AM_IPC_MAX_RETRIES = MR_PREFIX + "client-am.ipc.max-retries"; public static final int DEFAULT_MR_CLIENT_TO_AM_IPC_MAX_RETRIES = 3; + /** The number of client retries on socket timeouts to the AM - before + * reconnecting to the RM to fetch Application Status. 
+ */ + public static final String MR_CLIENT_TO_AM_IPC_MAX_RETRIES_ON_TIMEOUTS = + MR_PREFIX + "client-am.ipc.max-retries-on-timeouts"; + public static final int + DEFAULT_MR_CLIENT_TO_AM_IPC_MAX_RETRIES_ON_TIMEOUTS = 3; + /** * The number of client retries to the RM/HS before throwing exception. */ diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml index 4a228820b23..fcaa275cc1a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml @@ -1010,6 +1010,13 @@ to the RM to fetch Application Status.
+ + yarn.app.mapreduce.client-am.ipc.max-retries-on-timeouts + 3 + The number of client retries on socket timeouts to the AM - before + reconnecting to the RM to fetch Application Status. + + yarn.app.mapreduce.client.max-retries 3 diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java index d9dd777efa5..c3eee2cfdcf 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java @@ -107,6 +107,10 @@ public ClientServiceDelegate(Configuration conf, ResourceMgrDelegate rm, CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, this.conf.getInt(MRJobConfig.MR_CLIENT_TO_AM_IPC_MAX_RETRIES, MRJobConfig.DEFAULT_MR_CLIENT_TO_AM_IPC_MAX_RETRIES)); + this.conf.setInt( + CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY, + this.conf.getInt(MRJobConfig.MR_CLIENT_TO_AM_IPC_MAX_RETRIES_ON_TIMEOUTS, + MRJobConfig.DEFAULT_MR_CLIENT_TO_AM_IPC_MAX_RETRIES_ON_TIMEOUTS)); this.rm = rm; this.jobId = jobId; this.historyServerProxy = historyServerProxy; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestUberAM.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestUberAM.java index 868c2d5ae3d..32199e55410 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestUberAM.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestUberAM.java @@ -20,6 +20,7 @@ import java.io.File; import java.io.IOException; +import java.util.Arrays; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -39,12 +40,14 @@ public class TestUberAM extends TestMRJobs { private static final Log LOG = LogFactory.getLog(TestUberAM.class); - + private int numSleepReducers; + @BeforeClass public static void setup() throws IOException { TestMRJobs.setup(); if (mrCluster != null) { mrCluster.getConfig().setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, true); + mrCluster.getConfig().setInt(MRJobConfig.JOB_UBERTASK_MAXREDUCES, 3); } } @@ -52,8 +55,19 @@ public static void setup() throws IOException { @Test public void testSleepJob() throws IOException, InterruptedException, ClassNotFoundException { + numSleepReducers = 1; if (mrCluster != null) { - mrCluster.getConfig().setInt("TestMRJobs.testSleepJob.reduces", 1); + mrCluster.getConfig().setInt("TestMRJobs.testSleepJob.reduces", numSleepReducers); + } + super.testSleepJob(); + } + + @Test + public void testSleepJobWithMultipleReducers() + throws IOException, InterruptedException, ClassNotFoundException { + numSleepReducers = 3; + if (mrCluster != null) { + mrCluster.getConfig().setInt("TestMRJobs.testSleepJob.reduces", numSleepReducers); } super.testSleepJob(); } @@ -67,7 +81,7 @@ protected void verifySleepJobCounters(Job job) throws InterruptedException, .getValue()); 
Assert.assertEquals(3, counters.findCounter(JobCounter.TOTAL_LAUNCHED_MAPS) .getValue()); - Assert.assertEquals(1, + Assert.assertEquals(numSleepReducers, counters.findCounter(JobCounter.TOTAL_LAUNCHED_REDUCES).getValue()); Assert .assertTrue(counters.findCounter(JobCounter.SLOTS_MILLIS_MAPS) != null @@ -76,11 +90,11 @@ protected void verifySleepJobCounters(Job job) throws InterruptedException, .assertTrue(counters.findCounter(JobCounter.SLOTS_MILLIS_MAPS) != null && counters.findCounter(JobCounter.SLOTS_MILLIS_MAPS).getValue() != 0); - Assert.assertEquals(3, counters.findCounter(JobCounter.NUM_UBER_SUBMAPS) - .getValue()); - Assert.assertEquals(1, counters.findCounter(JobCounter.NUM_UBER_SUBREDUCES) - .getValue()); - Assert.assertEquals(4, + Assert.assertEquals(3, + counters.findCounter(JobCounter.NUM_UBER_SUBMAPS).getValue()); + Assert.assertEquals(numSleepReducers, + counters.findCounter(JobCounter.NUM_UBER_SUBREDUCES).getValue()); + Assert.assertEquals(3 + numSleepReducers, counters.findCounter(JobCounter.TOTAL_LAUNCHED_UBERTASKS).getValue()); } @@ -138,8 +152,10 @@ public void testFailingMapper() TaskCompletionEvent[] events = job.getTaskCompletionEvents(0, 2); Assert.assertEquals(1, events.length); - Assert.assertEquals(TaskCompletionEvent.Status.TIPFAILED, - events[0].getStatus()); + // TIPFAILED if it comes from the AM, FAILED if it comes from the JHS + TaskCompletionEvent.Status status = events[0].getStatus(); + Assert.assertTrue(status == TaskCompletionEvent.Status.FAILED || + status == TaskCompletionEvent.Status.TIPFAILED); Assert.assertEquals(JobStatus.State.FAILED, job.getJobState()); //Disabling till UberAM honors MRJobConfig.MAP_MAX_ATTEMPTS diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml index f138b3db1af..6657662b35d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml @@ -158,6 +158,16 @@ commons-lang provided + + commons-collections + commons-collections + provided + + + com.sun.jersey.jersey-test-framework + jersey-test-framework-grizzly2 + test + diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-examples/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-examples/pom.xml index 199791eedbf..a94e663ee8a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-examples/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-examples/pom.xml @@ -93,6 +93,11 @@ test-jar test + + com.sun.jersey.jersey-test-framework + jersey-test-framework-grizzly2 + test + org.apache.hadoop hadoop-mapreduce-client-hs diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index a531e543bf6..2c13eb4440c 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -324,8 +324,8 @@ org.apache.commons - commons-math - 2.1 + commons-math3 + 3.1.1 org.apache.commons @@ -745,6 +745,11 @@ hadoop-sls ${project.version} + + com.google.code.findbugs + jsr305 + 1.3.9 + diff --git a/hadoop-tools/hadoop-archives/pom.xml b/hadoop-tools/hadoop-archives/pom.xml index f472c6f3e5e..5334f48ff47 100644 --- a/hadoop-tools/hadoop-archives/pom.xml +++ b/hadoop-tools/hadoop-archives/pom.xml @@ -35,6 +35,11 @@ + + junit + junit + test + org.apache.hadoop hadoop-annotations diff --git a/hadoop-tools/hadoop-datajoin/pom.xml b/hadoop-tools/hadoop-datajoin/pom.xml index 984682ff5cc..a4d00f8cf88 100644 --- a/hadoop-tools/hadoop-datajoin/pom.xml +++ b/hadoop-tools/hadoop-datajoin/pom.xml @@ -32,6 +32,11 @@ + + junit + junit + test + 
org.apache.hadoop hadoop-annotations diff --git a/hadoop-tools/hadoop-distcp/pom.xml b/hadoop-tools/hadoop-distcp/pom.xml index aaf2d59579b..1768a265d5f 100644 --- a/hadoop-tools/hadoop-distcp/pom.xml +++ b/hadoop-tools/hadoop-distcp/pom.xml @@ -37,6 +37,11 @@ + + junit + junit + test + org.apache.hadoop hadoop-common diff --git a/hadoop-tools/hadoop-extras/pom.xml b/hadoop-tools/hadoop-extras/pom.xml index 1087b53dad1..eb9aad8451e 100644 --- a/hadoop-tools/hadoop-extras/pom.xml +++ b/hadoop-tools/hadoop-extras/pom.xml @@ -35,6 +35,11 @@ + + junit + junit + test + org.apache.hadoop hadoop-annotations diff --git a/hadoop-tools/hadoop-gridmix/pom.xml b/hadoop-tools/hadoop-gridmix/pom.xml index 1aeaa73b759..a3cac91406a 100644 --- a/hadoop-tools/hadoop-gridmix/pom.xml +++ b/hadoop-tools/hadoop-gridmix/pom.xml @@ -32,6 +32,11 @@ + + junit + junit + test + org.apache.hadoop hadoop-annotations diff --git a/hadoop-tools/hadoop-rumen/pom.xml b/hadoop-tools/hadoop-rumen/pom.xml index abeaf535eb8..0475128a363 100644 --- a/hadoop-tools/hadoop-rumen/pom.xml +++ b/hadoop-tools/hadoop-rumen/pom.xml @@ -35,6 +35,11 @@ + + junit + junit + test + org.apache.hadoop hadoop-annotations diff --git a/hadoop-tools/hadoop-streaming/pom.xml b/hadoop-tools/hadoop-streaming/pom.xml index 7265c0468d5..b6e4931fc71 100644 --- a/hadoop-tools/hadoop-streaming/pom.xml +++ b/hadoop-tools/hadoop-streaming/pom.xml @@ -36,6 +36,11 @@ + + junit + junit + test + org.apache.hadoop hadoop-annotations diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index f133b559209..c7551f3a194 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -40,6 +40,9 @@ Release 2.3.0 - UNRELEASED YARN-311. RM/scheduler support for dynamic resource configuration. (Junping Du via llu) + YARN-1392. Allow sophisticated app-to-queue placement policies in the Fair + Scheduler (Sandy Ryza) + IMPROVEMENTS YARN-7. Support CPU resource for DistributedShell. (Junping Du via llu) @@ -94,6 +97,9 @@ Release 2.3.0 - UNRELEASED YARN-1387. RMWebServices should use ClientRMService for filtering applications (Karthik Kambatla via Sandy Ryza) + YARN-1222. Make improvements in ZKRMStateStore for fencing (Karthik + Kambatla via bikas) + OPTIMIZATIONS BUG FIXES @@ -127,6 +133,9 @@ Release 2.3.0 - UNRELEASED YARN-1400. yarn.cmd uses HADOOP_RESOURCEMANAGER_OPTS. Should be YARN_RESOURCEMANAGER_OPTS. (Raja Aluri via cnauroth) + YARN-1401. 
With zero sleep-delay-before-sigkill.ms, no signal is ever sent + (Gera Shegalov via Sandy Ryza) + Release 2.2.1 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml b/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml index 104ef4a5398..b25466533a3 100644 --- a/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml +++ b/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml @@ -178,6 +178,12 @@ + + + + + + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/HAUtil.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/HAUtil.java index 5512dcda1bd..fdec50967a8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/HAUtil.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/HAUtil.java @@ -193,8 +193,8 @@ private static String getConfKeyForRMInstance(String prefix, return addSuffix(prefix, getRMHAId(conf)); } - private static String getConfValueForRMInstance(String prefix, - Configuration conf) { + public static String getConfValueForRMInstance(String prefix, + Configuration conf) { String confKey = getConfKeyForRMInstance(prefix, conf); String retVal = conf.getTrimmed(confKey); if (LOG.isTraceEnabled()) { @@ -205,8 +205,8 @@ private static String getConfValueForRMInstance(String prefix, return retVal; } - static String getConfValueForRMInstance(String prefix, String defaultValue, - Configuration conf) { + public static String getConfValueForRMInstance( + String prefix, String defaultValue, Configuration conf) { String value = getConfValueForRMInstance(prefix, conf); return (value == null) ? defaultValue : value; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 09f6b6e6dc3..1e4c71903ee 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -328,6 +328,8 @@ public class YarnConfiguration extends Configuration { ZK_STATE_STORE_PREFIX + "acl"; public static final String DEFAULT_ZK_RM_STATE_STORE_ACL = "world:anyone:rwcda"; + public static final String ZK_RM_STATE_STORE_ROOT_NODE_ACL = + ZK_STATE_STORE_PREFIX + "root-node.acl"; /** The maximum number of completed applications RM keeps. */ public static final String RM_MAX_COMPLETED_APPLICATIONS = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index 86501ad7799..7f6e050d2b5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -279,7 +279,11 @@ Host:Port of the ZooKeeper server where RM state will be stored. This must be supplied when using org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore - as the value for yarn.resourcemanager.store.class + as the value for yarn.resourcemanager.store.class. 
ZKRMStateStore + is implicitly fenced, meaning a single ResourceManager is + able to use the store at any point in time. More details on this, along + with setting up appropriate ACLs are discussed under the description for + yarn.resourcemanager.zk.state-store.root-node.acl. yarn.resourcemanager.zk.state-store.address @@ -320,6 +324,31 @@ world:anyone:rwcda + + + ACLs to be used for the root znode when using ZKRMStateStore in an HA + scenario for fencing. + + ZKRMStateStore supports implicit fencing to allow a single + ResourceManager write-access to the store. For fencing, the + ResourceManagers in the cluster share read-write-admin privileges on the + root node, but the Active ResourceManager claims exclusive create-delete + permissions. + + By default, when this property is not set, we use the ACLs from + yarn.resourcemanager.zk.state-store.acl for shared admin access and + rm-address:cluster-timestamp for username-based exclusive create-delete + access. + + This property allows users to set ACLs of their choice instead of using + the default mechanism. For fencing to work, the ACLs should be + carefully set differently on each ResourceManager such that all the + ResourceManagers have shared admin access and the Active ResourceManager + takes over (exclusively) the create-delete access. + + yarn.resourcemanager.zk.state-store.root-node.acl + + URI pointing to the location of the FileSystem path where RM state will be stored. This must be supplied when using diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java index edc31466f67..8b08965b15d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java @@ -375,13 +375,19 @@ public void cleanupContainer() throws IOException { LOG.debug("Sending signal to pid " + processId + " as user " + user + " for container " + containerIdStr); + + final Signal signal = sleepDelayBeforeSigKill > 0 + ? Signal.TERM + : Signal.KILL; + + boolean result = exec.signalContainer(user, processId, signal); + + LOG.debug("Sent signal " + signal + " to pid " + processId + + " as user " + user + + " for container " + containerIdStr + + ", result=" + (result? "success" : "failed")); + if (sleepDelayBeforeSigKill > 0) { - boolean result = exec.signalContainer(user, - processId, Signal.TERM); - LOG.debug("Sent signal to pid " + processId - + " as user " + user - + " for container " + containerIdStr - + ", result=" + (result?
"success" : "failed")); new DelayedProcessKiller(container, user, processId, sleepDelayBeforeSigKill, Signal.KILL, exec).start(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java index 2f0565d92c4..696ebb9796c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java @@ -97,7 +97,6 @@ public void setup() throws IOException { conf.setClass( YarnConfiguration.NM_CONTAINER_MON_RESOURCE_CALCULATOR, LinuxResourceCalculatorPlugin.class, ResourceCalculatorPlugin.class); - conf.setLong(YarnConfiguration.NM_SLEEP_DELAY_BEFORE_SIGKILL_MS, 1000); super.setup(); } @@ -590,8 +589,9 @@ public void testAuxiliaryServiceHelper() throws Exception { AuxiliaryServiceHelper.getServiceDataFromEnv(serviceName, env)); } - @Test - public void testDelayedKill() throws Exception { + private void internalKillTest(boolean delayed) throws Exception { + conf.setLong(YarnConfiguration.NM_SLEEP_DELAY_BEFORE_SIGKILL_MS, + delayed ? 1000 : 0); containerManager.start(); // ////// Construct the Container-id @@ -675,7 +675,8 @@ public void testDelayedKill() throws Exception { BaseContainerManagerTest.waitForContainerState(containerManager, cId, ContainerState.COMPLETE); - // container stop sends a sigterm followed by a sigkill + // if delayed container stop sends a sigterm followed by a sigkill + // otherwise sigkill is sent immediately GetContainerStatusesRequest gcsRequest = GetContainerStatusesRequest.newInstance(containerIds); @@ -690,7 +691,7 @@ public void testDelayedKill() throws Exception { // Windows, because the process is not notified when killed by winutils. // There is no way for the process to trap and respond. Instead, we can // verify that the job object with ID matching container ID no longer exists. 
- if (Shell.WINDOWS) { + if (Shell.WINDOWS || !delayed) { Assert.assertFalse("Process is still alive!", DefaultContainerExecutor.containerIsAlive(cId.toString())); } else { @@ -713,6 +714,16 @@ public void testDelayedKill() throws Exception { } } + @Test + public void testDelayedKill() throws Exception { + internalKillTest(true); + } + + @Test + public void testImmediateKill() throws Exception { + internalKillTest(false); + } + @SuppressWarnings("rawtypes") @Test public void testCallFailureWithNullLocalizedResources() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMHAProtocolService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMHAProtocolService.java index c74b2826c5c..f801203b70d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMHAProtocolService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMHAProtocolService.java @@ -67,7 +67,9 @@ public class RMHAProtocolService extends AbstractService implements protected HAServiceState haState = HAServiceState.INITIALIZING; private AccessControlList adminAcl; private Server haAdminServer; - private boolean haEnabled; + + @InterfaceAudience.Private + boolean haEnabled; public RMHAProtocolService(ResourceManager resourceManager) { super("RMHAProtocolService"); @@ -174,7 +176,8 @@ public synchronized void monitorHealth() } } - private synchronized void transitionToActive() throws Exception { + @InterfaceAudience.Private + synchronized void transitionToActive() throws Exception { if (haState == HAServiceState.ACTIVE) { LOG.info("Already in active state"); return; @@ -205,7 +208,8 @@ public synchronized void transitionToActive(StateChangeRequestInfo reqInfo) } } - private synchronized void transitionToStandby(boolean initialize) + @InterfaceAudience.Private + synchronized void transitionToStandby(boolean initialize) throws Exception { if (haState == HAServiceState.STANDBY) { LOG.info("Already in standby state"); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java index 41b119e8512..82a1f649736 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java @@ -56,6 +56,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStoreFactory; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStoreOperationFailedEvent; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStoreOperationFailedEventType; import 
org.apache.hadoop.yarn.server.resourcemanager.recovery.Recoverable; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; @@ -163,6 +165,10 @@ public class ResourceManager extends CompositeService implements Recoverable { public ResourceManager() { super("ResourceManager"); } + + public RMHAProtocolService getHAService() { + return this.haService; + } public RMContext getRMContext() { return this.rmContext; @@ -216,6 +222,11 @@ protected EventHandler createSchedulerEventDispatcher() { return new SchedulerEventDispatcher(this.scheduler); } + protected RMStateStoreOperationFailedEventDispatcher + createRMStateStoreOperationFailedEventDispatcher() { + return new RMStateStoreOperationFailedEventDispatcher(haService); + } + protected Dispatcher createDispatcher() { return new AsyncDispatcher(); } @@ -339,6 +350,8 @@ protected void serviceInit(Configuration configuration) throws Exception { try { rmStore.init(conf); rmStore.setRMDispatcher(rmDispatcher); + rmDispatcher.register(RMStateStoreOperationFailedEventType.class, + createRMStateStoreOperationFailedEventDispatcher()); } catch (Exception e) { // the Exception from stateStore.init() needs to be handled for // HA and we need to give up master status if we got fenced @@ -632,6 +645,46 @@ public void handle(SchedulerEvent event) { } } + @Private + public static class RMStateStoreOperationFailedEventDispatcher implements + EventHandler { + private final RMHAProtocolService haService; + + public RMStateStoreOperationFailedEventDispatcher( + RMHAProtocolService haService) { + this.haService = haService; + } + + @Override + public void handle(RMStateStoreOperationFailedEvent event) { + if (LOG.isDebugEnabled()) { + LOG.debug("Received a " + + RMStateStoreOperationFailedEvent.class.getName() + " of type " + + event.getType().name()); + } + if (event.getType() == RMStateStoreOperationFailedEventType.FENCED) { + LOG.info("RMStateStore has been fenced"); + synchronized(haService) { + if (haService.haEnabled) { + try { + // Transition to standby and reinit active services + LOG.info("Transitioning RM to Standby mode"); + haService.transitionToStandby(true); + return; + } catch (Exception e) { + LOG.error("Failed to transition RM to Standby mode."); + } + } + } + } + + LOG.error("Shutting down RM on receiving a " + + RMStateStoreOperationFailedEvent.class.getName() + " of type " + + event.getType().name()); + ExitUtil.terminate(1, event.getCause()); + } + } + @Private public static final class ApplicationEventDispatcher implements EventHandler { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java index 911107daab7..b9724d210bb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java @@ -43,6 +43,7 @@ import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import 
org.apache.hadoop.yarn.api.records.impl.pb.ApplicationSubmissionContextPBImpl; + import org.apache.hadoop.yarn.event.AsyncDispatcher; import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.EventHandler; @@ -388,9 +389,13 @@ protected abstract void updateApplicationAttemptStateInternal(String attemptId, */ public synchronized void storeRMDelegationTokenAndSequenceNumber( RMDelegationTokenIdentifier rmDTIdentifier, Long renewDate, - int latestSequenceNumber) throws Exception { - storeRMDelegationTokenAndSequenceNumberState(rmDTIdentifier, renewDate, - latestSequenceNumber); + int latestSequenceNumber) { + try { + storeRMDelegationTokenAndSequenceNumberState(rmDTIdentifier, renewDate, + latestSequenceNumber); + } catch (Exception e) { + notifyStoreOperationFailed(e); + } } /** @@ -406,9 +411,12 @@ protected abstract void storeRMDelegationTokenAndSequenceNumberState( * RMDTSecretManager call this to remove the state of a delegation token */ public synchronized void removeRMDelegationToken( - RMDelegationTokenIdentifier rmDTIdentifier, int sequenceNumber) - throws Exception { - removeRMDelegationTokenState(rmDTIdentifier); + RMDelegationTokenIdentifier rmDTIdentifier, int sequenceNumber) { + try { + removeRMDelegationTokenState(rmDTIdentifier); + } catch (Exception e) { + notifyStoreOperationFailed(e); + } } /** @@ -421,9 +429,12 @@ protected abstract void removeRMDelegationTokenState( /** * RMDTSecretManager call this to store the state of a master key */ - public synchronized void storeRMDTMasterKey(DelegationKey delegationKey) - throws Exception { - storeRMDTMasterKeyState(delegationKey); + public synchronized void storeRMDTMasterKey(DelegationKey delegationKey) { + try { + storeRMDTMasterKeyState(delegationKey); + } catch (Exception e) { + notifyStoreOperationFailed(e); + } } /** @@ -437,9 +448,12 @@ protected abstract void storeRMDTMasterKeyState(DelegationKey delegationKey) /** * RMDTSecretManager call this to remove the state of a master key */ - public synchronized void removeRMDTMasterKey(DelegationKey delegationKey) - throws Exception { - removeRMDTMasterKeyState(delegationKey); + public synchronized void removeRMDTMasterKey(DelegationKey delegationKey) { + try { + removeRMDTMasterKeyState(delegationKey); + } catch (Exception e) { + notifyStoreOperationFailed(e); + } } /** @@ -539,19 +553,15 @@ protected void handleStoreEvent(RMStateStoreEvent event) { try { if (event.getType().equals(RMStateStoreEventType.STORE_APP)) { storeApplicationStateInternal(appId.toString(), appStateData); + notifyDoneStoringApplication(appId, storedException); } else { assert event.getType().equals(RMStateStoreEventType.UPDATE_APP); updateApplicationStateInternal(appId.toString(), appStateData); + notifyDoneUpdatingApplication(appId, storedException); } } catch (Exception e) { LOG.error("Error storing app: " + appId, e); - storedException = e; - } finally { - if (event.getType().equals(RMStateStoreEventType.STORE_APP)) { - notifyDoneStoringApplication(appId, storedException); - } else { - notifyDoneUpdatingApplication(appId, storedException); - } + notifyStoreOperationFailed(e); } } else if (event.getType().equals(RMStateStoreEventType.STORE_APP_ATTEMPT) || event.getType().equals(RMStateStoreEventType.UPDATE_APP_ATTEMPT)) { @@ -589,24 +599,20 @@ protected void handleStoreEvent(RMStateStoreEvent event) { if (event.getType().equals(RMStateStoreEventType.STORE_APP_ATTEMPT)) { storeApplicationAttemptStateInternal(attemptState.getAttemptId() .toString(), attemptStateData); + 
notifyDoneStoringApplicationAttempt(attemptState.getAttemptId(), + storedException); } else { assert event.getType().equals( RMStateStoreEventType.UPDATE_APP_ATTEMPT); updateApplicationAttemptStateInternal(attemptState.getAttemptId() .toString(), attemptStateData); + notifyDoneUpdatingApplicationAttempt(attemptState.getAttemptId(), + storedException); } } catch (Exception e) { - LOG - .error("Error storing appAttempt: " + attemptState.getAttemptId(), e); - storedException = e; - } finally { - if (event.getType().equals(RMStateStoreEventType.STORE_APP_ATTEMPT)) { - notifyDoneStoringApplicationAttempt(attemptState.getAttemptId(), - storedException); - } else { - notifyDoneUpdatingApplicationAttempt(attemptState.getAttemptId(), - storedException); - } + LOG.error( + "Error storing appAttempt: " + attemptState.getAttemptId(), e); + notifyStoreOperationFailed(e); } } else if (event.getType().equals(RMStateStoreEventType.REMOVE_APP)) { ApplicationState appState = @@ -616,17 +622,34 @@ protected void handleStoreEvent(RMStateStoreEvent event) { LOG.info("Removing info for app: " + appId); try { removeApplicationState(appState); + notifyDoneRemovingApplcation(appId, removedException); } catch (Exception e) { LOG.error("Error removing app: " + appId, e); - removedException = e; - } finally { - notifyDoneRemovingApplcation(appId, removedException); + notifyStoreOperationFailed(e); } } else { LOG.error("Unknown RMStateStoreEvent type: " + event.getType()); } } + @SuppressWarnings("unchecked") + /** + * In {#handleStoreEvent}, this method is called to notify the + * ResourceManager that the store operation has failed. + * @param failureCause the exception due to which the operation failed + */ + private void notifyStoreOperationFailed(Exception failureCause) { + RMStateStoreOperationFailedEventType type; + if (failureCause instanceof StoreFencedException) { + type = RMStateStoreOperationFailedEventType.FENCED; + } else { + type = RMStateStoreOperationFailedEventType.FAILED; + } + + rmDispatcher.getEventHandler().handle( + new RMStateStoreOperationFailedEvent(type, failureCause)); + } + @SuppressWarnings("unchecked") /** * In (@link handleStoreEvent}, this method is called to notify the diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreOperationFailedEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreOperationFailedEvent.java new file mode 100644 index 00000000000..4656a7154d0 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreOperationFailedEvent.java @@ -0,0 +1,36 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. 
You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +package org.apache.hadoop.yarn.server.resourcemanager.recovery; + +import org.apache.hadoop.yarn.event.AbstractEvent; + +public class RMStateStoreOperationFailedEvent + extends AbstractEvent { + private Exception cause; + + RMStateStoreOperationFailedEvent( + RMStateStoreOperationFailedEventType type, Exception cause) { + super(type); + this.cause = cause; + } + + public Exception getCause() { + return this.cause; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreOperationFailedEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreOperationFailedEventType.java new file mode 100644 index 00000000000..f5a7dc5686e --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreOperationFailedEventType.java @@ -0,0 +1,23 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.resourcemanager.recovery; + +public enum RMStateStoreOperationFailedEventType { + FENCED, // Store operation failed because it was fenced + FAILED // Store operation failed for no known reason +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/StoreFencedException.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/StoreFencedException.java new file mode 100644 index 00000000000..1f8eb16bbac --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/StoreFencedException.java @@ -0,0 +1,28 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.resourcemanager.recovery; + +import org.apache.hadoop.yarn.exceptions.YarnException; + +public class StoreFencedException extends YarnException { + private static final long serialVersionUID = 1L; + + public StoreFencedException() { + super("RMStateStore has been fenced"); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java index 628d260ea48..45afb4e5d9f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java @@ -23,7 +23,9 @@ import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; +import java.security.NoSuchAlgorithmException; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import org.apache.commons.logging.Log; @@ -31,6 +33,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.ha.HAServiceProtocol; import org.apache.hadoop.io.DataInputByteBuffer; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.token.delegation.DelegationKey; @@ -38,11 +41,14 @@ import org.apache.hadoop.util.ZKUtil; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.client.RMHAServiceTarget; +import org.apache.hadoop.yarn.conf.HAUtil; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.ApplicationAttemptStateDataProto; import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.ApplicationStateDataProto; import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier; +import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.ApplicationAttemptStateDataPBImpl; import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.ApplicationStateDataPBImpl; import org.apache.hadoop.yarn.util.ConverterUtils; @@ -53,11 +59,14 @@ import org.apache.zookeeper.WatchedEvent; import org.apache.zookeeper.Watcher; import org.apache.zookeeper.Watcher.Event; +import 
org.apache.zookeeper.ZooDefs; import org.apache.zookeeper.ZooKeeper; import org.apache.zookeeper.data.ACL; +import org.apache.zookeeper.data.Id; import org.apache.zookeeper.data.Stat; import com.google.common.annotations.VisibleForTesting; +import org.apache.zookeeper.server.auth.DigestAuthenticationProvider; @Private @Unstable @@ -83,6 +92,55 @@ public class ZKRMStateStore extends RMStateStore { protected ZooKeeper zkClient; private ZooKeeper oldZkClient; + /** Fencing related variables */ + private static final String FENCING_LOCK = "RM_ZK_FENCING_LOCK"; + private String fencingNodePath; + private Op createFencingNodePathOp; + private Op deleteFencingNodePathOp; + + @VisibleForTesting + List zkRootNodeAcl; + private boolean useDefaultFencingScheme = false; + public static final int CREATE_DELETE_PERMS = + ZooDefs.Perms.CREATE | ZooDefs.Perms.DELETE; + private final String zkRootNodeAuthScheme = + new DigestAuthenticationProvider().getScheme(); + + private String zkRootNodeUsername; + private String zkRootNodePassword; + + /** + * Given the {@link Configuration} and {@link ACL}s used (zkAcl) for + * ZooKeeper access, construct the {@link ACL}s for the store's root node. + * In the constructed {@link ACL}, all the users allowed by zkAcl are given + * rwa access, while the current RM has exclude create-delete access. + * + * To be called only when HA is enabled and the configuration doesn't set ACL + * for the root node. + */ + @VisibleForTesting + @Private + @Unstable + protected List constructZkRootNodeACL( + Configuration conf, List sourceACLs) throws NoSuchAlgorithmException { + List zkRootNodeAcl = new ArrayList(); + for (ACL acl : sourceACLs) { + zkRootNodeAcl.add(new ACL( + ZKUtil.removeSpecificPerms(acl.getPerms(), CREATE_DELETE_PERMS), + acl.getId())); + } + + zkRootNodeUsername = HAUtil.getConfValueForRMInstance( + YarnConfiguration.RM_ADDRESS, + YarnConfiguration.DEFAULT_RM_ADDRESS, conf); + zkRootNodePassword = Long.toString(ResourceManager.getClusterTimeStamp()); + Id rmId = new Id(zkRootNodeAuthScheme, + DigestAuthenticationProvider.generateDigest( + zkRootNodeUsername + ":" + zkRootNodePassword)); + zkRootNodeAcl.add(new ACL(CREATE_DELETE_PERMS, rmId)); + return zkRootNodeAcl; + } + @Override public synchronized void initInternal(Configuration conf) throws Exception { zkHostPort = conf.get(YarnConfiguration.ZK_RM_STATE_STORE_ADDRESS); @@ -116,6 +174,29 @@ public synchronized void initInternal(Configuration conf) throws Exception { zkRootNodePath = znodeWorkingPath + "/" + ROOT_ZNODE_NAME; rmDTSecretManagerRoot = zkRootNodePath + "/" + RM_DT_SECRET_MANAGER_ROOT; rmAppRoot = zkRootNodePath + "/" + RM_APP_ROOT; + + /* Initialize fencing related paths, acls, and ops */ + fencingNodePath = zkRootNodePath + "/" + FENCING_LOCK; + createFencingNodePathOp = Op.create(fencingNodePath, new byte[0], zkAcl, + CreateMode.PERSISTENT); + deleteFencingNodePathOp = Op.delete(fencingNodePath, -1); + if (HAUtil.isHAEnabled(conf)) { + String zkRootNodeAclConf = HAUtil.getConfValueForRMInstance + (YarnConfiguration.ZK_RM_STATE_STORE_ROOT_NODE_ACL, conf); + if (zkRootNodeAclConf != null) { + zkRootNodeAclConf = ZKUtil.resolveConfIndirection(zkRootNodeAclConf); + try { + zkRootNodeAcl = ZKUtil.parseACLs(zkRootNodeAclConf); + } catch (ZKUtil.BadAclFormatException bafe) { + LOG.error("Invalid format for " + + YarnConfiguration.ZK_RM_STATE_STORE_ROOT_NODE_ACL); + throw bafe; + } + } else { + useDefaultFencingScheme = true; + zkRootNodeAcl = constructZkRootNodeACL(conf, zkAcl); + } + } } @Override @@ 
-126,20 +207,76 @@ public synchronized void startInternal() throws Exception { // ensure root dirs exist createRootDir(znodeWorkingPath); createRootDir(zkRootNodePath); + if (HAUtil.isHAEnabled(getConfig())){ + fence(); + } createRootDir(rmDTSecretManagerRoot); createRootDir(rmAppRoot); } - private void createRootDir(String rootPath) throws Exception { + private void createRootDir(final String rootPath) throws Exception { + // For root dirs, we shouldn't use the doMulti helper methods try { - createWithRetries(rootPath, null, zkAcl, CreateMode.PERSISTENT); + new ZKAction() { + @Override + public String run() throws KeeperException, InterruptedException { + return zkClient.create(rootPath, null, zkAcl, CreateMode.PERSISTENT); + } + }.runWithRetries(); } catch (KeeperException ke) { - if (ke.code() != Code.NODEEXISTS) { + if (ke.code() == Code.NODEEXISTS) { + LOG.debug(rootPath + "znode already exists!"); + } else { throw ke; } } } + private void logRootNodeAcls(String prefix) throws KeeperException, + InterruptedException { + Stat getStat = new Stat(); + List getAcls = zkClient.getACL(zkRootNodePath, getStat); + + StringBuilder builder = new StringBuilder(); + builder.append(prefix); + for (ACL acl : getAcls) { + builder.append(acl.toString()); + } + builder.append(getStat.toString()); + LOG.debug(builder.toString()); + } + + private synchronized void fence() throws Exception { + if (LOG.isTraceEnabled()) { + logRootNodeAcls("Before fencing\n"); + } + + new ZKAction() { + @Override + public Void run() throws KeeperException, InterruptedException { + zkClient.setACL(zkRootNodePath, zkRootNodeAcl, -1); + return null; + } + }.runWithRetries(); + + // delete fencingnodepath + new ZKAction() { + @Override + public Void run() throws KeeperException, InterruptedException { + try { + zkClient.multi(Collections.singletonList(deleteFencingNodePathOp)); + } catch (KeeperException.NoNodeException nne) { + LOG.info("Fencing node " + fencingNodePath + " doesn't exist to delete"); + } + return null; + } + }.runWithRetries(); + + if (LOG.isTraceEnabled()) { + logRootNodeAcls("After fencing\n"); + } + } + private synchronized void closeZkClients() throws IOException { if (zkClient != null) { try { @@ -176,7 +313,8 @@ public synchronized RMState loadState() throws Exception { private synchronized void loadRMDTSecretManagerState(RMState rmState) throws Exception { - List childNodes = zkClient.getChildren(rmDTSecretManagerRoot, true); + List childNodes = + getChildrenWithRetries(rmDTSecretManagerRoot, true); for (String childNodeName : childNodes) { if (childNodeName.startsWith(DELEGATION_TOKEN_SEQUENCE_NUMBER_PREFIX)) { @@ -209,7 +347,7 @@ private synchronized void loadRMDTSecretManagerState(RMState rmState) } private synchronized void loadRMAppState(RMState rmState) throws Exception { - List childNodes = zkClient.getChildren(rmAppRoot, true); + List childNodes = getChildrenWithRetries(rmAppRoot, true); List attempts = new ArrayList(); for (String childNodeName : childNodes) { @@ -466,6 +604,8 @@ public void process(WatchedEvent event) { } @VisibleForTesting + @Private + @Unstable public synchronized void processWatchEvent(WatchedEvent event) throws Exception { Event.EventType eventType = event.getType(); @@ -506,65 +646,71 @@ public synchronized void processWatchEvent(WatchedEvent event) } @VisibleForTesting + @Private + @Unstable String getNodePath(String root, String nodeName) { return (root + "/" + nodeName); } - @VisibleForTesting - public String createWithRetries( - final String path, final byte[] data, 
final List acl, - final CreateMode mode) throws Exception { - return new ZKAction() { + /** + * Helper method that creates fencing node, executes the passed operations, + * and deletes the fencing node. + */ + private synchronized void doMultiWithRetries( + final List opList) throws Exception { + final List execOpList = new ArrayList(opList.size() + 2); + execOpList.add(createFencingNodePathOp); + execOpList.addAll(opList); + execOpList.add(deleteFencingNodePathOp); + new ZKAction() { @Override - public String run() throws KeeperException, InterruptedException { - return zkClient.create(path, data, acl, mode); + public Void run() throws KeeperException, InterruptedException { + zkClient.multi(execOpList); + return null; } }.runWithRetries(); } + /** + * Helper method that creates fencing node, executes the passed operation, + * and deletes the fencing node. + */ + private void doMultiWithRetries(final Op op) throws Exception { + doMultiWithRetries(Collections.singletonList(op)); + } + + @VisibleForTesting + @Private + @Unstable + public void createWithRetries( + final String path, final byte[] data, final List acl, + final CreateMode mode) throws Exception { + doMultiWithRetries(Op.create(path, data, acl, mode)); + } + private void deleteWithRetries(final String path, final int version) throws Exception { - new ZKAction() { - @Override - public Void run() throws KeeperException, InterruptedException { - /** - * Call exists() to leave a watch on the node denoted by path. - * Delete node if exists. To pass the existence information to the - * caller, call delete irrespective of whether node exists or not. - */ - if (zkClient.exists(path, true) == null) { - LOG.error("Trying to delete a path (" + path - + ") that doesn't exist."); - } - zkClient.delete(path, version); - return null; + try { + doMultiWithRetries(Op.delete(path, version)); + } catch (KeeperException.NoNodeException nne) { + // We tried to delete a node that doesn't exist + if (LOG.isDebugEnabled()) { + LOG.debug("Attempted to delete a non-existing znode " + path); } - }.runWithRetries(); - } - - private void doMultiWithRetries(final ArrayList opList) throws Exception { - new ZKAction() { - @Override - public Void run() throws KeeperException, InterruptedException { - zkClient.multi(opList); - return null; - } - }.runWithRetries(); + } } @VisibleForTesting + @Private + @Unstable public void setDataWithRetries(final String path, final byte[] data, final int version) throws Exception { - new ZKAction() { - @Override - public Void run() throws KeeperException, InterruptedException { - zkClient.setData(path, data, version); - return null; - } - }.runWithRetries(); + doMultiWithRetries(Op.setData(path, data, version)); } @VisibleForTesting + @Private + @Unstable public byte[] getDataWithRetries(final String path, final boolean watch) throws Exception { return new ZKAction() { @@ -576,6 +722,16 @@ public byte[] run() throws KeeperException, InterruptedException { }.runWithRetries(); } + private List getChildrenWithRetries( + final String path, final boolean watch) throws Exception { + return new ZKAction>() { + @Override + List run() throws KeeperException, InterruptedException { + return zkClient.getChildren(path, watch); + } + }.runWithRetries(); + } + private abstract class ZKAction { // run() expects synchronization on ZKRMStateStore.this abstract T run() throws KeeperException, InterruptedException; @@ -596,11 +752,29 @@ T runWithCheck() throws Exception { } } + private boolean shouldRetry(Code code) { + switch (code) { + case 
CONNECTIONLOSS: + case OPERATIONTIMEOUT: + return true; + default: + break; + } + return false; + } + T runWithRetries() throws Exception { int retry = 0; while (true) { try { return runWithCheck(); + } catch (KeeperException.NoAuthException nae) { + if (HAUtil.isHAEnabled(getConfig())) { + // NoAuthException possibly means that this store is fenced due to + // another RM becoming active. Even if not, + // it is safer to assume we have been fenced + throw new StoreFencedException(); + } } catch (KeeperException ke) { if (shouldRetry(ke.code()) && ++retry < numRetries) { continue; @@ -611,17 +785,6 @@ T runWithRetries() throws Exception { } } - private static boolean shouldRetry(Code code) { - switch (code) { - case CONNECTIONLOSS: - case OPERATIONTIMEOUT: - return true; - default: - break; - } - return false; - } - private synchronized void createConnection() throws IOException, InterruptedException { closeZkClients(); @@ -629,6 +792,10 @@ private synchronized void createConnection() retries++) { try { zkClient = getNewZooKeeper(); + if (useDefaultFencingScheme) { + zkClient.addAuthInfo(zkRootNodeAuthScheme, + (zkRootNodeUsername + ":" + zkRootNodePassword).getBytes()); + } } catch (IOException ioe) { // Retry in case of network failures LOG.info("Failed to connect to the ZooKeeper on attempt - " + @@ -646,6 +813,8 @@ private synchronized void createConnection() // protected to mock for testing @VisibleForTesting + @Private + @Unstable protected synchronized ZooKeeper getNewZooKeeper() throws IOException, InterruptedException { ZooKeeper zk = new ZooKeeper(zkHostPort, zkSessionTimeout, null); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index 422a43b668b..7d6c6a50523 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -136,9 +136,6 @@ public class FairScheduler implements ResourceScheduler { // How often fair shares are re-calculated (ms) protected long UPDATE_INTERVAL = 500; - // Whether to use username in place of "default" queue name - private volatile boolean userAsDefaultQueue = false; - private final static List EMPTY_CONTAINER_LIST = new ArrayList(); @@ -640,6 +637,12 @@ protected synchronized void addApplication( RMApp rmApp = rmContext.getRMApps().get( applicationAttemptId.getApplicationId()); FSLeafQueue queue = assignToQueue(rmApp, queueName, user); + if (queue == null) { + rmContext.getDispatcher().getEventHandler().handle( + new RMAppAttemptRejectedEvent(applicationAttemptId, + "Application rejected by queue placement policy")); + return; + } FSSchedulerApp schedulerApp = new FSSchedulerApp(applicationAttemptId, user, @@ -675,17 +678,16 @@ queue, new ActiveUsersManager(getRootQueueMetrics()), @VisibleForTesting FSLeafQueue assignToQueue(RMApp rmApp, String queueName, String user) { - // Potentially set queue to username if configured to do so - if (queueName.equals(YarnConfiguration.DEFAULT_QUEUE_NAME) && - userAsDefaultQueue) { 
- queueName = user; - } - - FSLeafQueue queue = queueMgr.getLeafQueue(queueName, - conf.getAllowUndeclaredPools()); - if (queue == null) { - // queue is not an existing or createable leaf queue - queue = queueMgr.getLeafQueue(YarnConfiguration.DEFAULT_QUEUE_NAME, false); + FSLeafQueue queue = null; + try { + QueuePlacementPolicy policy = queueMgr.getPlacementPolicy(); + queueName = policy.assignAppToQueue(queueName, user); + if (queueName == null) { + return null; + } + queue = queueMgr.getLeafQueue(queueName, true); + } catch (IOException ex) { + LOG.error("Error assigning app to queue, rejecting", ex); } if (rmApp != null) { @@ -1155,7 +1157,6 @@ public synchronized void reinitialize(Configuration conf, RMContext rmContext) minimumAllocation = this.conf.getMinimumAllocation(); maximumAllocation = this.conf.getMaximumAllocation(); incrAllocation = this.conf.getIncrementAllocation(); - userAsDefaultQueue = this.conf.getUserAsDefaultQueue(); continuousSchedulingEnabled = this.conf.isContinuousSchedulingEnabled(); continuousSchedulingSleepMs = this.conf.getContinuousSchedulingSleepMs(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueueManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueueManager.java index 81721b0e7f8..4dd9c461001 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueueManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueueManager.java @@ -25,6 +25,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.concurrent.CopyOnWriteArrayList; @@ -51,6 +52,8 @@ import org.w3c.dom.Text; import org.xml.sax.SAXException; +import com.google.common.annotations.VisibleForTesting; + /** * Maintains a list of queues as well as scheduling parameters for each queue, * such as guaranteed share allocations, from the fair scheduler config file. @@ -87,6 +90,8 @@ public class QueueManager { private FSParentQueue rootQueue; private volatile QueueManagerInfo info = new QueueManagerInfo(); + @VisibleForTesting + volatile QueuePlacementPolicy placementPolicy; private long lastReloadAttempt; // Last time we tried to reload the queues file private long lastSuccessfulReload; // Last time we successfully reloaded queues @@ -107,6 +112,8 @@ public void initialize() throws IOException, SAXException, queues.put(rootQueue.getName(), rootQueue); this.allocFile = conf.getAllocationFile(); + placementPolicy = new QueuePlacementPolicy(getSimplePlacementRules(), + new HashSet(), conf); reloadAllocs(); lastSuccessfulReload = scheduler.getClock().getTime(); @@ -115,6 +122,28 @@ public void initialize() throws IOException, SAXException, getLeafQueue(YarnConfiguration.DEFAULT_QUEUE_NAME, true); } + public void updatePlacementPolicy(FairSchedulerConfiguration conf) { + + } + + /** + * Construct simple queue placement policy from allow-undeclared-pools and + * user-as-default-queue. 
+ */ + private List getSimplePlacementRules() { + boolean create = scheduler.getConf().getAllowUndeclaredPools(); + boolean userAsDefaultQueue = scheduler.getConf().getUserAsDefaultQueue(); + List rules = new ArrayList(); + rules.add(new QueuePlacementRule.Specified().initialize(create, null)); + if (userAsDefaultQueue) { + rules.add(new QueuePlacementRule.User().initialize(create, null)); + } + if (!userAsDefaultQueue || !create) { + rules.add(new QueuePlacementRule.Default().initialize(true, null)); + } + return rules; + } + /** * Get a queue by name, creating it if the create param is true and is necessary. * If the queue is not or can not be a leaf queue, i.e. it already exists as a @@ -226,6 +255,10 @@ public boolean exists(String name) { return queues.containsKey(name); } } + + public QueuePlacementPolicy getPlacementPolicy() { + return placementPolicy; + } /** * Reload allocations file if it hasn't been loaded in a while @@ -290,6 +323,8 @@ public void reloadAllocs() throws IOException, ParserConfigurationException, long fairSharePreemptionTimeout = Long.MAX_VALUE; long defaultMinSharePreemptionTimeout = Long.MAX_VALUE; SchedulingPolicy defaultSchedPolicy = SchedulingPolicy.getDefault(); + + QueuePlacementPolicy newPlacementPolicy = null; // Remember all queue names so we can display them on web UI, etc. List queueNamesInAllocFile = new ArrayList(); @@ -306,6 +341,7 @@ public void reloadAllocs() throws IOException, ParserConfigurationException, "file: top-level element not "); NodeList elements = root.getChildNodes(); List queueElements = new ArrayList(); + Element placementPolicyElement = null; for (int i = 0; i < elements.getLength(); i++) { Node node = elements.item(i); if (node instanceof Element) { @@ -348,6 +384,8 @@ public void reloadAllocs() throws IOException, ParserConfigurationException, String text = ((Text)element.getFirstChild()).getData().trim(); SchedulingPolicy.setDefault(text); defaultSchedPolicy = SchedulingPolicy.getDefault(); + } else if ("queuePlacementPolicy".equals(element.getTagName())) { + placementPolicyElement = element; } else { LOG.warn("Bad element in allocations file: " + element.getTagName()); } @@ -369,6 +407,15 @@ public void reloadAllocs() throws IOException, ParserConfigurationException, userMaxApps, queueWeights, queuePolicies, minSharePreemptionTimeouts, queueAcls, queueNamesInAllocFile); } + + // Load placement policy and pass it configured queues + if (placementPolicyElement != null) { + newPlacementPolicy = QueuePlacementPolicy.fromXml(placementPolicyElement, + new HashSet(queueNamesInAllocFile), scheduler.getConf()); + } else { + newPlacementPolicy = new QueuePlacementPolicy(getSimplePlacementRules(), + new HashSet(queueNamesInAllocFile), scheduler.getConf()); + } // Commit the reload; also create any queue defined in the alloc file // if it does not already exist, so it can be displayed on the web UI. 
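getSimplePlacementRules() above is how the legacy yarn-site.xml switches (allow-undeclared-pools and user-as-default-queue) are translated into a rule chain when the allocations file does not define a queuePlacementPolicy. The stand-alone sketch below (invented names, not QueueManager code) simply enumerates the four combinations so the resulting chains are easy to see.

// Illustrative sketch of the mapping performed by getSimplePlacementRules():
// which rule chain each combination of the two legacy switches produces.
import java.util.ArrayList;
import java.util.List;

class SimpleRuleChainSketch {
  static List<String> rulesFor(boolean allowUndeclaredPools,
      boolean userAsDefaultQueue) {
    List<String> rules = new ArrayList<String>();
    // "specified" always comes first; create mirrors allow-undeclared-pools.
    rules.add("specified(create=" + allowUndeclaredPools + ")");
    if (userAsDefaultQueue) {
      rules.add("user(create=" + allowUndeclaredPools + ")");
    }
    if (!userAsDefaultQueue || !allowUndeclaredPools) {
      // Terminal fallback so the chain can always place the app somewhere.
      rules.add("default");
    }
    return rules;
  }

  public static void main(String[] args) {
    for (boolean create : new boolean[] {true, false}) {
      for (boolean userQueue : new boolean[] {true, false}) {
        System.out.println("allow-undeclared-pools=" + create
            + ", user-as-default-queue=" + userQueue
            + " -> " + rulesFor(create, userQueue));
      }
    }
  }
}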
@@ -377,6 +424,7 @@ public void reloadAllocs() throws IOException, ParserConfigurationException, queueMaxApps, userMaxApps, queueWeights, userMaxAppsDefault, queueMaxAppsDefault, defaultSchedPolicy, minSharePreemptionTimeouts, queueAcls, fairSharePreemptionTimeout, defaultMinSharePreemptionTimeout); + placementPolicy = newPlacementPolicy; // Make sure all queues exist for (String name: queueNamesInAllocFile) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueuePlacementPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueuePlacementPolicy.java new file mode 100644 index 00000000000..402b47a0e44 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueuePlacementPolicy.java @@ -0,0 +1,121 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.security.Groups; +import org.apache.hadoop.util.ReflectionUtils; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; + +public class QueuePlacementPolicy { + private static final Map> ruleClasses; + static { + Map> map = + new HashMap>(); + map.put("user", QueuePlacementRule.User.class); + map.put("primaryGroup", QueuePlacementRule.PrimaryGroup.class); + map.put("specified", QueuePlacementRule.Specified.class); + map.put("default", QueuePlacementRule.Default.class); + map.put("reject", QueuePlacementRule.Reject.class); + ruleClasses = Collections.unmodifiableMap(map); + } + + private final List rules; + private final Set configuredQueues; + private final Groups groups; + + public QueuePlacementPolicy(List rules, + Set configuredQueues, Configuration conf) + throws AllocationConfigurationException { + for (int i = 0; i < rules.size()-1; i++) { + if (rules.get(i).isTerminal()) { + throw new AllocationConfigurationException("Rules after rule " + + i + " in queue placement policy can never be reached"); + } + } + if (!rules.get(rules.size()-1).isTerminal()) { + throw new AllocationConfigurationException( + "Could get past last queue placement rule without assigning"); + } + this.rules = rules; + this.configuredQueues = configuredQueues; + groups = new Groups(conf); + } + + /** + * Builds a QueuePlacementPolicy from an xml element. + */ + public static QueuePlacementPolicy fromXml(Element el, Set configuredQueues, + Configuration conf) throws AllocationConfigurationException { + List rules = new ArrayList(); + NodeList elements = el.getChildNodes(); + for (int i = 0; i < elements.getLength(); i++) { + Node node = elements.item(i); + if (node instanceof Element) { + Element element = (Element)node; + String ruleName = element.getTagName(); + Class clazz = ruleClasses.get(ruleName); + if (clazz == null) { + throw new AllocationConfigurationException("No rule class found for " + + ruleName); + } + QueuePlacementRule rule = ReflectionUtils.newInstance(clazz, null); + rule.initializeFromXml(element); + rules.add(rule); + } + } + return new QueuePlacementPolicy(rules, configuredQueues, conf); + } + + /** + * Applies this rule to an app with the given requested queue and user/group + * information. + * + * @param requestedQueue + * The queue specified in the ApplicationSubmissionContext + * @param user + * The user submitting the app + * @return + * The name of the queue to assign the app to. Or null if the app should + * be rejected. 
+ * @throws IOException + * If an exception is encountered while getting the user's groups + */ + public String assignAppToQueue(String requestedQueue, String user) + throws IOException { + for (QueuePlacementRule rule : rules) { + String queue = rule.assignAppToQueue(requestedQueue, user, groups, + configuredQueues); + if (queue == null || !queue.isEmpty()) { + return queue; + } + } + throw new IllegalStateException("Should have applied a rule before " + + "reaching here"); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueuePlacementRule.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueuePlacementRule.java new file mode 100644 index 00000000000..95acdcae04f --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueuePlacementRule.java @@ -0,0 +1,200 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair; + +import java.io.IOException; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; + +import org.apache.hadoop.security.Groups; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.w3c.dom.Element; +import org.w3c.dom.NamedNodeMap; +import org.w3c.dom.Node; + +public abstract class QueuePlacementRule { + protected boolean create; + + /** + * Initializes the rule with any arguments. + * + * @param args + * Additional attributes of the rule's xml element other than create. + */ + public QueuePlacementRule initialize(boolean create, Map args) { + this.create = create; + return this; + } + + /** + * + * @param requestedQueue + * The queue explicitly requested. + * @param user + * The user submitting the app. + * @param groups + * The groups of the user submitting the app. + * @param configuredQueues + * The queues specified in the scheduler configuration. + * @return + * The queue to place the app into. An empty string indicates that we should + * continue to the next rule, and null indicates that the app should be rejected. 
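The javadoc above states the contract the whole policy relies on: a rule returns a queue name to place the app, an empty string to fall through to the next rule, or null to reject. A minimal stand-alone illustration of that tri-state convention follows; the names are invented and this is not the patch's own class.

// Demo of the place / continue / reject convention used by the rule chain.
import java.util.Arrays;
import java.util.List;

class RuleChainDemo {
  interface Rule {
    // queue name = place, "" = continue to next rule, null = reject
    String assign(String requestedQueue, String user);
  }

  static String place(List<Rule> rules, String requestedQueue, String user) {
    for (Rule rule : rules) {
      String queue = rule.assign(requestedQueue, user);
      if (queue == null || !queue.isEmpty()) {
        return queue; // placed (non-empty) or rejected (null)
      }
      // empty string: fall through to the next rule
    }
    throw new IllegalStateException("Last rule must be terminal");
  }

  public static void main(String[] args) {
    Rule specified = new Rule() {
      @Override
      public String assign(String req, String user) {
        return "default".equals(req) ? "" : "root." + req;
      }
    };
    Rule byUser = new Rule() {
      @Override
      public String assign(String req, String user) {
        return "root." + user;
      }
    };
    List<Rule> rules = Arrays.asList(specified, byUser);
    System.out.println(place(rules, "marketing", "alice")); // root.marketing
    System.out.println(place(rules, "default", "alice"));   // root.alice
  }
}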
+ */ + public String assignAppToQueue(String requestedQueue, String user, + Groups groups, Collection configuredQueues) throws IOException { + String queue = getQueueForApp(requestedQueue, user, groups); + if (create || configuredQueues.contains(queue)) { + return queue; + } else { + return ""; + } + } + + public void initializeFromXml(Element el) { + boolean create = true; + NamedNodeMap attributes = el.getAttributes(); + Map args = new HashMap(); + for (int i = 0; i < attributes.getLength(); i++) { + Node node = attributes.item(i); + String key = node.getNodeName(); + String value = node.getNodeValue(); + if (key.equals("create")) { + create = Boolean.parseBoolean(value); + } else { + args.put(key, value); + } + } + initialize(create, args); + } + + /** + * Returns true if this rule never tells the policy to continue. + */ + public abstract boolean isTerminal(); + + /** + * Applies this rule to an app with the given requested queue and user/group + * information. + * + * @param requestedQueue + * The queue specified in the ApplicationSubmissionContext + * @param user + * The user submitting the app. + * @param groups + * The groups of the user submitting the app. + * @return + * The name of the queue to assign the app to, or null to empty string + * continue to the next rule. + */ + protected abstract String getQueueForApp(String requestedQueue, String user, + Groups groups) throws IOException; + + /** + * Places apps in queues by username of the submitter + */ + public static class User extends QueuePlacementRule { + @Override + protected String getQueueForApp(String requestedQueue, + String user, Groups groups) { + return "root." + user; + } + + @Override + public boolean isTerminal() { + return create; + } + } + + /** + * Places apps in queues by primary group of the submitter + */ + public static class PrimaryGroup extends QueuePlacementRule { + @Override + protected String getQueueForApp(String requestedQueue, + String user, Groups groups) throws IOException { + return "root." + groups.getGroups(user).get(0); + } + + @Override + public boolean isTerminal() { + return create; + } + } + + /** + * Places apps in queues by requested queue of the submitter + */ + public static class Specified extends QueuePlacementRule { + @Override + protected String getQueueForApp(String requestedQueue, + String user, Groups groups) { + if (requestedQueue.equals(YarnConfiguration.DEFAULT_QUEUE_NAME)) { + return ""; + } else { + if (!requestedQueue.startsWith("root.")) { + requestedQueue = "root." + requestedQueue; + } + return requestedQueue; + } + } + + @Override + public boolean isTerminal() { + return false; + } + } + + /** + * Places all apps in the default queue + */ + public static class Default extends QueuePlacementRule { + @Override + protected String getQueueForApp(String requestedQueue, String user, + Groups groups) { + return "root." 
+ YarnConfiguration.DEFAULT_QUEUE_NAME; + } + + @Override + public boolean isTerminal() { + return create; + } + } + + /** + * Rejects all apps + */ + public static class Reject extends QueuePlacementRule { + @Override + public String assignAppToQueue(String requestedQueue, String user, + Groups groups, Collection configuredQueues) { + return null; + } + + @Override + protected String getQueueForApp(String requestedQueue, String user, + Groups groups) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean isTerminal() { + return true; + } + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStore.java index a6929a89366..7f07ddb4c49 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStore.java @@ -18,15 +18,32 @@ package org.apache.hadoop.yarn.server.resourcemanager.recovery; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.mock; import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.List; +import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.ha.HAServiceProtocol; +import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo; +import org.apache.hadoop.service.Service; +import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; +import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; +import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.conf.HAUtil; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.resourcemanager.ClientRMService; +import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.zookeeper.ZooKeeper; import org.junit.Test; @@ -56,7 +73,7 @@ public ZooKeeper getNewZooKeeper() throws IOException { public RMStateStore getRMStateStore() throws Exception { String workingZnode = "/Test"; - YarnConfiguration conf = new YarnConfiguration(); + Configuration conf = new YarnConfiguration(); conf.set(YarnConfiguration.ZK_RM_STATE_STORE_ADDRESS, hostPort); conf.set(YarnConfiguration.ZK_RM_STATE_STORE_PARENT_PATH, workingZnode); this.client = createClient(); @@ -77,4 +94,81 @@ public void testZKRMStateStoreRealZK() throws Exception { testRMAppStateStore(zkTester); testRMDTSecretManagerStateStore(zkTester); } + + private Configuration createHARMConf( + String rmIds, String rmId, int adminPort) { + Configuration conf = new YarnConfiguration(); + conf.setBoolean(YarnConfiguration.RM_HA_ENABLED, true); + conf.set(YarnConfiguration.RM_HA_IDS, rmIds); + 
conf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true); + conf.set(YarnConfiguration.RM_STORE, ZKRMStateStore.class.getName()); + conf.set(YarnConfiguration.ZK_RM_STATE_STORE_ADDRESS, hostPort); + conf.set(YarnConfiguration.RM_HA_ID, rmId); + for (String rpcAddress : HAUtil.RPC_ADDRESS_CONF_KEYS) { + conf.set(HAUtil.addSuffix(rpcAddress, rmId), "localhost:0"); + } + conf.set(YarnConfiguration.RM_HA_ADMIN_ADDRESS, "localhost:" + adminPort); + return conf; + } + + @SuppressWarnings("unchecked") + @Test + public void testFencing() throws Exception { + StateChangeRequestInfo req = new StateChangeRequestInfo( + HAServiceProtocol.RequestSource.REQUEST_BY_USER); + + Configuration conf1 = createHARMConf("rm1,rm2", "rm1", 1234); + ResourceManager rm1 = new ResourceManager(); + rm1.init(conf1); + rm1.start(); + rm1.getHAService().transitionToActive(req); + assertEquals("RM with ZKStore didn't start", + Service.STATE.STARTED, rm1.getServiceState()); + assertEquals("RM should be Active", + HAServiceProtocol.HAServiceState.ACTIVE, + rm1.getHAService().getServiceStatus().getState()); + + Configuration conf2 = createHARMConf("rm1,rm2", "rm2", 5678); + ResourceManager rm2 = new ResourceManager(); + rm2.init(conf2); + rm2.start(); + rm2.getHAService().transitionToActive(req); + assertEquals("RM with ZKStore didn't start", + Service.STATE.STARTED, rm2.getServiceState()); + assertEquals("RM should be Active", + HAServiceProtocol.HAServiceState.ACTIVE, + rm2.getHAService().getServiceStatus().getState()); + + // Submitting an application to RM1 to trigger a state store operation. + // RM1 should realize that it got fenced and is not the Active RM anymore. + Map mockMap = mock(Map.class); + ApplicationSubmissionContext asc = + ApplicationSubmissionContext.newInstance( + ApplicationId.newInstance(1000, 1), + "testApplication", // app Name + "default", // queue name + Priority.newInstance(0), + ContainerLaunchContext.newInstance(mockMap, mockMap, + new ArrayList(), mockMap, mock(ByteBuffer.class), + mockMap), + false, // unmanaged AM + true, // cancelTokens + 1, // max app attempts + Resource.newInstance(1024, 1)); + ClientRMService rmService = rm1.getClientRMService(); + rmService.submitApplication(SubmitApplicationRequest.newInstance(asc)); + + for (int i = 0; i < 30; i++) { + if (HAServiceProtocol.HAServiceState.ACTIVE == rm1.getHAService() + .getServiceStatus().getState()) { + Thread.sleep(100); + } + } + assertEquals("RM should have been fenced", + HAServiceProtocol.HAServiceState.STANDBY, + rm1.getHAService().getServiceStatus().getState()); + assertEquals("RM should be Active", + HAServiceProtocol.HAServiceState.ACTIVE, + rm2.getHAService().getServiceStatus().getState()); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/SimpleGroupsMapping.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/SimpleGroupsMapping.java new file mode 100644 index 00000000000..24dd65d7f5c --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/SimpleGroupsMapping.java @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
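The wait loop in testFencing above polls the HA state for up to about three seconds before asserting that rm1 has moved to standby. A reusable form of that idiom could look like the generic helper sketched below; this is hypothetical and not part of the patch.

// Generic variant of the wait loop used in testFencing: poll until a
// condition holds or give up after a bounded number of attempts.
import java.util.concurrent.Callable;

class WaitUtilSketch {
  static boolean waitFor(Callable<Boolean> condition, long intervalMs,
      int maxAttempts) throws Exception {
    for (int i = 0; i < maxAttempts; i++) {
      if (Boolean.TRUE.equals(condition.call())) {
        return true;
      }
      Thread.sleep(intervalMs);
    }
    return Boolean.TRUE.equals(condition.call());
  }
}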
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; + +import org.apache.hadoop.security.GroupMappingServiceProvider; + +public class SimpleGroupsMapping implements GroupMappingServiceProvider { + + @Override + public List getGroups(String user) { + return Arrays.asList(user + "group"); + } + + @Override + public void cacheGroupsRefresh() throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public void cacheGroupsAdd(List groups) throws IOException { + throw new UnsupportedOperationException(); + } + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index 9dfac53d45a..cc2e1ccbdcd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -44,7 +44,9 @@ import junit.framework.Assert; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.security.GroupMappingServiceProvider; import org.apache.hadoop.yarn.MockApps; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; @@ -94,6 +96,8 @@ import org.junit.Test; import org.xml.sax.SAXException; +import com.google.common.collect.Sets; + public class TestFairScheduler { private class MockClock implements Clock { @@ -616,6 +620,7 @@ public void testUserAsDefaultQueue() throws Exception { conf.set(FairSchedulerConfiguration.USER_AS_DEFAULT_QUEUE, "false"); scheduler.reinitialize(conf, resourceManager.getRMContext()); + scheduler.getQueueManager().initialize(); AppAddedSchedulerEvent appAddedEvent2 = new AppAddedSchedulerEvent( createAppAttemptId(2, 1), "default", "user2"); scheduler.handle(appAddedEvent2); @@ -664,6 +669,46 @@ public void testAssignToQueue() throws Exception { assertEquals(rmApp2.getQueue(), queue2.getName()); assertEquals("root.notdefault", rmApp2.getQueue()); } + + @Test + public void testQueuePlacementWithPolicy() throws Exception { + Configuration conf = createConfiguration(); + conf.setClass(CommonConfigurationKeys.HADOOP_SECURITY_GROUP_MAPPING, + 
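SimpleGroupsMapping above makes group lookups deterministic for tests: the primary group of any user is simply the user name with a "group" suffix. Combined with the primaryGroup rule, that is why the placement test below expects user3 to land in root.user3group. A quick stand-alone check (invented class, not part of the patch):

// Mirrors SimpleGroupsMapping plus the primaryGroup rule's naming scheme.
import java.util.Arrays;
import java.util.List;

class PrimaryGroupPlacementSketch {
  static List<String> groupsOf(String user) {
    return Arrays.asList(user + "group"); // same rule as SimpleGroupsMapping
  }

  public static void main(String[] args) {
    String user = "user3";
    String queue = "root." + groupsOf(user).get(0);
    System.out.println(queue); // prints root.user3group
  }
}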
SimpleGroupsMapping.class, GroupMappingServiceProvider.class); + ApplicationAttemptId appId; + Map apps = scheduler.applications; + + List rules = new ArrayList(); + rules.add(new QueuePlacementRule.Specified().initialize(true, null)); + rules.add(new QueuePlacementRule.User().initialize(false, null)); + rules.add(new QueuePlacementRule.PrimaryGroup().initialize(false, null)); + rules.add(new QueuePlacementRule.Default().initialize(true, null)); + Set queues = Sets.newHashSet("root.user1", "root.user3group"); + scheduler.getQueueManager().placementPolicy = new QueuePlacementPolicy( + rules, queues, conf); + appId = createSchedulingRequest(1024, "somequeue", "user1"); + assertEquals("root.somequeue", apps.get(appId).getQueueName()); + appId = createSchedulingRequest(1024, "default", "user1"); + assertEquals("root.user1", apps.get(appId).getQueueName()); + appId = createSchedulingRequest(1024, "default", "user3"); + assertEquals("root.user3group", apps.get(appId).getQueueName()); + appId = createSchedulingRequest(1024, "default", "otheruser"); + assertEquals("root.default", apps.get(appId).getQueueName()); + + // test without specified as first rule + rules = new ArrayList(); + rules.add(new QueuePlacementRule.User().initialize(false, null)); + rules.add(new QueuePlacementRule.Specified().initialize(true, null)); + rules.add(new QueuePlacementRule.Default().initialize(true, null)); + scheduler.getQueueManager().placementPolicy = new QueuePlacementPolicy( + rules, queues, conf); + appId = createSchedulingRequest(1024, "somequeue", "user1"); + assertEquals("root.user1", apps.get(appId).getQueueName()); + appId = createSchedulingRequest(1024, "somequeue", "otheruser"); + assertEquals("root.somequeue", apps.get(appId).getQueueName()); + appId = createSchedulingRequest(1024, "default", "otheruser"); + assertEquals("root.default", apps.get(appId).getQueueName()); + } @Test public void testFairShareWithMinAlloc() throws Exception { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestQueuePlacementPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestQueuePlacementPolicy.java new file mode 100644 index 00000000000..130fe353278 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestQueuePlacementPolicy.java @@ -0,0 +1,119 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
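For reference, the rule chain built programmatically in testQueuePlacementWithPolicy (specified, then user and primaryGroup with create=false, then default) corresponds roughly to the allocations-file snippet below, using the element and attribute names defined by QueuePlacementPolicy.fromXml and QueuePlacementRule.initializeFromXml. The XML is an illustration, not text taken from the patch.

<queuePlacementPolicy>
  <specified />
  <user create="false" />
  <primaryGroup create="false" />
  <default />
</queuePlacementPolicy>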
+ */ +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair; + +import static org.junit.Assert.assertEquals; + +import java.util.Set; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.security.GroupMappingServiceProvider; +import org.junit.BeforeClass; +import org.junit.Test; +import org.w3c.dom.Document; +import org.w3c.dom.Element; + +import com.google.common.collect.Sets; + +public class TestQueuePlacementPolicy { + private final static Configuration conf = new Configuration(); + private final static Set configuredQueues = Sets.newHashSet("root.someuser"); + + @BeforeClass + public static void setup() { + conf.setClass(CommonConfigurationKeys.HADOOP_SECURITY_GROUP_MAPPING, + SimpleGroupsMapping.class, GroupMappingServiceProvider.class); + } + + @Test + public void testSpecifiedUserPolicy() throws Exception { + StringBuffer sb = new StringBuffer(); + sb.append(""); + sb.append(" "); + sb.append(" "); + sb.append(""); + QueuePlacementPolicy policy = parse(sb.toString()); + assertEquals("root.specifiedq",policy.assignAppToQueue("specifiedq", "someuser")); + assertEquals("root.someuser", policy.assignAppToQueue("default", "someuser")); + assertEquals("root.otheruser", policy.assignAppToQueue("default", "otheruser")); + } + + @Test + public void testNoCreate() throws Exception { + StringBuffer sb = new StringBuffer(); + sb.append(""); + sb.append(" "); + sb.append(" "); + sb.append(" "); + sb.append(""); + QueuePlacementPolicy policy = parse(sb.toString()); + assertEquals("root.specifiedq", policy.assignAppToQueue("specifiedq", "someuser")); + assertEquals("root.someuser", policy.assignAppToQueue("default", "someuser")); + assertEquals("root.specifiedq", policy.assignAppToQueue("specifiedq", "otheruser")); + assertEquals("root.default", policy.assignAppToQueue("default", "otheruser")); + } + + @Test + public void testSpecifiedThenReject() throws Exception { + StringBuffer sb = new StringBuffer(); + sb.append(""); + sb.append(" "); + sb.append(" "); + sb.append(""); + QueuePlacementPolicy policy = parse(sb.toString()); + assertEquals("root.specifiedq", policy.assignAppToQueue("specifiedq", "someuser")); + assertEquals(null, policy.assignAppToQueue("default", "someuser")); + } + + @Test (expected = AllocationConfigurationException.class) + public void testOmittedTerminalRule() throws Exception { + StringBuffer sb = new StringBuffer(); + sb.append(""); + sb.append(" "); + sb.append(" "); + sb.append(""); + parse(sb.toString()); + } + + @Test (expected = AllocationConfigurationException.class) + public void testTerminalRuleInMiddle() throws Exception { + StringBuffer sb = new StringBuffer(); + sb.append(""); + sb.append(" "); + sb.append(" "); + sb.append(" "); + sb.append(""); + parse(sb.toString()); + } + + private QueuePlacementPolicy parse(String str) throws Exception { + // Read and parse the allocations file. 
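The string literals passed to sb.append(...) in the tests above lost their XML markup in this copy of the patch. Judging from the assertions, testSpecifiedUserPolicy most plausibly builds a policy along the following lines (a reconstruction, not the verbatim original):

<queuePlacementPolicy>
  <specified />
  <user />
</queuePlacementPolicy>

testNoCreate presumably adds create="false" to the user rule and appends a default rule, which is what makes otheruser fall through to root.default while someuser still matches the configured root.someuser queue.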
+ DocumentBuilderFactory docBuilderFactory = + DocumentBuilderFactory.newInstance(); + docBuilderFactory.setIgnoringComments(true); + DocumentBuilder builder = docBuilderFactory.newDocumentBuilder(); + Document doc = builder.parse(IOUtils.toInputStream(str)); + Element root = doc.getDocumentElement(); + return QueuePlacementPolicy.fromXml(root, configuredQueues, conf); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm index 01f39cbd5fb..d0cd30e5afd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm @@ -101,6 +101,16 @@ Hadoop MapReduce Next Generation - Fair Scheduler Fair Scheduler. Among them, is the use of a custom policies governing priority “boosting” over certain apps. +* {Automatically placing applications in queues} + + The Fair Scheduler allows administrators to configure policies that + automatically place submitted applications into appropriate queues. Placement + can depend on the user and groups of the submitter and the requested queue + passed by the application. A policy consists of a set of rules that are applied + sequentially to classify an incoming application. Each rule either places the + app into a queue, rejects it, or continues on to the next rule. Refer to the + allocation file format below for how to configure these policies. + * {Installation} To use the Fair Scheduler first assign the appropriate scheduler class in @@ -138,7 +148,8 @@ Properties that can be placed in yarn-site.xml * Whether to use the username associated with the allocation as the default queue name, in the event that a queue name is not specified. If this is set to "false" or unset, all jobs have a shared default queue, named "default". - Defaults to true. + Defaults to true. If a queue placement policy is given in the allocations + file, this property is ignored. * <<>> @@ -180,6 +191,16 @@ Properties that can be placed in yarn-site.xml opportunities to pass up. The default value of -1.0 means don't pass up any scheduling opportunities. + * <<>> + + * If this is true, new queues can be created at application submission time, + whether because they are specified as the application's queue by the + submitter or because they are placed there by the user-as-default-queue + property. If this is false, any time an app would be placed in a queue that + is not specified in the allocations file, it is placed in the "default" queue + instead. Defaults to true. If a queue placement policy is given in the + allocations file, this property is ignored. + Allocation file format The allocation file must be in XML format. The format contains five types of @@ -248,8 +269,61 @@ Allocation file format policy for queues; overriden by the schedulingPolicy element in each queue if specified. Defaults to "fair". + * <>, which contains a list of rule elements + that tell the scheduler how to place incoming apps into queues. Rules + are applied in the order that they are listed. Rules may take arguments. All + rules accept the "create" argument, which indicates whether the rule can create + a new queue. "Create" defaults to true; if set to false and the rule would + place the app in a queue that is not configured in the allocations file, we + continue on to the next rule. The last rule must be one that can never issue a + continue. 
Valid rules are: + + * specified: the app is placed into the queue it requested. If the app + requested no queue, i.e. it specified "default", we continue. + + * user: the app is placed into a queue with the name of the user who + submitted it. + + * primaryGroup: the app is placed into a queue with the name of the + primary group of the user who submitted it. + + * default: the app is placed into the queue named "default". + + * reject: the app is rejected. + An example allocation file is given here: +--- + + + + 10000 mb,0vcores + 90000 mb,0vcores + 50 + 2.0 + fair + + charlie + 5000 mb,0vcores + + + + + 30 + + 5 + + + + + + + +--- + + Note that for backwards compatibility with the original FairScheduler, "queue" elements can instead be named as "pool" elements. + + Queue Access Control Lists (ACLs) Queue Access Control Lists (ACLs) allow administrators to control who may @@ -268,28 +342,6 @@ Queue Access Control Lists (ACLs) To start restricting access, change the root queue's ACLs to something other than "*". ---- - - - - 10000 mb,0vcores - 90000 mb,0vcores - 50 - 2.0 - fair - - charlie - 5000 mb,0vcores - - - - 30 - - 5 - ---- - - Note that for backwards compatibility with the original FairScheduler, "queue" elements can instead be named as "pool" elements. * {Administration} diff --git a/hadoop-yarn-project/hadoop-yarn/pom.xml b/hadoop-yarn-project/hadoop-yarn/pom.xml index 4764b19ff74..d9d9b9c945b 100644 --- a/hadoop-yarn-project/hadoop-yarn/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/pom.xml @@ -119,6 +119,7 @@ com.sun.jersey.jersey-test-framework jersey-test-framework-grizzly2 + test com.sun.jersey
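The example allocation file embedded in the documentation above lost its XML markup in this copy. Based on the element descriptions earlier in this section and the values that survive (10000 mb / 90000 mb resources, 50 running apps, weight 2.0, the charlie ACL, the per-user limit of 30, and a userMaxAppsDefault of 5), a plausible reconstruction is shown below. The queue and user names, and the particular placement rules chosen, are illustrative rather than recovered from the original.

<?xml version="1.0"?>
<allocations>
  <queue name="sample_queue">
    <minResources>10000 mb,0vcores</minResources>
    <maxResources>90000 mb,0vcores</maxResources>
    <maxRunningApps>50</maxRunningApps>
    <weight>2.0</weight>
    <schedulingPolicy>fair</schedulingPolicy>
    <queue name="sample_sub_queue">
      <aclSubmitApps>charlie</aclSubmitApps>
      <minResources>5000 mb,0vcores</minResources>
    </queue>
  </queue>
  <user name="sample_user">
    <maxRunningApps>30</maxRunningApps>
  </user>
  <userMaxAppsDefault>5</userMaxAppsDefault>
  <queuePlacementPolicy>
    <specified />
    <primaryGroup create="false" />
    <default />
  </queuePlacementPolicy>
</allocations>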