From 43589a8df70d4dbaf86609961e27cc4e23dda993 Mon Sep 17 00:00:00 2001 From: Mayank Bansal Date: Tue, 15 Jul 2014 21:48:58 +0000 Subject: [PATCH 01/49] YARN-1408 Preemption caused Invalid State Event: ACQUIRED at KILLED and caused a task timeout for 30mins. (Sunil G via mayank) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1610860 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 + .../rmcontainer/RMContainer.java | 5 ++ .../rmcontainer/RMContainerImpl.java | 28 +++++- .../scheduler/AbstractYarnScheduler.java | 22 +++++ .../scheduler/AppSchedulingInfo.java | 69 +++++++++++---- .../SchedulerApplicationAttempt.java | 9 +- .../scheduler/capacity/CapacityScheduler.java | 1 + .../common/fica/FiCaSchedulerApp.java | 10 ++- .../scheduler/fair/FSSchedulerApp.java | 10 ++- .../scheduler/fair/FairScheduler.java | 3 +- .../rmcontainer/TestRMContainerImpl.java | 42 +++++++++ .../capacity/TestCapacityScheduler.java | 67 +++++++++++++++ .../scheduler/fair/FairSchedulerTestBase.java | 21 +++++ .../scheduler/fair/TestFairScheduler.java | 86 +++++++++++++++++++ 14 files changed, 352 insertions(+), 24 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 999f9fbc057..15c448e1d05 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -259,6 +259,9 @@ Release 2.5.0 - UNRELEASED YARN-2241. ZKRMStateStore: On startup, show nicer messages if znodes already exist. (Robert Kanter via kasha) + YARN-1408 Preemption caused Invalid State Event: ACQUIRED at KILLED and + caused a task timeout for 30mins. (Sunil G via mayank) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainer.java index 96150c3c2ae..9e9dcb9aa6f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainer.java @@ -18,6 +18,8 @@ package org.apache.hadoop.yarn.server.resourcemanager.rmcontainer; +import java.util.List; + import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; @@ -26,6 +28,7 @@ import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.event.EventHandler; /** @@ -73,5 +76,7 @@ public interface RMContainer extends EventHandler { ContainerReport createContainerReport(); boolean isAMContainer(); + + List getResourceRequests(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java index d79a0b76130..eef361f3433 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.rmcontainer; import java.util.EnumSet; +import java.util.List; import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock; import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock; @@ -35,6 +36,7 @@ import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; @@ -158,6 +160,7 @@ public class RMContainerImpl implements RMContainer { private long finishTime; private ContainerStatus finishedStatus; private boolean isAMContainer; + private List resourceRequests; public RMContainerImpl(Container container, ApplicationAttemptId appAttemptId, NodeId nodeId, String user, @@ -180,7 +183,8 @@ public class RMContainerImpl implements RMContainer { this.eventHandler = rmContext.getDispatcher().getEventHandler(); this.containerAllocationExpirer = rmContext.getContainerAllocationExpirer(); this.isAMContainer = false; - + this.resourceRequests = null; + ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); this.readLock = lock.readLock(); this.writeLock = lock.writeLock(); @@ -311,6 +315,25 @@ public class RMContainerImpl implements RMContainer { readLock.unlock(); } } + + @Override + public List getResourceRequests() { + try { + readLock.lock(); + return resourceRequests; + } finally { + readLock.unlock(); + } + } + + public void setResourceRequests(List requests) { + try { + writeLock.lock(); + this.resourceRequests = requests; + } finally { + writeLock.unlock(); + } + } @Override public String toString() { @@ -432,6 +455,9 @@ public class RMContainerImpl implements RMContainer { @Override public void transition(RMContainerImpl container, RMContainerEvent event) { + // Clear ResourceRequest stored in RMContainer + container.setResourceRequests(null); + // Register with containerAllocationExpirer. 
container.containerAllocationExpirer.register(container.getContainerId()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java index 38678845ec1..b3e835a54d3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java @@ -35,6 +35,7 @@ import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; @@ -275,6 +276,27 @@ public abstract class AbstractYarnScheduler return rmContainer; } + /** + * Recover resource request back from RMContainer when a container is + * preempted before AM pulled the same. If container is pulled by + * AM, then RMContainer will not have resource request to recover. + * @param rmContainer + */ + protected void recoverResourceRequestForContainer(RMContainer rmContainer) { + List requests = rmContainer.getResourceRequests(); + + // If container state is moved to ACQUIRED, request will be empty. + if (requests == null) { + return; + } + // Add resource request back to Scheduler. + SchedulerApplicationAttempt schedulerAttempt + = getCurrentAttemptForContainer(rmContainer.getContainerId()); + if (schedulerAttempt != null) { + schedulerAttempt.recoverResourceRequests(requests); + } + } + public SchedulerNode getSchedulerNode(NodeId nodeId) { return nodes.get(nodeId); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java index 581321ca350..a127123a760 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java @@ -127,9 +127,10 @@ public class AppSchedulingInfo { * by the application. 
* * @param requests resources to be acquired + * @param recoverPreemptedRequest recover Resource Request on preemption */ synchronized public void updateResourceRequests( - List requests) { + List requests, boolean recoverPreemptedRequest) { QueueMetrics metrics = queue.getMetrics(); // Update resource requests @@ -163,8 +164,13 @@ public class AppSchedulingInfo { asks = new HashMap(); this.requests.put(priority, asks); this.priorities.add(priority); - } else if (updatePendingResources) { - lastRequest = asks.get(resourceName); + } + lastRequest = asks.get(resourceName); + + if (recoverPreemptedRequest && lastRequest != null) { + // Increment the number of containers to 1, as it is recovering a + // single container. + request.setNumContainers(lastRequest.getNumContainers() + 1); } asks.put(resourceName, request); @@ -254,14 +260,16 @@ public class AppSchedulingInfo { * @param container * the containers allocated. */ - synchronized public void allocate(NodeType type, SchedulerNode node, - Priority priority, ResourceRequest request, Container container) { + synchronized public List allocate(NodeType type, + SchedulerNode node, Priority priority, ResourceRequest request, + Container container) { + List resourceRequests = new ArrayList(); if (type == NodeType.NODE_LOCAL) { - allocateNodeLocal(node, priority, request, container); + allocateNodeLocal(node, priority, request, container, resourceRequests); } else if (type == NodeType.RACK_LOCAL) { - allocateRackLocal(node, priority, request, container); + allocateRackLocal(node, priority, request, container, resourceRequests); } else { - allocateOffSwitch(node, priority, request, container); + allocateOffSwitch(node, priority, request, container, resourceRequests); } QueueMetrics metrics = queue.getMetrics(); if (pending) { @@ -279,6 +287,7 @@ public class AppSchedulingInfo { + " resource=" + request.getCapability()); } metrics.allocateResources(user, 1, request.getCapability(), true); + return resourceRequests; } /** @@ -288,9 +297,9 @@ public class AppSchedulingInfo { * @param allocatedContainers * resources allocated to the application */ - synchronized private void allocateNodeLocal( - SchedulerNode node, Priority priority, - ResourceRequest nodeLocalRequest, Container container) { + synchronized private void allocateNodeLocal(SchedulerNode node, + Priority priority, ResourceRequest nodeLocalRequest, Container container, + List resourceRequests) { // Update future requirements nodeLocalRequest.setNumContainers(nodeLocalRequest.getNumContainers() - 1); if (nodeLocalRequest.getNumContainers() == 0) { @@ -304,7 +313,14 @@ public class AppSchedulingInfo { this.requests.get(priority).remove(node.getRackName()); } - decrementOutstanding(requests.get(priority).get(ResourceRequest.ANY)); + ResourceRequest offRackRequest = requests.get(priority).get( + ResourceRequest.ANY); + decrementOutstanding(offRackRequest); + + // Update cloned NodeLocal, RackLocal and OffRack requests for recovery + resourceRequests.add(cloneResourceRequest(nodeLocalRequest)); + resourceRequests.add(cloneResourceRequest(rackLocalRequest)); + resourceRequests.add(cloneResourceRequest(offRackRequest)); } /** @@ -314,16 +330,22 @@ public class AppSchedulingInfo { * @param allocatedContainers * resources allocated to the application */ - synchronized private void allocateRackLocal( - SchedulerNode node, Priority priority, - ResourceRequest rackLocalRequest, Container container) { + synchronized private void allocateRackLocal(SchedulerNode node, + Priority priority, ResourceRequest 
rackLocalRequest, Container container, + List resourceRequests) { // Update future requirements rackLocalRequest.setNumContainers(rackLocalRequest.getNumContainers() - 1); if (rackLocalRequest.getNumContainers() == 0) { this.requests.get(priority).remove(node.getRackName()); } - decrementOutstanding(requests.get(priority).get(ResourceRequest.ANY)); + ResourceRequest offRackRequest = requests.get(priority).get( + ResourceRequest.ANY); + decrementOutstanding(offRackRequest); + + // Update cloned RackLocal and OffRack requests for recovery + resourceRequests.add(cloneResourceRequest(rackLocalRequest)); + resourceRequests.add(cloneResourceRequest(offRackRequest)); } /** @@ -333,11 +355,13 @@ public class AppSchedulingInfo { * @param allocatedContainers * resources allocated to the application */ - synchronized private void allocateOffSwitch( - SchedulerNode node, Priority priority, - ResourceRequest offSwitchRequest, Container container) { + synchronized private void allocateOffSwitch(SchedulerNode node, + Priority priority, ResourceRequest offSwitchRequest, Container container, + List resourceRequests) { // Update future requirements decrementOutstanding(offSwitchRequest); + // Update cloned RackLocal and OffRack requests for recovery + resourceRequests.add(cloneResourceRequest(offSwitchRequest)); } synchronized private void decrementOutstanding( @@ -436,4 +460,11 @@ public class AppSchedulingInfo { metrics.allocateResources(user, 1, rmContainer.getAllocatedResource(), false); } + + public ResourceRequest cloneResourceRequest(ResourceRequest request) { + ResourceRequest newRequest = ResourceRequest.newInstance( + request.getPriority(), request.getResourceName(), + request.getCapability(), 1, request.getRelaxLocality()); + return newRequest; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java index 3a51417cdf7..32dd23b08c1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java @@ -241,7 +241,14 @@ public class SchedulerApplicationAttempt { public synchronized void updateResourceRequests( List requests) { if (!isStopped) { - appSchedulingInfo.updateResourceRequests(requests); + appSchedulingInfo.updateResourceRequests(requests, false); + } + } + + public synchronized void recoverResourceRequests( + List requests) { + if (!isStopped) { + appSchedulingInfo.updateResourceRequests(requests, true); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index 92727e37f2e..649eb92019e 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -1089,6 +1089,7 @@ public class CapacityScheduler extends if (LOG.isDebugEnabled()) { LOG.debug("KILL_CONTAINER: container" + cont.toString()); } + recoverResourceRequestForContainer(cont); completedContainer(cont, SchedulerUtils.createPreemptedContainerStatus( cont.getContainerId(), SchedulerUtils.PREEMPTED_CONTAINER), RMContainerEventType.KILL); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java index 470cb106f18..846d1e1396c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java @@ -20,6 +20,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica; import java.util.Collections; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Set; @@ -77,6 +78,9 @@ public class FiCaSchedulerApp extends SchedulerApplicationAttempt { if (null == liveContainers.remove(rmContainer.getContainerId())) { return false; } + + // Remove from the list of newly allocated containers if found + newlyAllocatedContainers.remove(rmContainer); Container container = rmContainer.getContainer(); ContainerId containerId = container.getId(); @@ -129,8 +133,12 @@ public class FiCaSchedulerApp extends SchedulerApplicationAttempt { liveContainers.put(container.getId(), rmContainer); // Update consumption and track allocations - appSchedulingInfo.allocate(type, node, priority, request, container); + List resourceRequestList = appSchedulingInfo.allocate( + type, node, priority, request, container); Resources.addTo(currentConsumption, container.getResource()); + + // Update resource requests related to "request" and store in RMContainer + ((RMContainerImpl)rmContainer).setResourceRequests(resourceRequestList); // Inform the container rmContainer.handle( diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerApp.java index 63a29e4b099..20cf3952d2d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerApp.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerApp.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Set; @@ -82,6 +83,9 @@ public class FSSchedulerApp extends SchedulerApplicationAttempt { Container container = rmContainer.getContainer(); ContainerId containerId = container.getId(); + // Remove from the list of newly allocated containers if found + newlyAllocatedContainers.remove(rmContainer); + // Inform the container rmContainer.handle( new RMContainerFinishedEvent( @@ -281,9 +285,13 @@ public class FSSchedulerApp extends SchedulerApplicationAttempt { liveContainers.put(container.getId(), rmContainer); // Update consumption and track allocations - appSchedulingInfo.allocate(type, node, priority, request, container); + List resourceRequestList = appSchedulingInfo.allocate( + type, node, priority, request, container); Resources.addTo(currentConsumption, container.getResource()); + // Update resource requests related to "request" and store in RMContainer + ((RMContainerImpl) rmContainer).setResourceRequests(resourceRequestList); + // Inform the container rmContainer.handle( new RMContainerEvent(container.getId(), RMContainerEventType.START)); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index 6f9b76f010b..7e867554f9b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -422,7 +422,7 @@ public class FairScheduler extends } } - private void warnOrKillContainer(RMContainer container) { + protected void warnOrKillContainer(RMContainer container) { ApplicationAttemptId appAttemptId = container.getApplicationAttemptId(); FSSchedulerApp app = getSchedulerApp(appAttemptId); FSLeafQueue queue = app.getQueue(); @@ -440,6 +440,7 @@ public class FairScheduler extends SchedulerUtils.createPreemptedContainerStatus( container.getContainerId(), SchedulerUtils.PREEMPTED_CONTAINER); + recoverResourceRequestForContainer(container); // TODO: Not sure if this ever actually adds this to the list of cleanup // containers on the RMNode (see SchedulerNode.releaseContainer()). 
completedContainer(container, status, RMContainerEventType.KILL); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/TestRMContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/TestRMContainerImpl.java index b8f2986c772..44f8381b483 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/TestRMContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/TestRMContainerImpl.java @@ -26,6 +26,9 @@ import static org.mockito.Mockito.reset; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +import java.util.ArrayList; + +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; @@ -36,17 +39,24 @@ import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.DrainDispatcher; import org.apache.hadoop.yarn.event.EventHandler; +import org.apache.hadoop.yarn.server.resourcemanager.MockAM; +import org.apache.hadoop.yarn.server.resourcemanager.MockNM; +import org.apache.hadoop.yarn.server.resourcemanager.MockRM; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.ahs.RMApplicationHistoryWriter; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerFinishedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEventType; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; import org.apache.hadoop.yarn.server.utils.BuilderUtils; +import org.junit.Assert; import org.junit.Test; import org.mockito.ArgumentCaptor; @@ -204,4 +214,36 @@ public class TestRMContainerImpl { assertEquals(RMContainerState.RUNNING, rmContainer.getState()); verify(writer, never()).containerFinished(any(RMContainer.class)); } + + @Test + public void testExistenceOfResourceRequestInRMContainer() throws Exception { + Configuration conf = new Configuration(); + MockRM rm1 = new MockRM(conf); + rm1.start(); + MockNM nm1 = rm1.registerNode("unknownhost:1234", 8000); + RMApp app1 = rm1.submitApp(1024); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); + ResourceScheduler scheduler = rm1.getResourceScheduler(); + + // request a container. 
+ am1.allocate("127.0.0.1", 1024, 1, new ArrayList()); + ContainerId containerId2 = ContainerId.newInstance( + am1.getApplicationAttemptId(), 2); + rm1.waitForState(nm1, containerId2, RMContainerState.ALLOCATED); + + // Verify whether list of ResourceRequest is present in RMContainer + // while moving to ALLOCATED state + Assert.assertNotNull(scheduler.getRMContainer(containerId2) + .getResourceRequests()); + + // Allocate container + am1.allocate(new ArrayList(), new ArrayList()) + .getAllocatedContainers(); + rm1.waitForState(nm1, containerId2, RMContainerState.ACQUIRED); + + // After RMContainer moving to ACQUIRED state, list of ResourceRequest will + // be empty + Assert.assertNull(scheduler.getRMContainer(containerId2) + .getResourceRequests()); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java index 3ca3c480c57..0efd48fa28d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java @@ -27,6 +27,7 @@ import static org.mockito.Mockito.when; import java.io.IOException; import java.net.InetSocketAddress; import java.security.PrivilegedAction; +import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; @@ -79,6 +80,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptMetrics; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; @@ -87,6 +90,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicat import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.TestSchedulerUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent; @@ -947,4 +951,67 @@ public class TestCapacityScheduler { rm1.stop(); } + + @Test(timeout = 30000) + public void testRecoverRequestAfterPreemption() throws Exception { + Configuration conf = new 
Configuration(); + conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, + ResourceScheduler.class); + MockRM rm1 = new MockRM(conf); + rm1.start(); + MockNM nm1 = rm1.registerNode("127.0.0.1:1234", 8000); + RMApp app1 = rm1.submitApp(1024); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); + CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler(); + + // request a container. + am1.allocate("127.0.0.1", 1024, 1, new ArrayList()); + ContainerId containerId1 = ContainerId.newInstance( + am1.getApplicationAttemptId(), 2); + rm1.waitForState(nm1, containerId1, RMContainerState.ALLOCATED); + + RMContainer rmContainer = cs.getRMContainer(containerId1); + List requests = rmContainer.getResourceRequests(); + FiCaSchedulerApp app = cs.getApplicationAttempt(am1 + .getApplicationAttemptId()); + + FiCaSchedulerNode node = cs.getNode(rmContainer.getAllocatedNode()); + for (ResourceRequest request : requests) { + // Skip the OffRack and RackLocal resource requests. + if (request.getResourceName().equals(node.getRackName()) + || request.getResourceName().equals(ResourceRequest.ANY)) { + continue; + } + + // Already the node local resource request is cleared from RM after + // allocation. + Assert.assertNull(app.getResourceRequest(request.getPriority(), + request.getResourceName())); + } + + // Call killContainer to preempt the container + cs.killContainer(rmContainer); + + Assert.assertEquals(3, requests.size()); + for (ResourceRequest request : requests) { + // Resource request must have added back in RM after preempt event + // handling. + Assert.assertEquals( + 1, + app.getResourceRequest(request.getPriority(), + request.getResourceName()).getNumContainers()); + } + + // New container will be allocated and will move to ALLOCATED state + ContainerId containerId2 = ContainerId.newInstance( + am1.getApplicationAttemptId(), 3); + rm1.waitForState(nm1, containerId2, RMContainerState.ALLOCATED); + + // allocate container + List containers = am1.allocate(new ArrayList(), + new ArrayList()).getAllocatedContainers(); + + // Now with updated ResourceRequest, a container is allocated for AM. + Assert.assertTrue(containers.size() == 1); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerTestBase.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerTestBase.java index ec942f97696..50f61d8d592 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerTestBase.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerTestBase.java @@ -167,6 +167,27 @@ public class FairSchedulerTestBase { .put(id.getApplicationId(), rmApp); return id; } + + protected ApplicationAttemptId createSchedulingRequest(String queueId, + String userId, List ask) { + ApplicationAttemptId id = createAppAttemptId(this.APP_ID++, + this.ATTEMPT_ID++); + scheduler.addApplication(id.getApplicationId(), queueId, userId); + // This conditional is for testAclSubmitApplication where app is rejected + // and no app is added. 
+ if (scheduler.getSchedulerApplications().containsKey(id.getApplicationId())) { + scheduler.addApplicationAttempt(id, false, true); + } + scheduler.allocate(id, ask, new ArrayList(), null, null); + RMApp rmApp = mock(RMApp.class); + RMAppAttempt rmAppAttempt = mock(RMAppAttempt.class); + when(rmApp.getCurrentAppAttempt()).thenReturn(rmAppAttempt); + when(rmAppAttempt.getRMAppAttemptMetrics()).thenReturn( + new RMAppAttemptMetrics(id)); + resourceManager.getRMContext().getRMApps() + .put(id.getApplicationId(), rmApp); + return id; + } protected void createSchedulingRequestExistingApplication( int memory, int priority, ApplicationAttemptId attId) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index dbc79d907d3..20c386714dc 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -53,10 +53,13 @@ import org.apache.hadoop.yarn.MockApps; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; +import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationSubmissionContextPBImpl; @@ -77,11 +80,13 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.TestSchedulerUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent; @@ -2831,6 +2836,87 @@ public class TestFairScheduler 
extends FairSchedulerTestBase { } } } + + @Test(timeout=5000) + public void testRecoverRequestAfterPreemption() throws Exception { + conf.setLong(FairSchedulerConfiguration.WAIT_TIME_BEFORE_KILL, 10); + + MockClock clock = new MockClock(); + scheduler.setClock(clock); + scheduler.init(conf); + scheduler.start(); + scheduler.reinitialize(conf, resourceManager.getRMContext()); + + Priority priority = Priority.newInstance(20); + String host = "127.0.0.1"; + int GB = 1024; + + // Create Node and raised Node Added event + RMNode node = MockNodes.newNodeInfo(1, + Resources.createResource(16 * 1024, 4), 0, host); + NodeAddedSchedulerEvent nodeEvent = new NodeAddedSchedulerEvent(node); + scheduler.handle(nodeEvent); + + // Create 3 container requests and place it in ask + List ask = new ArrayList(); + ResourceRequest nodeLocalRequest = createResourceRequest(GB, 1, host, + priority.getPriority(), 1, true); + ResourceRequest rackLocalRequest = createResourceRequest(GB, 1, + node.getRackName(), priority.getPriority(), 1, true); + ResourceRequest offRackRequest = createResourceRequest(GB, 1, + ResourceRequest.ANY, priority.getPriority(), 1, true); + ask.add(nodeLocalRequest); + ask.add(rackLocalRequest); + ask.add(offRackRequest); + + // Create Request and update + ApplicationAttemptId appAttemptId = createSchedulingRequest("queueA", + "user1", ask); + scheduler.update(); + + // Sufficient node check-ins to fully schedule containers + NodeUpdateSchedulerEvent nodeUpdate = new NodeUpdateSchedulerEvent(node); + scheduler.handle(nodeUpdate); + + assertEquals(1, scheduler.getSchedulerApp(appAttemptId).getLiveContainers() + .size()); + FSSchedulerApp app = scheduler.getSchedulerApp(appAttemptId); + + // ResourceRequest will be empty once NodeUpdate is completed + Assert.assertNull(app.getResourceRequest(priority, host)); + + ContainerId containerId1 = ContainerId.newInstance(appAttemptId, 1); + RMContainer rmContainer = app.getRMContainer(containerId1); + + // Create a preempt event and register for preemption + scheduler.warnOrKillContainer(rmContainer); + + // Wait for few clock ticks + clock.tick(5); + + // preempt now + scheduler.warnOrKillContainer(rmContainer); + + List requests = rmContainer.getResourceRequests(); + // Once recovered, resource request will be present again in app + Assert.assertEquals(3, requests.size()); + for (ResourceRequest request : requests) { + Assert.assertEquals(1, + app.getResourceRequest(priority, request.getResourceName()) + .getNumContainers()); + } + + // Send node heartbeat + scheduler.update(); + scheduler.handle(nodeUpdate); + + List containers = scheduler.allocate(appAttemptId, + Collections. emptyList(), + Collections. emptyList(), null, null).getContainers(); + + // Now with updated ResourceRequest, a container is allocated for AM. + Assert.assertTrue(containers.size() == 1); + } @SuppressWarnings("resource") @Test From 2d91a0c0f1738d10ad601b607079cc9f37b8efbc Mon Sep 17 00:00:00 2001 From: Chris Nauroth Date: Tue, 15 Jul 2014 22:53:11 +0000 Subject: [PATCH 02/49] HADOOP-7664. Move attribution to release 2.6.0 section in CHANGES.txt. 
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1610873 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 21f95d47d0b..749c72f5fd5 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -36,10 +36,6 @@ Trunk (Unreleased) HADOOP-7595. Upgrade dependency to Avro 1.5.3. (Alejandro Abdelnur via atm) - HADOOP-7664. Remove warmings when overriding final parameter configuration - if the override value is same as the final parameter value. - (Ravi Prakash via suresh) - HADOOP-8078. Add capability to turn on security in unit tests. (Jaimin Jetly via jitendra) @@ -401,6 +397,10 @@ Release 2.6.0 - UNRELEASED HADOOP-10815. Implement Windows equivalent of mlock. (cnauroth) + HADOOP-7664. Remove warmings when overriding final parameter configuration + if the override value is same as the final parameter value. + (Ravi Prakash via suresh) + OPTIMIZATIONS BUG FIXES From 030580387a4d8d97560a93da2fd7494b4366e3b6 Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Tue, 15 Jul 2014 23:00:17 +0000 Subject: [PATCH 03/49] YARN-2233. Implemented ResourceManager web-services to create, renew and cancel delegation tokens. Contributed by Varun Vasudev. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1610876 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-auth/pom.xml | 11 + hadoop-yarn-project/CHANGES.txt | 3 + .../pom.xml | 15 + .../RMDelegationTokenSecretManager.java | 12 + .../resourcemanager/webapp/RMWebServices.java | 279 ++++++- .../webapp/dao/DelegationToken.java | 99 +++ .../TestRMWebServicesDelegationTokens.java | 784 ++++++++++++++++++ .../src/site/apt/ResourceManagerRest.apt.vm | 220 +++++ 8 files changed, 1410 insertions(+), 13 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/DelegationToken.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesDelegationTokens.java diff --git a/hadoop-common-project/hadoop-auth/pom.xml b/hadoop-common-project/hadoop-auth/pom.xml index 5fcb938d232..a501799ea15 100644 --- a/hadoop-common-project/hadoop-auth/pom.xml +++ b/hadoop-common-project/hadoop-auth/pom.xml @@ -139,6 +139,17 @@ true + + org.apache.maven.plugins + maven-jar-plugin + + + + test-jar + + + + diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 15c448e1d05..41fd5bfd5a3 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -95,6 +95,9 @@ Release 2.5.0 - UNRELEASED YARN-1713. Added get-new-app and submit-app functionality to RM web services. (Varun Vasudev via vinodkv) + YARN-2233. Implemented ResourceManager web-services to create, renew and + cancel delegation tokens. (Varun Vasudev via vinodkv) + IMPROVEMENTS YARN-1479. 
Invalid NaN values in Hadoop REST API JSON response (Chen He via diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/pom.xml index 91dc26c01d5..c2a94ead159 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/pom.xml @@ -192,6 +192,21 @@ test + + + org.apache.hadoop + hadoop-minikdc + test + + + + org.apache.hadoop + hadoop-auth + test + test-jar + ${project.version} + + com.sun.jersey.jersey-test-framework diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/security/RMDelegationTokenSecretManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/security/RMDelegationTokenSecretManager.java index ae786d75d09..90706ff8c94 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/security/RMDelegationTokenSecretManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/security/RMDelegationTokenSecretManager.java @@ -29,8 +29,10 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.security.token.SecretManager.InvalidToken; import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager; import org.apache.hadoop.security.token.delegation.DelegationKey; +import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.DelegationTokenInformation; import org.apache.hadoop.util.ExitUtil; import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; @@ -193,4 +195,14 @@ public class RMDelegationTokenSecretManager extends addPersistedDelegationToken(entry.getKey(), entry.getValue()); } } + + public long getRenewDate(RMDelegationTokenIdentifier ident) + throws InvalidToken { + DelegationTokenInformation info = currentTokens.get(ident); + if (info == null) { + throw new InvalidToken("token (" + ident.toString() + + ") can't be found in cache"); + } + return info.getRenewDate(); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java index 2c2a7aaed9b..0493efdb7d4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java @@ -22,6 +22,7 @@ import java.io.IOException; import 
java.lang.reflect.UndeclaredThrowableException; import java.security.AccessControlException; import java.nio.ByteBuffer; +import java.security.Principal; import java.security.PrivilegedExceptionAction; import java.util.Arrays; import java.util.Collection; @@ -36,6 +37,7 @@ import java.util.concurrent.ConcurrentMap; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import javax.ws.rs.Consumes; +import javax.ws.rs.DELETE; import javax.ws.rs.GET; import javax.ws.rs.POST; import javax.ws.rs.PUT; @@ -57,6 +59,8 @@ import org.apache.hadoop.io.DataOutputBuffer; import org.apache.hadoop.io.Text; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; +import org.apache.hadoop.security.authentication.server.KerberosAuthenticationHandler; import org.apache.hadoop.security.authorize.AuthorizationException; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; @@ -67,6 +71,13 @@ import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationResponse; +import org.apache.hadoop.security.token.SecretManager.InvalidToken; +import org.apache.hadoop.yarn.api.protocolrecords.CancelDelegationTokenRequest; +import org.apache.hadoop.yarn.api.protocolrecords.CancelDelegationTokenResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetDelegationTokenRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetDelegationTokenResponse; +import org.apache.hadoop.yarn.api.protocolrecords.RenewDelegationTokenRequest; +import org.apache.hadoop.yarn.api.protocolrecords.RenewDelegationTokenResponse; import org.apache.hadoop.yarn.api.records.ApplicationAccessType; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; @@ -85,6 +96,7 @@ import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; +import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier; import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger; import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants; import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils; @@ -109,6 +121,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.CapacitySchedule import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterMetricsInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.CredentialsInfo; +import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.DelegationToken; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.FairSchedulerInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.FifoSchedulerInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.LocalResourceInfo; @@ -118,6 +131,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ResourceInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.SchedulerInfo; import 
org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.SchedulerTypeInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.StatisticsItemInfo; +import org.apache.hadoop.yarn.server.utils.BuilderUtils; import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.hadoop.yarn.webapp.BadRequestException; import org.apache.hadoop.yarn.webapp.NotFoundException; @@ -139,6 +153,9 @@ public class RMWebServices { private final Configuration conf; private @Context HttpServletResponse response; + public final static String DELEGATION_TOKEN_HEADER = + "Hadoop-YARN-RM-Delegation-Token"; + @Inject public RMWebServices(final ResourceManager rm, Configuration conf) { this.rm = rm; @@ -147,11 +164,7 @@ public class RMWebServices { protected Boolean hasAccess(RMApp app, HttpServletRequest hsr) { // Check for the authorization. - String remoteUser = hsr.getRemoteUser(); - UserGroupInformation callerUGI = null; - if (remoteUser != null) { - callerUGI = UserGroupInformation.createRemoteUser(remoteUser); - } + UserGroupInformation callerUGI = getCallerUserGroupInformation(hsr, true); if (callerUGI != null && !(this.rm.getApplicationACLsManager().checkAccess(callerUGI, ApplicationAccessType.VIEW_APP, app.getUser(), @@ -626,7 +639,7 @@ public class RMWebServices { public AppState getAppState(@Context HttpServletRequest hsr, @PathParam("appid") String appId) throws AuthorizationException { init(); - UserGroupInformation callerUGI = getCallerUserGroupInformation(hsr); + UserGroupInformation callerUGI = getCallerUserGroupInformation(hsr, true); String userName = ""; if (callerUGI != null) { userName = callerUGI.getUserName(); @@ -661,7 +674,7 @@ public class RMWebServices { IOException { init(); - UserGroupInformation callerUGI = getCallerUserGroupInformation(hsr); + UserGroupInformation callerUGI = getCallerUserGroupInformation(hsr, true); if (callerUGI == null) { String msg = "Unable to obtain user name, user not authenticated"; throw new AuthorizationException(msg); @@ -771,9 +784,14 @@ public class RMWebServices { } private UserGroupInformation getCallerUserGroupInformation( - HttpServletRequest hsr) { + HttpServletRequest hsr, boolean usePrincipal) { String remoteUser = hsr.getRemoteUser(); + if (usePrincipal) { + Principal princ = hsr.getUserPrincipal(); + remoteUser = princ == null ? 
null : princ.getName(); + } + UserGroupInformation callerUGI = null; if (remoteUser != null) { callerUGI = UserGroupInformation.createRemoteUser(remoteUser); @@ -799,7 +817,7 @@ public class RMWebServices { public Response createNewApplication(@Context HttpServletRequest hsr) throws AuthorizationException, IOException, InterruptedException { init(); - UserGroupInformation callerUGI = getCallerUserGroupInformation(hsr); + UserGroupInformation callerUGI = getCallerUserGroupInformation(hsr, true); if (callerUGI == null) { throw new AuthorizationException("Unable to obtain user name, " + "user not authenticated"); @@ -835,7 +853,7 @@ public class RMWebServices { IOException, InterruptedException { init(); - UserGroupInformation callerUGI = getCallerUserGroupInformation(hsr); + UserGroupInformation callerUGI = getCallerUserGroupInformation(hsr, true); if (callerUGI == null) { throw new AuthorizationException("Unable to obtain user name, " + "user not authenticated"); @@ -887,8 +905,8 @@ public class RMWebServices { throw new YarnRuntimeException(msg, e); } NewApplication appId = - new NewApplication(resp.getApplicationId().toString(), new ResourceInfo( - resp.getMaximumResourceCapability())); + new NewApplication(resp.getApplicationId().toString(), + new ResourceInfo(resp.getMaximumResourceCapability())); return appId; } @@ -962,7 +980,8 @@ public class RMWebServices { * @throws IOException */ protected ContainerLaunchContext createContainerLaunchContext( - ApplicationSubmissionContextInfo newApp) throws BadRequestException, IOException { + ApplicationSubmissionContextInfo newApp) throws BadRequestException, + IOException { // create container launch context @@ -1033,4 +1052,238 @@ public class RMWebServices { } return ret; } + + private UserGroupInformation createKerberosUserGroupInformation( + HttpServletRequest hsr) throws AuthorizationException, YarnException { + + UserGroupInformation callerUGI = getCallerUserGroupInformation(hsr, true); + if (callerUGI == null) { + String msg = "Unable to obtain user name, user not authenticated"; + throw new AuthorizationException(msg); + } + + String authType = hsr.getAuthType(); + if (!KerberosAuthenticationHandler.TYPE.equals(authType)) { + String msg = + "Delegation token operations can only be carried out on a " + + "Kerberos authenticated channel"; + throw new YarnException(msg); + } + + callerUGI.setAuthenticationMethod(AuthenticationMethod.KERBEROS); + return callerUGI; + } + + @POST + @Path("/delegation-token") + @Produces({ MediaType.APPLICATION_JSON, MediaType.APPLICATION_XML }) + @Consumes({ MediaType.APPLICATION_JSON, MediaType.APPLICATION_XML }) + public Response postDelegationToken(DelegationToken tokenData, + @Context HttpServletRequest hsr) throws AuthorizationException, + IOException, InterruptedException, Exception { + + init(); + UserGroupInformation callerUGI; + try { + callerUGI = createKerberosUserGroupInformation(hsr); + } catch (YarnException ye) { + return Response.status(Status.FORBIDDEN).entity(ye.getMessage()).build(); + } + return createDelegationToken(tokenData, hsr, callerUGI); + } + + @POST + @Path("/delegation-token/expiration") + @Produces({ MediaType.APPLICATION_JSON, MediaType.APPLICATION_XML }) + @Consumes({ MediaType.APPLICATION_JSON, MediaType.APPLICATION_XML }) + public Response + postDelegationTokenExpiration(@Context HttpServletRequest hsr) + throws AuthorizationException, IOException, InterruptedException, + Exception { + + init(); + UserGroupInformation callerUGI; + try { + callerUGI = 
createKerberosUserGroupInformation(hsr); + } catch (YarnException ye) { + return Response.status(Status.FORBIDDEN).entity(ye.getMessage()).build(); + } + + DelegationToken requestToken = new DelegationToken(); + requestToken.setToken(extractToken(hsr).encodeToUrlString()); + return renewDelegationToken(requestToken, hsr, callerUGI); + } + + private Response createDelegationToken(DelegationToken tokenData, + HttpServletRequest hsr, UserGroupInformation callerUGI) + throws AuthorizationException, IOException, InterruptedException, + Exception { + + final String renewer = tokenData.getRenewer(); + GetDelegationTokenResponse resp; + try { + resp = + callerUGI + .doAs(new PrivilegedExceptionAction() { + @Override + public GetDelegationTokenResponse run() throws IOException, + YarnException { + GetDelegationTokenRequest createReq = + GetDelegationTokenRequest.newInstance(renewer); + return rm.getClientRMService().getDelegationToken(createReq); + } + }); + } catch (Exception e) { + LOG.info("Create delegation token request failed", e); + throw e; + } + + Token tk = + new Token(resp.getRMDelegationToken() + .getIdentifier().array(), resp.getRMDelegationToken().getPassword() + .array(), new Text(resp.getRMDelegationToken().getKind()), new Text( + resp.getRMDelegationToken().getService())); + RMDelegationTokenIdentifier identifier = tk.decodeIdentifier(); + long currentExpiration = + rm.getRMContext().getRMDelegationTokenSecretManager() + .getRenewDate(identifier); + DelegationToken respToken = + new DelegationToken(tk.encodeToUrlString(), renewer, identifier + .getOwner().toString(), tk.getKind().toString(), currentExpiration, + identifier.getMaxDate()); + return Response.status(Status.OK).entity(respToken).build(); + } + + private Response renewDelegationToken(DelegationToken tokenData, + HttpServletRequest hsr, UserGroupInformation callerUGI) + throws AuthorizationException, IOException, InterruptedException, + Exception { + + Token token = + extractToken(tokenData.getToken()); + + org.apache.hadoop.yarn.api.records.Token dToken = + BuilderUtils.newDelegationToken(token.getIdentifier(), token.getKind() + .toString(), token.getPassword(), token.getService().toString()); + final RenewDelegationTokenRequest req = + RenewDelegationTokenRequest.newInstance(dToken); + + RenewDelegationTokenResponse resp; + try { + resp = + callerUGI + .doAs(new PrivilegedExceptionAction() { + @Override + public RenewDelegationTokenResponse run() throws IOException, + YarnException { + return rm.getClientRMService().renewDelegationToken(req); + } + }); + } catch (UndeclaredThrowableException ue) { + if (ue.getCause() instanceof YarnException) { + if (ue.getCause().getCause() instanceof InvalidToken) { + throw new BadRequestException(ue.getCause().getCause().getMessage()); + } else if (ue.getCause().getCause() instanceof org.apache.hadoop.security.AccessControlException) { + return Response.status(Status.FORBIDDEN) + .entity(ue.getCause().getCause().getMessage()).build(); + } + LOG.info("Renew delegation token request failed", ue); + throw ue; + } + LOG.info("Renew delegation token request failed", ue); + throw ue; + } catch (Exception e) { + LOG.info("Renew delegation token request failed", e); + throw e; + } + long renewTime = resp.getNextExpirationTime(); + + DelegationToken respToken = new DelegationToken(); + respToken.setNextExpirationTime(renewTime); + return Response.status(Status.OK).entity(respToken).build(); + } + + // For cancelling tokens, the encoded token is passed as a header + // There are two reasons 
for this - + // 1. Passing a request body as part of a DELETE request is not + // allowed by Jetty + // 2. Passing the encoded token as part of the url is not ideal + // since urls tend to get logged and anyone with access to + // the logs can extract tokens which are meant to be secret + @DELETE + @Path("/delegation-token") + @Produces({ MediaType.APPLICATION_JSON, MediaType.APPLICATION_XML }) + public Response cancelDelegationToken(@Context HttpServletRequest hsr) + throws AuthorizationException, IOException, InterruptedException, + Exception { + + init(); + UserGroupInformation callerUGI; + try { + callerUGI = createKerberosUserGroupInformation(hsr); + } catch (YarnException ye) { + return Response.status(Status.FORBIDDEN).entity(ye.getMessage()).build(); + } + + Token token = extractToken(hsr); + + org.apache.hadoop.yarn.api.records.Token dToken = + BuilderUtils.newDelegationToken(token.getIdentifier(), token.getKind() + .toString(), token.getPassword(), token.getService().toString()); + final CancelDelegationTokenRequest req = + CancelDelegationTokenRequest.newInstance(dToken); + + try { + callerUGI + .doAs(new PrivilegedExceptionAction() { + @Override + public CancelDelegationTokenResponse run() throws IOException, + YarnException { + return rm.getClientRMService().cancelDelegationToken(req); + } + }); + } catch (UndeclaredThrowableException ue) { + if (ue.getCause() instanceof YarnException) { + if (ue.getCause().getCause() instanceof InvalidToken) { + throw new BadRequestException(ue.getCause().getCause().getMessage()); + } else if (ue.getCause().getCause() instanceof org.apache.hadoop.security.AccessControlException) { + return Response.status(Status.FORBIDDEN) + .entity(ue.getCause().getCause().getMessage()).build(); + } + LOG.info("Renew delegation token request failed", ue); + throw ue; + } + LOG.info("Renew delegation token request failed", ue); + throw ue; + } catch (Exception e) { + LOG.info("Renew delegation token request failed", e); + throw e; + } + + return Response.status(Status.OK).build(); + } + + private Token extractToken( + HttpServletRequest request) { + String encodedToken = request.getHeader(DELEGATION_TOKEN_HEADER); + if (encodedToken == null) { + String msg = + "Header '" + DELEGATION_TOKEN_HEADER + + "' containing encoded token not found"; + throw new BadRequestException(msg); + } + return extractToken(encodedToken); + } + + private Token extractToken(String encodedToken) { + Token token = + new Token(); + try { + token.decodeFromUrlString(encodedToken); + } catch (Exception ie) { + String msg = "Could not decode encoded token"; + throw new BadRequestException(msg); + } + return token; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/DelegationToken.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/DelegationToken.java new file mode 100644 index 00000000000..dea5d584ec3 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/DelegationToken.java @@ -0,0 +1,99 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.webapp.dao; + +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlRootElement; + +@XmlRootElement(name = "delegation-token") +@XmlAccessorType(XmlAccessType.FIELD) +public class DelegationToken { + + String token; + String renewer; + String owner; + String kind; + @XmlElement(name = "expiration-time") + Long nextExpirationTime; + @XmlElement(name = "max-validity") + Long maxValidity; + + public DelegationToken() { + } + + public DelegationToken(String token, String renewer, String owner, + String kind, Long nextExpirationTime, Long maxValidity) { + this.token = token; + this.renewer = renewer; + this.owner = owner; + this.kind = kind; + this.nextExpirationTime = nextExpirationTime; + this.maxValidity = maxValidity; + } + + public String getToken() { + return token; + } + + public String getRenewer() { + return renewer; + } + + public Long getNextExpirationTime() { + return nextExpirationTime; + } + + public void setToken(String token) { + this.token = token; + } + + public void setRenewer(String renewer) { + this.renewer = renewer; + } + + public void setNextExpirationTime(long nextExpirationTime) { + this.nextExpirationTime = Long.valueOf(nextExpirationTime); + } + + public String getOwner() { + return owner; + } + + public String getKind() { + return kind; + } + + public Long getMaxValidity() { + return maxValidity; + } + + public void setOwner(String owner) { + this.owner = owner; + } + + public void setKind(String kind) { + this.kind = kind; + } + + public void setMaxValidity(Long maxValidity) { + this.maxValidity = maxValidity; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesDelegationTokens.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesDelegationTokens.java new file mode 100644 index 00000000000..9d25105bd4d --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesDelegationTokens.java @@ -0,0 +1,784 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.webapp; + +import java.io.File; +import java.io.IOException; +import java.io.StringReader; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; +import java.util.concurrent.Callable; + +import javax.servlet.FilterConfig; +import javax.servlet.ServletException; +import javax.ws.rs.core.MediaType; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.minikdc.MiniKdc; +import org.apache.hadoop.security.authentication.KerberosTestUtils; +import org.apache.hadoop.security.authentication.server.AuthenticationFilter; +import org.apache.hadoop.security.authentication.server.KerberosAuthenticationHandler; +import org.apache.hadoop.security.authentication.server.PseudoAuthenticationHandler; +import org.apache.hadoop.security.token.SecretManager.InvalidToken; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.util.Time; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier; +import org.apache.hadoop.yarn.server.resourcemanager.MockRM; +import org.apache.hadoop.yarn.server.resourcemanager.RMContext; +import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.security.QueueACLsManager; +import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.DelegationToken; +import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; +import org.apache.hadoop.yarn.webapp.GenericExceptionHandler; +import org.apache.hadoop.yarn.webapp.WebServicesTestUtils; +import org.codehaus.jettison.json.JSONException; +import org.codehaus.jettison.json.JSONObject; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.NodeList; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertFalse; + +import com.google.inject.Guice; +import com.google.inject.Injector; +import com.google.inject.Singleton; +import com.google.inject.servlet.GuiceServletContextListener; +import com.google.inject.servlet.ServletModule; +import com.sun.jersey.api.client.ClientResponse; +import com.sun.jersey.api.client.ClientResponse.Status; +import com.sun.jersey.api.client.filter.LoggingFilter; +import com.sun.jersey.guice.spi.container.servlet.GuiceContainer; +import 
com.sun.jersey.test.framework.JerseyTest; +import com.sun.jersey.test.framework.WebAppDescriptor; + +@RunWith(Parameterized.class) +public class TestRMWebServicesDelegationTokens extends JerseyTest { + + private static final File testRootDir = new File("target", + TestRMWebServicesDelegationTokens.class.getName() + "-root"); + private static File httpSpnegoKeytabFile = new File( + KerberosTestUtils.getKeytabFile()); + + private static String httpSpnegoPrincipal = KerberosTestUtils + .getServerPrincipal(); + + private static boolean miniKDCStarted = false; + private static MiniKdc testMiniKDC; + static { + try { + testMiniKDC = new MiniKdc(MiniKdc.createConf(), testRootDir); + } catch (Exception e) { + assertTrue("Couldn't create MiniKDC", false); + } + } + + private static MockRM rm; + + private Injector injector; + + private boolean isKerberosAuth = false; + + // Make sure the test uses the published header string + final String yarnTokenHeader = "Hadoop-YARN-RM-Delegation-Token"; + + @Singleton + public static class TestKerberosAuthFilter extends AuthenticationFilter { + @Override + protected Properties getConfiguration(String configPrefix, + FilterConfig filterConfig) throws ServletException { + + Properties properties = + super.getConfiguration(configPrefix, filterConfig); + + properties.put(KerberosAuthenticationHandler.PRINCIPAL, + httpSpnegoPrincipal); + properties.put(KerberosAuthenticationHandler.KEYTAB, + httpSpnegoKeytabFile.getAbsolutePath()); + properties.put(AuthenticationFilter.AUTH_TYPE, "kerberos"); + return properties; + } + } + + @Singleton + public static class TestSimpleAuthFilter extends AuthenticationFilter { + @Override + protected Properties getConfiguration(String configPrefix, + FilterConfig filterConfig) throws ServletException { + + Properties properties = + super.getConfiguration(configPrefix, filterConfig); + + properties.put(KerberosAuthenticationHandler.PRINCIPAL, + httpSpnegoPrincipal); + properties.put(KerberosAuthenticationHandler.KEYTAB, + httpSpnegoKeytabFile.getAbsolutePath()); + properties.put(AuthenticationFilter.AUTH_TYPE, "simple"); + properties.put(PseudoAuthenticationHandler.ANONYMOUS_ALLOWED, "false"); + return properties; + } + } + + private class TestServletModule extends ServletModule { + public Configuration rmconf = new Configuration(); + + @Override + protected void configureServlets() { + bind(JAXBContextResolver.class); + bind(RMWebServices.class); + bind(GenericExceptionHandler.class); + Configuration rmconf = new Configuration(); + rmconf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, + YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS); + rmconf.setClass(YarnConfiguration.RM_SCHEDULER, FifoScheduler.class, + ResourceScheduler.class); + rmconf.setBoolean(YarnConfiguration.YARN_ACL_ENABLE, true); + rm = new MockRM(rmconf); + bind(ResourceManager.class).toInstance(rm); + bind(RMContext.class).toInstance(rm.getRMContext()); + bind(ApplicationACLsManager.class).toInstance( + rm.getApplicationACLsManager()); + bind(QueueACLsManager.class).toInstance(rm.getQueueACLsManager()); + if (isKerberosAuth == true) { + filter("/*").through(TestKerberosAuthFilter.class); + } else { + filter("/*").through(TestSimpleAuthFilter.class); + } + serve("/*").with(GuiceContainer.class); + } + } + + private Injector getSimpleAuthInjector() { + return Guice.createInjector(new TestServletModule() { + @Override + protected void configureServlets() { + isKerberosAuth = false; + rmconf.set( + CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, + "simple"); + 
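// Descriptive note (editor-added, hedged): with hadoop.security.authentication
+ // left at "simple", the delegation-token endpoints are expected to answer
+ // FORBIDDEN for every request issued during this test run.
+ 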
super.configureServlets(); + } + }); + } + + private Injector getKerberosAuthInjector() { + return Guice.createInjector(new TestServletModule() { + @Override + protected void configureServlets() { + isKerberosAuth = true; + rmconf.set( + CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, + "kerberos"); + rmconf.set(YarnConfiguration.RM_WEBAPP_SPNEGO_USER_NAME_KEY, + httpSpnegoPrincipal); + rmconf.set(YarnConfiguration.RM_WEBAPP_SPNEGO_KEYTAB_FILE_KEY, + httpSpnegoKeytabFile.getAbsolutePath()); + rmconf.set(YarnConfiguration.NM_WEBAPP_SPNEGO_USER_NAME_KEY, + httpSpnegoPrincipal); + rmconf.set(YarnConfiguration.NM_WEBAPP_SPNEGO_KEYTAB_FILE_KEY, + httpSpnegoKeytabFile.getAbsolutePath()); + + super.configureServlets(); + } + }); + } + + public class GuiceServletConfig extends GuiceServletContextListener { + + @Override + protected Injector getInjector() { + return injector; + } + } + + @Parameters + public static Collection guiceConfigs() { + return Arrays.asList(new Object[][] { { 0 }, { 1 } }); + } + + public TestRMWebServicesDelegationTokens(int run) throws Exception { + super(new WebAppDescriptor.Builder( + "org.apache.hadoop.yarn.server.resourcemanager.webapp") + .contextListenerClass(GuiceServletConfig.class) + .filterClass(com.google.inject.servlet.GuiceFilter.class) + .contextPath("jersey-guice-filter").servletPath("/").build()); + setupKDC(); + switch (run) { + case 0: + default: + injector = getKerberosAuthInjector(); + break; + case 1: + injector = getSimpleAuthInjector(); + break; + } + } + + private void setupKDC() throws Exception { + if (miniKDCStarted == false) { + testMiniKDC.start(); + getKdc().createPrincipal(httpSpnegoKeytabFile, "HTTP/localhost", + "client", "client2", "client3"); + miniKDCStarted = true; + } + } + + private MiniKdc getKdc() { + return testMiniKDC; + } + + @Before + @Override + public void setUp() throws Exception { + super.setUp(); + httpSpnegoKeytabFile.deleteOnExit(); + testRootDir.deleteOnExit(); + } + + @After + @Override + public void tearDown() throws Exception { + rm.stop(); + super.tearDown(); + } + + // Simple test - try to create a delegation token via web services and check + // to make sure we get back a valid token. Validate token using RM function + // calls. 
It should only succeed with the kerberos filter + @Test + public void testCreateDelegationToken() throws Exception { + rm.start(); + this.client().addFilter(new LoggingFilter(System.out)); + final String renewer = "test-renewer"; + String jsonBody = "{ \"renewer\" : \"" + renewer + "\" }"; + String xmlBody = + "" + renewer + + ""; + String[] mediaTypes = + { MediaType.APPLICATION_JSON, MediaType.APPLICATION_XML }; + Map bodyMap = new HashMap(); + bodyMap.put(MediaType.APPLICATION_JSON, jsonBody); + bodyMap.put(MediaType.APPLICATION_XML, xmlBody); + for (final String mediaType : mediaTypes) { + final String body = bodyMap.get(mediaType); + for (final String contentType : mediaTypes) { + if (isKerberosAuth == true) { + verifyKerberosAuthCreate(mediaType, contentType, body, renewer); + } else { + verifySimpleAuthCreate(mediaType, contentType, body); + } + } + } + + rm.stop(); + return; + } + + private void verifySimpleAuthCreate(String mediaType, String contentType, + String body) { + ClientResponse response = + resource().path("ws").path("v1").path("cluster") + .path("delegation-token").queryParam("user.name", "testuser") + .accept(contentType).entity(body, mediaType) + .post(ClientResponse.class); + assertEquals(Status.FORBIDDEN, response.getClientResponseStatus()); + } + + private void verifyKerberosAuthCreate(String mType, String cType, + String reqBody, String renUser) throws Exception { + final String mediaType = mType; + final String contentType = cType; + final String body = reqBody; + final String renewer = renUser; + KerberosTestUtils.doAsClient(new Callable() { + @Override + public Void call() throws Exception { + ClientResponse response = + resource().path("ws").path("v1").path("cluster") + .path("delegation-token").accept(contentType) + .entity(body, mediaType).post(ClientResponse.class); + assertEquals(Status.OK, response.getClientResponseStatus()); + DelegationToken tok = getDelegationTokenFromResponse(response); + assertFalse(tok.getToken().isEmpty()); + Token token = + new Token(); + token.decodeFromUrlString(tok.getToken()); + assertEquals(renewer, token.decodeIdentifier().getRenewer().toString()); + assertValidRMToken(tok.getToken()); + DelegationToken dtoken = new DelegationToken(); + response = + resource().path("ws").path("v1").path("cluster") + .path("delegation-token").accept(contentType) + .entity(dtoken, mediaType).post(ClientResponse.class); + assertEquals(Status.OK, response.getClientResponseStatus()); + tok = getDelegationTokenFromResponse(response); + assertFalse(tok.getToken().isEmpty()); + token = new Token(); + token.decodeFromUrlString(tok.getToken()); + assertEquals("", token.decodeIdentifier().getRenewer().toString()); + assertValidRMToken(tok.getToken()); + return null; + } + }); + } + + // Test to verify renew functionality - create a token and then try to renew + // it. 
The renewer should succeed; owner and third user should fail + @Test + public void testRenewDelegationToken() throws Exception { + client().addFilter(new LoggingFilter(System.out)); + rm.start(); + final String renewer = "client2"; + this.client().addFilter(new LoggingFilter(System.out)); + final DelegationToken dummyToken = new DelegationToken(); + dummyToken.setRenewer(renewer); + String[] mediaTypes = + { MediaType.APPLICATION_JSON, MediaType.APPLICATION_XML }; + for (final String mediaType : mediaTypes) { + for (final String contentType : mediaTypes) { + + if (isKerberosAuth == false) { + verifySimpleAuthRenew(mediaType, contentType); + continue; + } + + // test "client" and client2" trying to renew "client" token + final DelegationToken responseToken = + KerberosTestUtils.doAsClient(new Callable() { + @Override + public DelegationToken call() throws Exception { + ClientResponse response = + resource().path("ws").path("v1").path("cluster") + .path("delegation-token").accept(contentType) + .entity(dummyToken, mediaType).post(ClientResponse.class); + assertEquals(Status.OK, response.getClientResponseStatus()); + DelegationToken tok = getDelegationTokenFromResponse(response); + assertFalse(tok.getToken().isEmpty()); + String body = generateRenewTokenBody(mediaType, tok.getToken()); + response = + resource().path("ws").path("v1").path("cluster") + .path("delegation-token").path("expiration") + .header(yarnTokenHeader, tok.getToken()) + .accept(contentType).entity(body, mediaType) + .post(ClientResponse.class); + assertEquals(Status.FORBIDDEN, + response.getClientResponseStatus()); + return tok; + } + }); + + KerberosTestUtils.doAs(renewer, new Callable() { + @Override + public DelegationToken call() throws Exception { + // renew twice so that we can confirm that the + // expiration time actually changes + long oldExpirationTime = Time.now(); + assertValidRMToken(responseToken.getToken()); + String body = + generateRenewTokenBody(mediaType, responseToken.getToken()); + ClientResponse response = + resource().path("ws").path("v1").path("cluster") + .path("delegation-token").path("expiration") + .header(yarnTokenHeader, responseToken.getToken()) + .accept(contentType).entity(body, mediaType) + .post(ClientResponse.class); + assertEquals(Status.OK, response.getClientResponseStatus()); + DelegationToken tok = getDelegationTokenFromResponse(response); + String message = + "Expiration time not as expected: old = " + oldExpirationTime + + "; new = " + tok.getNextExpirationTime(); + assertTrue(message, tok.getNextExpirationTime() > oldExpirationTime); + oldExpirationTime = tok.getNextExpirationTime(); + // artificial sleep to ensure we get a different expiration time + Thread.sleep(1000); + response = + resource().path("ws").path("v1").path("cluster") + .path("delegation-token").path("expiration") + .header(yarnTokenHeader, responseToken.getToken()) + .accept(contentType).entity(body, mediaType) + .post(ClientResponse.class); + assertEquals(Status.OK, response.getClientResponseStatus()); + tok = getDelegationTokenFromResponse(response); + message = + "Expiration time not as expected: old = " + oldExpirationTime + + "; new = " + tok.getNextExpirationTime(); + assertTrue(message, tok.getNextExpirationTime() > oldExpirationTime); + return tok; + } + }); + + // test unauthorized user renew attempt + KerberosTestUtils.doAs("client3", new Callable() { + @Override + public DelegationToken call() throws Exception { + String body = + generateRenewTokenBody(mediaType, responseToken.getToken()); + 
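// Descriptive note (editor-added, hedged): client3 is not the renewer recorded
+ // in the token, so this renewal is expected to be rejected with FORBIDDEN even
+ // though the token itself is still valid.
+ 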
ClientResponse response = + resource().path("ws").path("v1").path("cluster") + .path("delegation-token").path("expiration") + .header(yarnTokenHeader, responseToken.getToken()) + .accept(contentType).entity(body, mediaType) + .post(ClientResponse.class); + assertEquals(Status.FORBIDDEN, response.getClientResponseStatus()); + return null; + } + }); + + // test bad request - incorrect format, empty token string and random + // token string + KerberosTestUtils.doAsClient(new Callable() { + @Override + public Void call() throws Exception { + String token = "TEST_TOKEN_STRING"; + String body = ""; + if (mediaType.equals(MediaType.APPLICATION_JSON)) { + body = "{\"token\": \"" + token + "\" }"; + } else { + body = + "" + token + + ""; + } + + // missing token header + ClientResponse response = + resource().path("ws").path("v1").path("cluster") + .path("delegation-token").path("expiration") + .accept(contentType).entity(body, mediaType) + .post(ClientResponse.class); + assertEquals(Status.BAD_REQUEST, response.getClientResponseStatus()); + return null; + } + }); + } + } + + rm.stop(); + return; + } + + private void verifySimpleAuthRenew(String mediaType, String contentType) { + String token = "TEST_TOKEN_STRING"; + String body = ""; + // contents of body don't matter because the request processing shouldn't + // get that far + if (mediaType.equals(MediaType.APPLICATION_JSON)) { + body = "{\"token\": \"" + token + "\" }"; + body = "{\"abcd\": \"test-123\" }"; + } else { + body = + "" + token + ""; + body = "abcd"; + } + ClientResponse response = + resource().path("ws").path("v1").path("cluster") + .path("delegation-token").queryParam("user.name", "testuser") + .accept(contentType).entity(body, mediaType) + .post(ClientResponse.class); + assertEquals(Status.FORBIDDEN, response.getClientResponseStatus()); + } + + // Test to verify cancel functionality - create a token and then try to cancel + // it. 
The owner and renewer should succeed; third user should fail + @Test + public void testCancelDelegationToken() throws Exception { + rm.start(); + this.client().addFilter(new LoggingFilter(System.out)); + if (isKerberosAuth == false) { + verifySimpleAuthCancel(); + return; + } + + final DelegationToken dtoken = new DelegationToken(); + String renewer = "client2"; + dtoken.setRenewer(renewer); + String[] mediaTypes = + { MediaType.APPLICATION_JSON, MediaType.APPLICATION_XML }; + for (final String mediaType : mediaTypes) { + for (final String contentType : mediaTypes) { + + // owner should be able to cancel delegation token + KerberosTestUtils.doAsClient(new Callable() { + @Override + public Void call() throws Exception { + ClientResponse response = + resource().path("ws").path("v1").path("cluster") + .path("delegation-token").accept(contentType) + .entity(dtoken, mediaType).post(ClientResponse.class); + assertEquals(Status.OK, response.getClientResponseStatus()); + DelegationToken tok = getDelegationTokenFromResponse(response); + response = + resource().path("ws").path("v1").path("cluster") + .path("delegation-token") + .header(yarnTokenHeader, tok.getToken()).accept(contentType) + .delete(ClientResponse.class); + assertEquals(Status.OK, response.getClientResponseStatus()); + assertTokenCancelled(tok.getToken()); + return null; + } + }); + + // renewer should be able to cancel token + final DelegationToken tmpToken = + KerberosTestUtils.doAsClient(new Callable() { + @Override + public DelegationToken call() throws Exception { + ClientResponse response = + resource().path("ws").path("v1").path("cluster") + .path("delegation-token").accept(contentType) + .entity(dtoken, mediaType).post(ClientResponse.class); + assertEquals(Status.OK, response.getClientResponseStatus()); + DelegationToken tok = getDelegationTokenFromResponse(response); + return tok; + } + }); + + KerberosTestUtils.doAs(renewer, new Callable() { + @Override + public Void call() throws Exception { + ClientResponse response = + resource().path("ws").path("v1").path("cluster") + .path("delegation-token") + .header(yarnTokenHeader, tmpToken.getToken()) + .accept(contentType).delete(ClientResponse.class); + assertEquals(Status.OK, response.getClientResponseStatus()); + assertTokenCancelled(tmpToken.getToken()); + return null; + } + }); + + // third user should not be able to cancel token + final DelegationToken tmpToken2 = + KerberosTestUtils.doAsClient(new Callable() { + @Override + public DelegationToken call() throws Exception { + ClientResponse response = + resource().path("ws").path("v1").path("cluster") + .path("delegation-token").accept(contentType) + .entity(dtoken, mediaType).post(ClientResponse.class); + assertEquals(Status.OK, response.getClientResponseStatus()); + DelegationToken tok = getDelegationTokenFromResponse(response); + return tok; + } + }); + + KerberosTestUtils.doAs("client3", new Callable() { + @Override + public Void call() throws Exception { + ClientResponse response = + resource().path("ws").path("v1").path("cluster") + .path("delegation-token") + .header(yarnTokenHeader, tmpToken2.getToken()) + .accept(contentType).delete(ClientResponse.class); + assertEquals(Status.FORBIDDEN, response.getClientResponseStatus()); + assertValidRMToken(tmpToken2.getToken()); + return null; + } + }); + + testCancelTokenBadRequests(mediaType, contentType); + } + } + + rm.stop(); + return; + } + + private void testCancelTokenBadRequests(String mType, String cType) + throws Exception { + + final String mediaType = mType; + final 
String contentType = cType; + final DelegationToken dtoken = new DelegationToken(); + String renewer = "client2"; + dtoken.setRenewer(renewer); + + // bad request(invalid header value) + KerberosTestUtils.doAsClient(new Callable() { + @Override + public Void call() throws Exception { + ClientResponse response = + resource().path("ws").path("v1").path("cluster") + .path("delegation-token") + .header(yarnTokenHeader, "random-string").accept(contentType) + .delete(ClientResponse.class); + assertEquals(Status.BAD_REQUEST, response.getClientResponseStatus()); + return null; + } + }); + + // bad request(missing header) + KerberosTestUtils.doAsClient(new Callable() { + @Override + public Void call() throws Exception { + ClientResponse response = + resource().path("ws").path("v1").path("cluster") + .path("delegation-token").accept(contentType) + .delete(ClientResponse.class); + assertEquals(Status.BAD_REQUEST, response.getClientResponseStatus()); + + return null; + } + }); + + // bad request(cancelled token) + final DelegationToken tmpToken = + KerberosTestUtils.doAsClient(new Callable() { + @Override + public DelegationToken call() throws Exception { + ClientResponse response = + resource().path("ws").path("v1").path("cluster") + .path("delegation-token").accept(contentType) + .entity(dtoken, mediaType).post(ClientResponse.class); + assertEquals(Status.OK, response.getClientResponseStatus()); + DelegationToken tok = getDelegationTokenFromResponse(response); + return tok; + } + }); + + KerberosTestUtils.doAs(renewer, new Callable() { + @Override + public Void call() throws Exception { + ClientResponse response = + resource().path("ws").path("v1").path("cluster") + .path("delegation-token") + .header(yarnTokenHeader, tmpToken.getToken()).accept(contentType) + .delete(ClientResponse.class); + assertEquals(Status.OK, response.getClientResponseStatus()); + response = + resource().path("ws").path("v1").path("cluster") + .path("delegation-token") + .header(yarnTokenHeader, tmpToken.getToken()).accept(contentType) + .delete(ClientResponse.class); + assertEquals(Status.BAD_REQUEST, response.getClientResponseStatus()); + return null; + } + }); + } + + private void verifySimpleAuthCancel() { + // contents of header don't matter; request should never get that far + ClientResponse response = + resource().path("ws").path("v1").path("cluster") + .path("delegation-token").queryParam("user.name", "testuser") + .header(RMWebServices.DELEGATION_TOKEN_HEADER, "random") + .delete(ClientResponse.class); + assertEquals(Status.FORBIDDEN, response.getClientResponseStatus()); + } + + private DelegationToken + getDelegationTokenFromResponse(ClientResponse response) + throws IOException, ParserConfigurationException, SAXException, + JSONException { + if (response.getType().toString().equals(MediaType.APPLICATION_JSON)) { + return getDelegationTokenFromJson(response.getEntity(JSONObject.class)); + } + return getDelegationTokenFromXML(response.getEntity(String.class)); + } + + public static DelegationToken getDelegationTokenFromXML(String tokenXML) + throws IOException, ParserConfigurationException, SAXException { + DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + DocumentBuilder db = dbf.newDocumentBuilder(); + InputSource is = new InputSource(); + is.setCharacterStream(new StringReader(tokenXML)); + Document dom = db.parse(is); + NodeList nodes = dom.getElementsByTagName("delegation-token"); + assertEquals("incorrect number of elements", 1, nodes.getLength()); + Element element = (Element) nodes.item(0); 
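+ // Descriptive note (editor-added, hedged): a create response carries the encoded
+ // token string, while a renew response only carries an expiration-time element,
+ // so this helper fills in whichever field is present.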
+ DelegationToken ret = new DelegationToken(); + String token = WebServicesTestUtils.getXmlString(element, "token"); + if (token != null) { + ret.setToken(token); + } else { + long expiration = + WebServicesTestUtils.getXmlLong(element, "expiration-time"); + ret.setNextExpirationTime(expiration); + } + return ret; + } + + public static DelegationToken getDelegationTokenFromJson(JSONObject json) + throws JSONException { + DelegationToken ret = new DelegationToken(); + if (json.has("token")) { + ret.setToken(json.getString("token")); + } else if (json.has("expiration-time")) { + ret.setNextExpirationTime(json.getLong("expiration-time")); + } + return ret; + } + + private void assertValidRMToken(String encodedToken) throws IOException { + Token realToken = + new Token(); + realToken.decodeFromUrlString(encodedToken); + RMDelegationTokenIdentifier ident = realToken.decodeIdentifier(); + rm.getRMContext().getRMDelegationTokenSecretManager() + .verifyToken(ident, realToken.getPassword()); + assertTrue(rm.getRMContext().getRMDelegationTokenSecretManager() + .getAllTokens().containsKey(ident)); + } + + private void assertTokenCancelled(String encodedToken) throws Exception { + Token realToken = + new Token(); + realToken.decodeFromUrlString(encodedToken); + RMDelegationTokenIdentifier ident = realToken.decodeIdentifier(); + boolean exceptionCaught = false; + try { + rm.getRMContext().getRMDelegationTokenSecretManager() + .verifyToken(ident, realToken.getPassword()); + } catch (InvalidToken it) { + exceptionCaught = true; + } + assertTrue("InvalidToken exception not thrown", exceptionCaught); + assertFalse(rm.getRMContext().getRMDelegationTokenSecretManager() + .getAllTokens().containsKey(ident)); + } + + private static String generateRenewTokenBody(String mediaType, String token) { + String body = ""; + if (mediaType.equals(MediaType.APPLICATION_JSON)) { + body = "{\"token\": \"" + token + "\" }"; + } else { + body = + "" + token + ""; + } + return body; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/ResourceManagerRest.apt.vm b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/ResourceManagerRest.apt.vm index e419ceef577..6359e2b7f98 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/ResourceManagerRest.apt.vm +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/ResourceManagerRest.apt.vm @@ -2707,3 +2707,223 @@ Server: Jetty(6.1.26) +---+ +* Cluster {Delegation Tokens API} + + The Delegation Tokens API can be used to create, renew and cancel YARN ResourceManager delegation tokens. All delegation token requests must be carried out on a Kerberos authenticated connection(using SPNEGO). Carrying out operations on a non-kerberos connection will result in a FORBIDDEN response. In case of renewing a token, only the renewer specified when creating the token can renew the token. Other users(including the owner) are forbidden from renewing tokens. It should be noted that when cancelling or renewing a token, the token to be cancelled or renewed is specified by setting a header. + + This feature is currently in the alpha stage and may change in the future. + +** URI + + Use the following URI to create and cancel delegation tokens. + +------ + * http:///ws/v1/cluster/delegation-token +------ + + Use the following URI to renew delegation tokens. 
+ +------ + * http:///ws/v1/cluster/delegation-token/expiration +------ + +** HTTP Operations Supported + +------ + * POST + * DELETE +------ + +** Query Parameters Supported + +------ + None +------ + +** Elements of the object + + The response from the delegation tokens API contains one of the fields listed below. + +*---------------+--------------+-------------------------------+ +|| Item || Data Type || Description | +*---------------+--------------+-------------------------------+ +| token | string | The delegation token | +*---------------+--------------+-------------------------------+ +| renewer | string | The user who is allowed to renew the delegation token | +*---------------+--------------+-------------------------------+ +| owner | string | The owner of the delegation token | +*---------------+--------------+-------------------------------+ +| kind | string | The kind of delegation token | +*---------------+--------------+-------------------------------+ +| expiration-time | long | The expiration time of the token | +*---------------+--------------+-------------------------------+ +| max-validity | long | The maximum validity of the token | +*---------------+--------------+-------------------------------+ + +** Response Examples + +*** Creating a token + + <> + + HTTP Request: + +------ + POST http:///ws/v1/cluster/delegation-token + Accept: application/json + Content-Type: application/json + { + "renewer" : "test-renewer" + } +------ + + Response Header + ++---+ + HTTP/1.1 200 OK + WWW-Authenticate: Negotiate ... + Date: Sat, 28 Jun 2014 18:08:11 GMT + Server: Jetty(6.1.26) + Set-Cookie: ... + Content-Type: application/json ++---+ + + Response body + ++---+ + { + "token":"MgASY2xpZW50QEVYQU1QTEUuQ09NDHRlc3QtcmVuZXdlcgCKAUckiEZpigFHSJTKaQECFN9EMM9BzfPoDxu572EVUpzqhnSGE1JNX0RFTEVHQVRJT05fVE9LRU4A", + "renewer":"test-renewer", + "owner":"client@EXAMPLE.COM", + "kind":"RM_DELEGATION_TOKEN", + "expiration-time":"1405153616489", + "max-validity":"1405672016489" + } ++---+ + + <> + + HTTP Request + +------ + POST http:///ws/v1/cluster/delegation-token + Accept: application/xml + Content-Type: application/xml + + test-renewer + +------ + + Response Header + ++---+ + HTTP/1.1 200 OK + WWW-Authenticate: Negotiate ... + Date: Sat, 28 Jun 2014 18:08:11 GMT + Content-Length: 423 + Server: Jetty(6.1.26) + Set-Cookie: ... + Content-Type: application/xml ++---+ + + Response Body + ++---+ + + + MgASY2xpZW50QEVYQU1QTEUuQ09NDHRlc3QtcmVuZXdlcgCKAUckgZ8yigFHSI4jMgcCFDTG8X6XFFn2udQngzSXQL8vWaKIE1JNX0RFTEVHQVRJT05fVE9LRU4A + test-renewer + client@EXAMPLE.COM + RM_DELEGATION_TOKEN + 1405153180466 + 1405671580466 + ++---+ + +*** Renewing a token + + <> + + HTTP Request: + +------ + POST http:///ws/v1/cluster/delegation-token/expiration + Accept: application/json + Hadoop-YARN-RM-Delegation-Token: MgASY2xpZW50QEVYQU1QTEUuQ09NDHRlc3QtcmVuZXdlcgCKAUbjqcHHigFHB7ZFxwQCFKWD3znCkDSy6SQIjRCLDydxbxvgE1JNX0RFTEVHQVRJT05fVE9LRU4A + Content-Type: application/json +------ + + Response Header + ++---+ + HTTP/1.1 200 OK + WWW-Authenticate: Negotiate ... + Date: Sat, 28 Jun 2014 18:08:11 GMT + Server: Jetty(6.1.26) + Set-Cookie: ... 
+ Content-Type: application/json ++---+ + + Response body + ++---+ + { + "expiration-time":"1404112520402" + } ++---+ + + <> + + HTTP Request + +------ + POST http:///ws/v1/cluster/delegation-token/expiration + Accept: application/xml + Content-Type: application/xml + Hadoop-YARN-RM-Delegation-Token: MgASY2xpZW50QEVYQU1QTEUuQ09NDHRlc3QtcmVuZXdlcgCKAUbjqcHHigFHB7ZFxwQCFKWD3znCkDSy6SQIjRCLDydxbxvgE1JNX0RFTEVHQVRJT05fVE9LRU4A +------ + + Response Header + ++---+ + HTTP/1.1 200 OK + WWW-Authenticate: Negotiate ... + Date: Sat, 28 Jun 2014 18:08:11 GMT + Content-Length: 423 + Server: Jetty(6.1.26) + Set-Cookie: ... + Content-Type: application/xml ++---+ + + Response Body + ++---+ + + + 1404112520402 + ++---+ + +*** Cancelling a token + + HTTP Request + +----- +DELETE http:///ws/v1/cluster/delegation-token +Hadoop-YARN-RM-Delegation-Token: MgASY2xpZW50QEVYQU1QTEUuQ09NDHRlc3QtcmVuZXdlcgCKAUbjqcHHigFHB7ZFxwQCFKWD3znCkDSy6SQIjRCLDydxbxvgE1JNX0RFTEVHQVRJT05fVE9LRU4A +Accept: application/xml +----- + + Response Header + ++---+ + HTTP/1.1 200 OK + WWW-Authenticate: Negotiate ... + Date: Sun, 29 Jun 2014 07:25:18 GMT + Transfer-Encoding: chunked + Server: Jetty(6.1.26) + Set-Cookie: ... + Content-Type: application/xml ++---+ + + No response body. From 790ee456439729073d75ccf91e1f63b3d360b1c7 Mon Sep 17 00:00:00 2001 From: Jing Zhao Date: Tue, 15 Jul 2014 23:05:40 +0000 Subject: [PATCH 04/49] HADOOP-10673. Update rpc metrics when the call throws an exception. Contributed by Ming Ma. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1610879 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 3 ++ .../apache/hadoop/ipc/ProtobufRpcEngine.java | 33 ++++++++++----- .../java/org/apache/hadoop/ipc/Server.java | 12 ++---- .../apache/hadoop/ipc/WritableRpcEngine.java | 41 ++++++++++++------- .../java/org/apache/hadoop/ipc/TestRPC.java | 2 + 5 files changed, 57 insertions(+), 34 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 749c72f5fd5..83bac85b638 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -401,6 +401,9 @@ Release 2.6.0 - UNRELEASED if the override value is same as the final parameter value. (Ravi Prakash via suresh) + HADOOP-10673. Update rpc metrics when the call throws an exception. 
(Ming Ma + via jing9) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java index 3bdcbd9856b..64615d22f85 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java @@ -599,24 +599,35 @@ public class ProtobufRpcEngine implements RpcEngine { .mergeFrom(request.theRequestRead).build(); Message result; + long startTime = Time.now(); + int qTime = (int) (startTime - receiveTime); + Exception exception = null; try { - long startTime = Time.now(); server.rpcDetailedMetrics.init(protocolImpl.protocolClass); result = service.callBlockingMethod(methodDescriptor, null, param); - int processingTime = (int) (Time.now() - startTime); - int qTime = (int) (startTime - receiveTime); - if (LOG.isDebugEnabled()) { - LOG.info("Served: " + methodName + " queueTime= " + qTime + - " procesingTime= " + processingTime); - } - server.rpcMetrics.addRpcQueueTime(qTime); - server.rpcMetrics.addRpcProcessingTime(processingTime); - server.rpcDetailedMetrics.addProcessingTime(methodName, - processingTime); } catch (ServiceException e) { + exception = (Exception) e.getCause(); throw (Exception) e.getCause(); } catch (Exception e) { + exception = e; throw e; + } finally { + int processingTime = (int) (Time.now() - startTime); + if (LOG.isDebugEnabled()) { + String msg = "Served: " + methodName + " queueTime= " + qTime + + " procesingTime= " + processingTime; + if (exception != null) { + msg += " exception= " + exception.getClass().getSimpleName(); + } + LOG.debug(msg); + } + String detailedMetricsName = (exception == null) ? + methodName : + exception.getClass().getSimpleName(); + server.rpcMetrics.addRpcQueueTime(qTime); + server.rpcMetrics.addRpcProcessingTime(processingTime); + server.rpcDetailedMetrics.addProcessingTime(detailedMetricsName, + processingTime); } return new RpcResponseWrapper(result); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java index 0f11c97c9eb..24dd0c21b82 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java @@ -355,8 +355,8 @@ public abstract class Server { private int readThreads; // number of read threads private int readerPendingConnectionQueue; // number of connections to queue per read thread private Class rpcRequestClass; // class used for deserializing the rpc request - protected RpcMetrics rpcMetrics; - protected RpcDetailedMetrics rpcDetailedMetrics; + final protected RpcMetrics rpcMetrics; + final protected RpcDetailedMetrics rpcDetailedMetrics; private Configuration conf; private String portRangeConfig = null; @@ -2494,12 +2494,8 @@ public abstract class Server { listener.doStop(); responder.interrupt(); notifyAll(); - if (this.rpcMetrics != null) { - this.rpcMetrics.shutdown(); - } - if (this.rpcDetailedMetrics != null) { - this.rpcDetailedMetrics.shutdown(); - } + this.rpcMetrics.shutdown(); + this.rpcDetailedMetrics.shutdown(); } /** Wait for the server to be stopped. 
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java index 34823b34d1f..04ab4dc2699 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java @@ -471,37 +471,29 @@ public class WritableRpcEngine implements RpcEngine { // Invoke the protocol method + long startTime = Time.now(); + int qTime = (int) (startTime-receivedTime); + Exception exception = null; try { - long startTime = Time.now(); - Method method = + Method method = protocolImpl.protocolClass.getMethod(call.getMethodName(), call.getParameterClasses()); method.setAccessible(true); server.rpcDetailedMetrics.init(protocolImpl.protocolClass); Object value = method.invoke(protocolImpl.protocolImpl, call.getParameters()); - int processingTime = (int) (Time.now() - startTime); - int qTime = (int) (startTime-receivedTime); - if (LOG.isDebugEnabled()) { - LOG.debug("Served: " + call.getMethodName() + - " queueTime= " + qTime + - " procesingTime= " + processingTime); - } - server.rpcMetrics.addRpcQueueTime(qTime); - server.rpcMetrics.addRpcProcessingTime(processingTime); - server.rpcDetailedMetrics.addProcessingTime(call.getMethodName(), - processingTime); if (server.verbose) log("Return: "+value); - return new ObjectWritable(method.getReturnType(), value); } catch (InvocationTargetException e) { Throwable target = e.getTargetException(); if (target instanceof IOException) { + exception = (IOException)target; throw (IOException)target; } else { IOException ioe = new IOException(target.toString()); ioe.setStackTrace(target.getStackTrace()); + exception = ioe; throw ioe; } } catch (Throwable e) { @@ -510,8 +502,27 @@ public class WritableRpcEngine implements RpcEngine { } IOException ioe = new IOException(e.toString()); ioe.setStackTrace(e.getStackTrace()); + exception = ioe; throw ioe; - } + } finally { + int processingTime = (int) (Time.now() - startTime); + if (LOG.isDebugEnabled()) { + String msg = "Served: " + call.getMethodName() + + " queueTime= " + qTime + + " procesingTime= " + processingTime; + if (exception != null) { + msg += " exception= " + exception.getClass().getSimpleName(); + } + LOG.debug(msg); + } + String detailedMetricsName = (exception == null) ? + call.getMethodName() : + exception.getClass().getSimpleName(); + server.rpcMetrics.addRpcQueueTime(qTime); + server.rpcMetrics.addRpcProcessingTime(processingTime); + server.rpcDetailedMetrics.addProcessingTime(detailedMetricsName, + processingTime); + } } } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java index 45499f5c98f..dfbc91c43a6 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java @@ -496,6 +496,8 @@ public class TestRPC { caught = true; } assertTrue(caught); + rb = getMetrics(server.rpcDetailedMetrics.name()); + assertCounter("IOExceptionNumOps", 1L, rb); proxy.testServerGet(); From 619837039b24f62430c9c930624e4591ad9941ee Mon Sep 17 00:00:00 2001 From: Chris Nauroth Date: Wed, 16 Jul 2014 05:16:42 +0000 Subject: [PATCH 05/49] HADOOP-10845. Add common tests for ACLs in combination with viewfs. 
Contributed by Stephen Chu. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1610911 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 3 + .../apache/hadoop/fs/viewfs/ChRootedFs.java | 34 +++++++ .../hadoop/fs/viewfs/ViewFileSystem.java | 42 +++++++++ .../org/apache/hadoop/fs/viewfs/ViewFs.java | 89 +++++++++++++++++++ .../fs/viewfs/ViewFileSystemBaseTest.java | 55 +++++++++++- .../hadoop/fs/viewfs/ViewFsBaseTest.java | 52 +++++++++++ 6 files changed, 274 insertions(+), 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 83bac85b638..d7a7bf93dcd 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -404,6 +404,9 @@ Release 2.6.0 - UNRELEASED HADOOP-10673. Update rpc metrics when the call throws an exception. (Ming Ma via jing9) + HADOOP-10845. Add common tests for ACLs in combination with viewfs. + (Stephen Chu via cnauroth) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ChRootedFs.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ChRootedFs.java index 2c184f6bb05..f1975eae1b2 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ChRootedFs.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ChRootedFs.java @@ -37,6 +37,8 @@ import org.apache.hadoop.fs.FsStatus; import org.apache.hadoop.fs.Options.ChecksumOpt; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.UnresolvedLinkException; +import org.apache.hadoop.fs.permission.AclEntry; +import org.apache.hadoop.fs.permission.AclStatus; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.Progressable; @@ -279,6 +281,38 @@ class ChRootedFs extends AbstractFileSystem { myFs.setTimes(fullPath(f), mtime, atime); } + @Override + public void modifyAclEntries(Path path, List aclSpec) + throws IOException { + myFs.modifyAclEntries(fullPath(path), aclSpec); + } + + @Override + public void removeAclEntries(Path path, List aclSpec) + throws IOException { + myFs.removeAclEntries(fullPath(path), aclSpec); + } + + @Override + public void removeDefaultAcl(Path path) throws IOException { + myFs.removeDefaultAcl(fullPath(path)); + } + + @Override + public void removeAcl(Path path) throws IOException { + myFs.removeAcl(fullPath(path)); + } + + @Override + public void setAcl(Path path, List aclSpec) throws IOException { + myFs.setAcl(fullPath(path), aclSpec); + } + + @Override + public AclStatus getAclStatus(Path path) throws IOException { + return myFs.getAclStatus(fullPath(path)); + } + @Override public void setVerifyChecksum(final boolean verifyChecksum) throws IOException, UnresolvedLinkException { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java index b2f2bed5a28..34a9afc5499 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java @@ -50,6 +50,7 @@ import org.apache.hadoop.fs.UnsupportedFileSystemException; import org.apache.hadoop.fs.XAttrSetFlag; import org.apache.hadoop.fs.permission.AclEntry; 
import org.apache.hadoop.fs.permission.AclStatus; +import org.apache.hadoop.fs.permission.AclUtil; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.viewfs.InodeTree.INode; import org.apache.hadoop.fs.viewfs.InodeTree.INodeLink; @@ -871,5 +872,46 @@ public class ViewFileSystem extends FileSystem { public short getDefaultReplication(Path f) { throw new NotInMountpointException(f, "getDefaultReplication"); } + + @Override + public void modifyAclEntries(Path path, List aclSpec) + throws IOException { + checkPathIsSlash(path); + throw readOnlyMountTable("modifyAclEntries", path); + } + + @Override + public void removeAclEntries(Path path, List aclSpec) + throws IOException { + checkPathIsSlash(path); + throw readOnlyMountTable("removeAclEntries", path); + } + + @Override + public void removeDefaultAcl(Path path) throws IOException { + checkPathIsSlash(path); + throw readOnlyMountTable("removeDefaultAcl", path); + } + + @Override + public void removeAcl(Path path) throws IOException { + checkPathIsSlash(path); + throw readOnlyMountTable("removeAcl", path); + } + + @Override + public void setAcl(Path path, List aclSpec) throws IOException { + checkPathIsSlash(path); + throw readOnlyMountTable("setAcl", path); + } + + @Override + public AclStatus getAclStatus(Path path) throws IOException { + checkPathIsSlash(path); + return new AclStatus.Builder().owner(ugi.getUserName()) + .group(ugi.getGroupNames()[0]) + .addEntries(AclUtil.getMinimalAcl(PERMISSION_555)) + .stickyBit(false).build(); + } } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFs.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFs.java index f2a433b95f8..232fcbbb409 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFs.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFs.java @@ -49,6 +49,9 @@ import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.UnresolvedLinkException; import org.apache.hadoop.fs.UnsupportedFileSystemException; import org.apache.hadoop.fs.local.LocalConfigKeys; +import org.apache.hadoop.fs.permission.AclEntry; +import org.apache.hadoop.fs.permission.AclUtil; +import org.apache.hadoop.fs.permission.AclStatus; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.viewfs.InodeTree.INode; import org.apache.hadoop.fs.viewfs.InodeTree.INodeLink; @@ -603,6 +606,51 @@ public class ViewFs extends AbstractFileSystem { return true; } + @Override + public void modifyAclEntries(Path path, List aclSpec) + throws IOException { + InodeTree.ResolveResult res = + fsState.resolve(getUriPath(path), true); + res.targetFileSystem.modifyAclEntries(res.remainingPath, aclSpec); + } + + @Override + public void removeAclEntries(Path path, List aclSpec) + throws IOException { + InodeTree.ResolveResult res = + fsState.resolve(getUriPath(path), true); + res.targetFileSystem.removeAclEntries(res.remainingPath, aclSpec); + } + + @Override + public void removeDefaultAcl(Path path) + throws IOException { + InodeTree.ResolveResult res = + fsState.resolve(getUriPath(path), true); + res.targetFileSystem.removeDefaultAcl(res.remainingPath); + } + + @Override + public void removeAcl(Path path) + throws IOException { + InodeTree.ResolveResult res = + fsState.resolve(getUriPath(path), true); + res.targetFileSystem.removeAcl(res.remainingPath); + } + + @Override + public void setAcl(Path path, List aclSpec) 
throws IOException { + InodeTree.ResolveResult res = + fsState.resolve(getUriPath(path), true); + res.targetFileSystem.setAcl(res.remainingPath, aclSpec); + } + + @Override + public AclStatus getAclStatus(Path path) throws IOException { + InodeTree.ResolveResult res = + fsState.resolve(getUriPath(path), true); + return res.targetFileSystem.getAclStatus(res.remainingPath); + } /* @@ -832,5 +880,46 @@ public class ViewFs extends AbstractFileSystem { throws AccessControlException { throw readOnlyMountTable("setVerifyChecksum", ""); } + + @Override + public void modifyAclEntries(Path path, List aclSpec) + throws IOException { + checkPathIsSlash(path); + throw readOnlyMountTable("modifyAclEntries", path); + } + + @Override + public void removeAclEntries(Path path, List aclSpec) + throws IOException { + checkPathIsSlash(path); + throw readOnlyMountTable("removeAclEntries", path); + } + + @Override + public void removeDefaultAcl(Path path) throws IOException { + checkPathIsSlash(path); + throw readOnlyMountTable("removeDefaultAcl", path); + } + + @Override + public void removeAcl(Path path) throws IOException { + checkPathIsSlash(path); + throw readOnlyMountTable("removeAcl", path); + } + + @Override + public void setAcl(Path path, List aclSpec) throws IOException { + checkPathIsSlash(path); + throw readOnlyMountTable("setAcl", path); + } + + @Override + public AclStatus getAclStatus(Path path) throws IOException { + checkPathIsSlash(path); + return new AclStatus.Builder().owner(ugi.getUserName()) + .group(ugi.getGroupNames()[0]) + .addEntries(AclUtil.getMinimalAcl(PERMISSION_555)) + .stickyBit(false).build(); + } } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFileSystemBaseTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFileSystemBaseTest.java index 2d3cb270f11..e1a440d0614 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFileSystemBaseTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFileSystemBaseTest.java @@ -20,6 +20,7 @@ package org.apache.hadoop.fs.viewfs; import java.io.FileNotFoundException; import java.io.IOException; import java.util.Arrays; +import java.util.ArrayList; import java.util.List; @@ -28,9 +29,16 @@ import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystemTestHelper; import static org.apache.hadoop.fs.FileSystemTestHelper.*; +import org.apache.hadoop.fs.permission.AclEntry; +import static org.apache.hadoop.fs.viewfs.Constants.PERMISSION_555; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; + import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FsConstants; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.AclStatus; +import org.apache.hadoop.fs.permission.AclUtil; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.viewfs.ConfigUtil; @@ -38,6 +46,7 @@ import org.apache.hadoop.fs.viewfs.ViewFileSystem; import org.apache.hadoop.fs.viewfs.ViewFileSystem.MountPoint; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.Credentials; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.junit.After; import org.junit.Assert; @@ -96,7 +105,6 @@ public class 
ViewFileSystemBaseTest { // in the test root // Set up the defaultMT in the config with our mount point links - //Configuration conf = new Configuration(); conf = ViewFileSystemTestSetup.createConfig(); setupMountPoints(); fsView = FileSystem.get(FsConstants.VIEWFS_URI, conf); @@ -720,4 +728,49 @@ public class ViewFileSystemBaseTest { Assert.assertTrue("Other-readable permission not set!", perms.getOtherAction().implies(FsAction.READ)); } + + /** + * Verify the behavior of ACL operations on paths above the root of + * any mount table entry. + */ + + @Test(expected=AccessControlException.class) + public void testInternalModifyAclEntries() throws IOException { + fsView.modifyAclEntries(new Path("/internalDir"), + new ArrayList()); + } + + @Test(expected=AccessControlException.class) + public void testInternalRemoveAclEntries() throws IOException { + fsView.removeAclEntries(new Path("/internalDir"), + new ArrayList()); + } + + @Test(expected=AccessControlException.class) + public void testInternalRemoveDefaultAcl() throws IOException { + fsView.removeDefaultAcl(new Path("/internalDir")); + } + + @Test(expected=AccessControlException.class) + public void testInternalRemoveAcl() throws IOException { + fsView.removeAcl(new Path("/internalDir")); + } + + @Test(expected=AccessControlException.class) + public void testInternalSetAcl() throws IOException { + fsView.setAcl(new Path("/internalDir"), new ArrayList()); + } + + @Test + public void testInternalGetAclStatus() throws IOException { + final UserGroupInformation currentUser = + UserGroupInformation.getCurrentUser(); + AclStatus aclStatus = fsView.getAclStatus(new Path("/internalDir")); + assertEquals(aclStatus.getOwner(), currentUser.getUserName()); + assertEquals(aclStatus.getGroup(), currentUser.getGroupNames()[0]); + assertEquals(aclStatus.getEntries(), + AclUtil.getMinimalAcl(PERMISSION_555)); + assertFalse(aclStatus.isStickyBit()); + } + } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFsBaseTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFsBaseTest.java index 0f771cd3ba5..2813c34bef4 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFsBaseTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFsBaseTest.java @@ -22,10 +22,14 @@ import static org.apache.hadoop.fs.FileContextTestHelper.checkFileStatus; import static org.apache.hadoop.fs.FileContextTestHelper.exists; import static org.apache.hadoop.fs.FileContextTestHelper.isDir; import static org.apache.hadoop.fs.FileContextTestHelper.isFile; +import static org.apache.hadoop.fs.viewfs.Constants.PERMISSION_555; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import java.io.FileNotFoundException; import java.io.IOException; import java.net.URI; +import java.util.ArrayList; import java.util.List; import org.apache.hadoop.conf.Configuration; @@ -39,8 +43,12 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FsConstants; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.UnresolvedLinkException; +import org.apache.hadoop.fs.permission.AclEntry; +import org.apache.hadoop.fs.permission.AclStatus; +import org.apache.hadoop.fs.permission.AclUtil; import org.apache.hadoop.fs.viewfs.ViewFs.MountPoint; import org.apache.hadoop.security.AccessControlException; +import org.apache.hadoop.security.UserGroupInformation; import 
org.apache.hadoop.security.token.Token; import org.junit.After; import org.junit.Assert; @@ -695,4 +703,48 @@ public class ViewFsBaseTest { public void testInternalSetOwner() throws IOException { fcView.setOwner(new Path("/internalDir"), "foo", "bar"); } + + /** + * Verify the behavior of ACL operations on paths above the root of + * any mount table entry. + */ + + @Test(expected=AccessControlException.class) + public void testInternalModifyAclEntries() throws IOException { + fcView.modifyAclEntries(new Path("/internalDir"), + new ArrayList()); + } + + @Test(expected=AccessControlException.class) + public void testInternalRemoveAclEntries() throws IOException { + fcView.removeAclEntries(new Path("/internalDir"), + new ArrayList()); + } + + @Test(expected=AccessControlException.class) + public void testInternalRemoveDefaultAcl() throws IOException { + fcView.removeDefaultAcl(new Path("/internalDir")); + } + + @Test(expected=AccessControlException.class) + public void testInternalRemoveAcl() throws IOException { + fcView.removeAcl(new Path("/internalDir")); + } + + @Test(expected=AccessControlException.class) + public void testInternalSetAcl() throws IOException { + fcView.setAcl(new Path("/internalDir"), new ArrayList()); + } + + @Test + public void testInternalGetAclStatus() throws IOException { + final UserGroupInformation currentUser = + UserGroupInformation.getCurrentUser(); + AclStatus aclStatus = fcView.getAclStatus(new Path("/internalDir")); + assertEquals(aclStatus.getOwner(), currentUser.getUserName()); + assertEquals(aclStatus.getGroup(), currentUser.getGroupNames()[0]); + assertEquals(aclStatus.getEntries(), + AclUtil.getMinimalAcl(PERMISSION_555)); + assertFalse(aclStatus.isStickyBit()); + } } From dc31d66f8a97fa824e9cbfde17d723dd5db44e07 Mon Sep 17 00:00:00 2001 From: Vinayakumar B Date: Wed, 16 Jul 2014 10:48:15 +0000 Subject: [PATCH 06/49] HADOOP-9921. daemon scripts should remove pid file on stop call after stop or process is found not running ( Contributed by Vinayakumar B) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1610964 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.txt | 3 +++ .../hadoop-common/src/main/bin/hadoop-daemon.sh | 1 + hadoop-mapreduce-project/bin/mr-jobhistory-daemon.sh | 1 + hadoop-yarn-project/hadoop-yarn/bin/yarn-daemon.sh | 1 + 4 files changed, 6 insertions(+) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index d7a7bf93dcd..eb33836841e 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -422,6 +422,9 @@ Release 2.6.0 - UNRELEASED HADOOP-10810. Clean up native code compilation warnings. (cnauroth) + HADOOP-9921. 
daemon scripts should remove pid file on stop call after stop + or process is found not running ( vinayakumarb ) + Release 2.5.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemon.sh b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemon.sh index bb6ed8690ef..6a4cd69152e 100755 --- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemon.sh +++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemon.sh @@ -198,6 +198,7 @@ case $startStop in else echo no $command to stop fi + rm -f $pid else echo no $command to stop fi diff --git a/hadoop-mapreduce-project/bin/mr-jobhistory-daemon.sh b/hadoop-mapreduce-project/bin/mr-jobhistory-daemon.sh index 9ef3d454d1e..7585c9a81e8 100644 --- a/hadoop-mapreduce-project/bin/mr-jobhistory-daemon.sh +++ b/hadoop-mapreduce-project/bin/mr-jobhistory-daemon.sh @@ -133,6 +133,7 @@ case $startStop in else echo no $command to stop fi + rm -f $pid else echo no $command to stop fi diff --git a/hadoop-yarn-project/hadoop-yarn/bin/yarn-daemon.sh b/hadoop-yarn-project/hadoop-yarn/bin/yarn-daemon.sh index 527ae42cfd3..fbfa71d80df 100644 --- a/hadoop-yarn-project/hadoop-yarn/bin/yarn-daemon.sh +++ b/hadoop-yarn-project/hadoop-yarn/bin/yarn-daemon.sh @@ -145,6 +145,7 @@ case $startStop in else echo no $command to stop fi + rm -f $pid else echo no $command to stop fi From 80d7f0911cf42eee001fe1a3d6958b82635df671 Mon Sep 17 00:00:00 2001 From: Uma Maheswara Rao G Date: Wed, 16 Jul 2014 11:24:41 +0000 Subject: [PATCH 07/49] HADOOP-10824. Refactor KMSACLs to avoid locking. (Benoy Antony via umamahesh) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1610969 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 2 ++ .../hadoop/crypto/key/kms/server/KMSACLs.java | 31 +++++-------------- 2 files changed, 10 insertions(+), 23 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index eb33836841e..d52558620d0 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -178,6 +178,8 @@ Trunk (Unreleased) HADOOP-10736. Add key attributes to the key shell. (Mike Yoder via wang) + HADOOP-10824. Refactor KMSACLs to avoid locking. (Benoy Antony via umamahesh) + BUG FIXES HADOOP-9451. Fault single-layer config if node group topology is enabled. 
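The KMSACLs refactor in the diff that follows drops the ReadWriteLock and instead rebuilds the ACL map on each reload, publishing it through a single volatile write so readers never block. A minimal sketch of that copy-on-write idiom, using simplified stand-in types (a plain String "ACL" and an illustrative enum, not the real KMS classes):

    import java.util.HashMap;
    import java.util.Map;

    // Copy-on-write reload: writers build a fresh map and publish it with one
    // volatile write; readers take one volatile read and use that snapshot
    // without any lock.
    public class ReloadableAcls {
      public enum Type { CREATE, DELETE, ROLLOVER }

      private volatile Map<Type, String> acls = new HashMap<>();

      public void reload(Map<Type, String> fresh) {
        acls = new HashMap<>(fresh);        // swap in the new snapshot atomically
      }

      public boolean hasAccess(Type type, String user) {
        Map<Type, String> snapshot = acls;  // no read lock needed
        String acl = snapshot.get(type);
        return acl != null && acl.contains(user);
      }
    }

Because the published map is never mutated after the swap, readers either see the old snapshot or the new one, never a partially updated map, which is why the explicit locks could be removed.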
diff --git a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSACLs.java b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSACLs.java index d04a7142bd0..e3e6ce09007 100644 --- a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSACLs.java +++ b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSACLs.java @@ -28,8 +28,6 @@ import java.util.Map; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; -import java.util.concurrent.locks.ReadWriteLock; -import java.util.concurrent.locks.ReentrantReadWriteLock; /** * Provides access to the AccessControlLists used by KMS, @@ -52,13 +50,11 @@ public class KMSACLs implements Runnable { public static final int RELOADER_SLEEP_MILLIS = 1000; - Map acls; - private ReadWriteLock lock; + private volatile Map acls; private ScheduledExecutorService executorService; private long lastReload; KMSACLs(Configuration conf) { - lock = new ReentrantReadWriteLock(); if (conf == null) { conf = loadACLs(); } @@ -70,17 +66,13 @@ public class KMSACLs implements Runnable { } private void setACLs(Configuration conf) { - lock.writeLock().lock(); - try { - acls = new HashMap(); - for (Type aclType : Type.values()) { - String aclStr = conf.get(aclType.getConfigKey(), ACL_DEFAULT); - acls.put(aclType, new AccessControlList(aclStr)); - LOG.info("'{}' ACL '{}'", aclType, aclStr); - } - } finally { - lock.writeLock().unlock(); + Map tempAcls = new HashMap(); + for (Type aclType : Type.values()) { + String aclStr = conf.get(aclType.getConfigKey(), ACL_DEFAULT); + tempAcls.put(aclType, new AccessControlList(aclStr)); + LOG.info("'{}' ACL '{}'", aclType, aclStr); } + acls = tempAcls; } @Override @@ -120,14 +112,7 @@ public class KMSACLs implements Runnable { public boolean hasAccess(Type type, String user) { UserGroupInformation ugi = UserGroupInformation.createRemoteUser(user); - AccessControlList acl = null; - lock.readLock().lock(); - try { - acl = acls.get(type); - } finally { - lock.readLock().unlock(); - } - return acl.isUserAllowed(ugi); + return acls.get(type).isUserAllowed(ugi); } } From 43fe48d9e269fb49927df6b3a782d430c20d5434 Mon Sep 17 00:00:00 2001 From: Chris Nauroth Date: Wed, 16 Jul 2014 16:27:38 +0000 Subject: [PATCH 08/49] HDFS-5624. Add HDFS tests for ACLs in combination with viewfs. Contributed by Stephen Chu. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1611068 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../fs/viewfs/TestViewFileSystemWithAcls.java | 190 ++++++++++++++++++ .../hadoop/fs/viewfs/TestViewFsWithAcls.java | 190 ++++++++++++++++++ 3 files changed, 383 insertions(+) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemWithAcls.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFsWithAcls.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index cc1a4b6a9c6..2087279454c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -287,6 +287,9 @@ Release 2.6.0 - UNRELEASED HDFS-2856. Fix block protocol so that Datanodes don't require root or jsvc. (cnauroth) + HDFS-5624. Add HDFS tests for ACLs in combination with viewfs. 
+ (Stephen Chu via cnauroth) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemWithAcls.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemWithAcls.java new file mode 100644 index 00000000000..68dd7f2239f --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemWithAcls.java @@ -0,0 +1,190 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.viewfs; + +import com.google.common.collect.Lists; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileSystemTestHelper; +import org.apache.hadoop.fs.FsConstants; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.AclEntry; +import org.apache.hadoop.fs.permission.AclStatus; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.IOException; +import java.util.List; + +import static org.apache.hadoop.fs.permission.AclEntryScope.ACCESS; +import static org.apache.hadoop.fs.permission.AclEntryScope.DEFAULT; +import static org.apache.hadoop.fs.permission.AclEntryType.*; +import static org.apache.hadoop.fs.permission.FsAction.*; +import static org.apache.hadoop.hdfs.server.namenode.AclTestHelpers.aclEntry; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +/** + * Verify ACL through ViewFileSystem functionality. 
+ */ +public class TestViewFileSystemWithAcls { + + private static MiniDFSCluster cluster; + private static Configuration clusterConf = new Configuration(); + private static FileSystem fHdfs; + private static FileSystem fHdfs2; + private FileSystem fsView; + private Configuration fsViewConf; + private FileSystem fsTarget, fsTarget2; + private Path targetTestRoot, targetTestRoot2, mountOnNn1, mountOnNn2; + private FileSystemTestHelper fileSystemTestHelper = + new FileSystemTestHelper("/tmp/TestViewFileSystemWithAcls"); + + @BeforeClass + public static void clusterSetupAtBeginning() throws IOException { + clusterConf.setBoolean(DFSConfigKeys.DFS_NAMENODE_ACLS_ENABLED_KEY, true); + cluster = new MiniDFSCluster.Builder(clusterConf) + .nnTopology(MiniDFSNNTopology.simpleFederatedTopology(2)) + .numDataNodes(2) + .build(); + cluster.waitClusterUp(); + + fHdfs = cluster.getFileSystem(0); + fHdfs2 = cluster.getFileSystem(1); + } + + @AfterClass + public static void ClusterShutdownAtEnd() throws Exception { + cluster.shutdown(); + } + + @Before + public void setUp() throws Exception { + fsTarget = fHdfs; + fsTarget2 = fHdfs2; + targetTestRoot = fileSystemTestHelper.getAbsoluteTestRootPath(fsTarget); + targetTestRoot2 = fileSystemTestHelper.getAbsoluteTestRootPath(fsTarget2); + + fsTarget.delete(targetTestRoot, true); + fsTarget2.delete(targetTestRoot2, true); + fsTarget.mkdirs(targetTestRoot); + fsTarget2.mkdirs(targetTestRoot2); + + fsViewConf = ViewFileSystemTestSetup.createConfig(); + setupMountPoints(); + fsView = FileSystem.get(FsConstants.VIEWFS_URI, fsViewConf); + } + + private void setupMountPoints() { + mountOnNn1 = new Path("/mountOnNn1"); + mountOnNn2 = new Path("/mountOnNn2"); + ConfigUtil.addLink(fsViewConf, mountOnNn1.toString(), targetTestRoot.toUri()); + ConfigUtil.addLink(fsViewConf, mountOnNn2.toString(), targetTestRoot2.toUri()); + } + + @After + public void tearDown() throws Exception { + fsTarget.delete(fileSystemTestHelper.getTestRootPath(fsTarget), true); + fsTarget2.delete(fileSystemTestHelper.getTestRootPath(fsTarget2), true); + } + + /** + * Verify a ViewFs wrapped over multiple federated NameNodes will + * dispatch the ACL operations to the correct NameNode. 
+ */ + @Test + public void testAclOnMountEntry() throws Exception { + // Set ACLs on the first namespace and verify they are correct + List aclSpec = Lists.newArrayList( + aclEntry(ACCESS, USER, READ_WRITE), + aclEntry(ACCESS, USER, "foo", READ), + aclEntry(ACCESS, GROUP, READ), + aclEntry(ACCESS, OTHER, NONE)); + fsView.setAcl(mountOnNn1, aclSpec); + + AclEntry[] expected = new AclEntry[] { + aclEntry(ACCESS, USER, "foo", READ), + aclEntry(ACCESS, GROUP, READ) }; + assertArrayEquals(expected, aclEntryArray(fsView.getAclStatus(mountOnNn1))); + // Double-check by getting ACL status using FileSystem + // instead of ViewFs + assertArrayEquals(expected, aclEntryArray(fHdfs.getAclStatus(targetTestRoot))); + + // Modify the ACL entries on the first namespace + aclSpec = Lists.newArrayList( + aclEntry(DEFAULT, USER, "foo", READ)); + fsView.modifyAclEntries(mountOnNn1, aclSpec); + expected = new AclEntry[] { + aclEntry(ACCESS, USER, "foo", READ), + aclEntry(ACCESS, GROUP, READ), + aclEntry(DEFAULT, USER, READ_WRITE), + aclEntry(DEFAULT, USER, "foo", READ), + aclEntry(DEFAULT, GROUP, READ), + aclEntry(DEFAULT, MASK, READ), + aclEntry(DEFAULT, OTHER, NONE) }; + assertArrayEquals(expected, aclEntryArray(fsView.getAclStatus(mountOnNn1))); + + fsView.removeDefaultAcl(mountOnNn1); + expected = new AclEntry[] { + aclEntry(ACCESS, USER, "foo", READ), + aclEntry(ACCESS, GROUP, READ) }; + assertArrayEquals(expected, aclEntryArray(fsView.getAclStatus(mountOnNn1))); + assertArrayEquals(expected, aclEntryArray(fHdfs.getAclStatus(targetTestRoot))); + + // Paranoid check: verify the other namespace does not + // have ACLs set on the same path. + assertEquals(0, fsView.getAclStatus(mountOnNn2).getEntries().size()); + assertEquals(0, fHdfs2.getAclStatus(targetTestRoot2).getEntries().size()); + + // Remove the ACL entries on the first namespace + fsView.removeAcl(mountOnNn1); + assertEquals(0, fsView.getAclStatus(mountOnNn1).getEntries().size()); + assertEquals(0, fHdfs.getAclStatus(targetTestRoot).getEntries().size()); + + // Now set ACLs on the second namespace + aclSpec = Lists.newArrayList( + aclEntry(ACCESS, USER, "bar", READ)); + fsView.modifyAclEntries(mountOnNn2, aclSpec); + expected = new AclEntry[] { + aclEntry(ACCESS, USER, "bar", READ), + aclEntry(ACCESS, GROUP, READ_EXECUTE) }; + assertArrayEquals(expected, aclEntryArray(fsView.getAclStatus(mountOnNn2))); + assertArrayEquals(expected, aclEntryArray(fHdfs2.getAclStatus(targetTestRoot2))); + + // Remove the ACL entries on the second namespace + fsView.removeAclEntries(mountOnNn2, Lists.newArrayList( + aclEntry(ACCESS, USER, "bar", READ) + )); + expected = new AclEntry[] { aclEntry(ACCESS, GROUP, READ_EXECUTE) }; + assertArrayEquals(expected, aclEntryArray(fHdfs2.getAclStatus(targetTestRoot2))); + fsView.removeAcl(mountOnNn2); + assertEquals(0, fsView.getAclStatus(mountOnNn2).getEntries().size()); + assertEquals(0, fHdfs2.getAclStatus(targetTestRoot2).getEntries().size()); + } + + private AclEntry[] aclEntryArray(AclStatus aclStatus) { + return aclStatus.getEntries().toArray(new AclEntry[0]); + } + +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFsWithAcls.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFsWithAcls.java new file mode 100644 index 00000000000..70918e9d2b5 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFsWithAcls.java @@ -0,0 +1,190 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under 
one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.viewfs; + +import com.google.common.collect.Lists; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileContext; +import org.apache.hadoop.fs.FileContextTestHelper; +import org.apache.hadoop.fs.FsConstants; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.AclEntry; +import org.apache.hadoop.fs.permission.AclStatus; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import java.util.List; + +import java.io.IOException; + +import static org.apache.hadoop.fs.permission.AclEntryScope.ACCESS; +import static org.apache.hadoop.fs.permission.AclEntryScope.DEFAULT; +import static org.apache.hadoop.fs.permission.AclEntryType.*; +import static org.apache.hadoop.fs.permission.FsAction.*; +import static org.apache.hadoop.fs.permission.FsAction.NONE; +import static org.apache.hadoop.hdfs.server.namenode.AclTestHelpers.aclEntry; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +/** + * Verify ACL through ViewFs functionality. 
+ */ +public class TestViewFsWithAcls { + + private static MiniDFSCluster cluster; + private static Configuration clusterConf = new Configuration(); + private static FileContext fc, fc2; + private FileContext fcView, fcTarget, fcTarget2; + private Configuration fsViewConf; + private Path targetTestRoot, targetTestRoot2, mountOnNn1, mountOnNn2; + private FileContextTestHelper fileContextTestHelper = + new FileContextTestHelper("/tmp/TestViewFsWithAcls"); + + @BeforeClass + public static void clusterSetupAtBeginning() throws IOException { + clusterConf.setBoolean(DFSConfigKeys.DFS_NAMENODE_ACLS_ENABLED_KEY, true); + cluster = new MiniDFSCluster.Builder(clusterConf) + .nnTopology(MiniDFSNNTopology.simpleFederatedTopology(2)) + .numDataNodes(2) + .build(); + cluster.waitClusterUp(); + + fc = FileContext.getFileContext(cluster.getURI(0), clusterConf); + fc2 = FileContext.getFileContext(cluster.getURI(1), clusterConf); + } + + @AfterClass + public static void ClusterShutdownAtEnd() throws Exception { + cluster.shutdown(); + } + + @Before + public void setUp() throws Exception { + fcTarget = fc; + fcTarget2 = fc2; + targetTestRoot = fileContextTestHelper.getAbsoluteTestRootPath(fc); + targetTestRoot2 = fileContextTestHelper.getAbsoluteTestRootPath(fc2); + + fcTarget.delete(targetTestRoot, true); + fcTarget2.delete(targetTestRoot2, true); + fcTarget.mkdir(targetTestRoot, new FsPermission((short)0750), true); + fcTarget2.mkdir(targetTestRoot2, new FsPermission((short)0750), true); + + fsViewConf = ViewFileSystemTestSetup.createConfig(); + setupMountPoints(); + fcView = FileContext.getFileContext(FsConstants.VIEWFS_URI, fsViewConf); + } + + private void setupMountPoints() { + mountOnNn1 = new Path("/mountOnNn1"); + mountOnNn2 = new Path("/mountOnNn2"); + ConfigUtil.addLink(fsViewConf, mountOnNn1.toString(), targetTestRoot.toUri()); + ConfigUtil.addLink(fsViewConf, mountOnNn2.toString(), targetTestRoot2.toUri()); + } + + @After + public void tearDown() throws Exception { + fcTarget.delete(fileContextTestHelper.getTestRootPath(fcTarget), true); + fcTarget2.delete(fileContextTestHelper.getTestRootPath(fcTarget2), true); + } + + /** + * Verify a ViewFs wrapped over multiple federated NameNodes will + * dispatch the ACL operations to the correct NameNode. 
+ */ + @Test + public void testAclOnMountEntry() throws Exception { + // Set ACLs on the first namespace and verify they are correct + List aclSpec = Lists.newArrayList( + aclEntry(ACCESS, USER, READ_WRITE), + aclEntry(ACCESS, USER, "foo", READ), + aclEntry(ACCESS, GROUP, READ), + aclEntry(ACCESS, OTHER, NONE)); + fcView.setAcl(mountOnNn1, aclSpec); + + AclEntry[] expected = new AclEntry[] { + aclEntry(ACCESS, USER, "foo", READ), + aclEntry(ACCESS, GROUP, READ) }; + assertArrayEquals(expected, aclEntryArray(fcView.getAclStatus(mountOnNn1))); + // Double-check by getting ACL status using FileSystem + // instead of ViewFs + assertArrayEquals(expected, aclEntryArray(fc.getAclStatus(targetTestRoot))); + + // Modify the ACL entries on the first namespace + aclSpec = Lists.newArrayList( + aclEntry(DEFAULT, USER, "foo", READ)); + fcView.modifyAclEntries(mountOnNn1, aclSpec); + expected = new AclEntry[] { + aclEntry(ACCESS, USER, "foo", READ), + aclEntry(ACCESS, GROUP, READ), + aclEntry(DEFAULT, USER, READ_WRITE), + aclEntry(DEFAULT, USER, "foo", READ), + aclEntry(DEFAULT, GROUP, READ), + aclEntry(DEFAULT, MASK, READ), + aclEntry(DEFAULT, OTHER, NONE) }; + assertArrayEquals(expected, aclEntryArray(fcView.getAclStatus(mountOnNn1))); + + fcView.removeDefaultAcl(mountOnNn1); + expected = new AclEntry[] { + aclEntry(ACCESS, USER, "foo", READ), + aclEntry(ACCESS, GROUP, READ) }; + assertArrayEquals(expected, aclEntryArray(fcView.getAclStatus(mountOnNn1))); + assertArrayEquals(expected, aclEntryArray(fc.getAclStatus(targetTestRoot))); + + // Paranoid check: verify the other namespace does not + // have ACLs set on the same path. + assertEquals(0, fcView.getAclStatus(mountOnNn2).getEntries().size()); + assertEquals(0, fc2.getAclStatus(targetTestRoot2).getEntries().size()); + + // Remove the ACL entries on the first namespace + fcView.removeAcl(mountOnNn1); + assertEquals(0, fcView.getAclStatus(mountOnNn1).getEntries().size()); + assertEquals(0, fc.getAclStatus(targetTestRoot).getEntries().size()); + + // Now set ACLs on the second namespace + aclSpec = Lists.newArrayList( + aclEntry(ACCESS, USER, "bar", READ)); + fcView.modifyAclEntries(mountOnNn2, aclSpec); + expected = new AclEntry[] { + aclEntry(ACCESS, USER, "bar", READ), + aclEntry(ACCESS, GROUP, READ_EXECUTE) }; + assertArrayEquals(expected, aclEntryArray(fcView.getAclStatus(mountOnNn2))); + assertArrayEquals(expected, aclEntryArray(fc2.getAclStatus(targetTestRoot2))); + + // Remove the ACL entries on the second namespace + fcView.removeAclEntries(mountOnNn2, Lists.newArrayList( + aclEntry(ACCESS, USER, "bar", READ) + )); + expected = new AclEntry[] { aclEntry(ACCESS, GROUP, READ_EXECUTE) }; + assertArrayEquals(expected, aclEntryArray(fc2.getAclStatus(targetTestRoot2))); + fcView.removeAcl(mountOnNn2); + assertEquals(0, fcView.getAclStatus(mountOnNn2).getEntries().size()); + assertEquals(0, fc2.getAclStatus(targetTestRoot2).getEntries().size()); + } + + private AclEntry[] aclEntryArray(AclStatus aclStatus) { + return aclStatus.getEntries().toArray(new AclEntry[0]); + } + +} From 40e1bb9d316bce5cd720d3c091d651b3586ddb94 Mon Sep 17 00:00:00 2001 From: Jian He Date: Wed, 16 Jul 2014 18:25:05 +0000 Subject: [PATCH 09/49] YARN-2264. Fixed a race condition in DrainDispatcher which may cause random test failures. 
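The YARN-2264 patch below closes a window in DrainDispatcher where the dispatcher thread could compute drained = queue.isEmpty() while a handler was concurrently enqueuing an event and clearing the flag, leaving drained stuck at true and the test hanging. The fix serializes both updates on one mutex. A stripped-down sketch of the same idea, with illustrative names rather than the real YARN classes:

    import java.util.concurrent.BlockingQueue;
    import java.util.concurrent.LinkedBlockingQueue;

    // The flag and the queue are only ever updated together under "mutex", so an
    // enqueue can no longer slip in between the emptiness check and the flag write.
    class DrainableQueue<E> {
      private final BlockingQueue<E> queue = new LinkedBlockingQueue<>();
      private final Object mutex = new Object();
      private volatile boolean drained = false;

      void enqueue(E event) {
        synchronized (mutex) {
          queue.add(event);
          drained = false;
        }
      }

      E next() throws InterruptedException {
        synchronized (mutex) {
          drained = queue.isEmpty();
        }
        return queue.take();     // the blocking take stays outside the lock
      }

      boolean isDrained() {
        return drained;
      }
    }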
Contributed by Li Lu git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1611126 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 +++ .../apache/hadoop/yarn/event/DrainDispatcher.java | 14 ++++++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 41fd5bfd5a3..6f8567e205d 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -59,6 +59,9 @@ Release 2.6.0 - UNRELEASED YARN-2260. Fixed ResourceManager's RMNode to correctly remember containers when nodes resync during work-preserving RM restart. (Jian He via vinodkv) + YARN-2264. Fixed a race condition in DrainDispatcher which may cause random + test failures. (Li Lu via jianhe) + Release 2.5.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/event/DrainDispatcher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/event/DrainDispatcher.java index e79e7b360ef..803b2bb2b3b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/event/DrainDispatcher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/event/DrainDispatcher.java @@ -28,6 +28,7 @@ public class DrainDispatcher extends AsyncDispatcher { // and similar grotesqueries private volatile boolean drained = false; private final BlockingQueue queue; + final Object mutex; public DrainDispatcher() { this(new LinkedBlockingQueue()); @@ -36,6 +37,7 @@ public class DrainDispatcher extends AsyncDispatcher { private DrainDispatcher(BlockingQueue eventQueue) { super(eventQueue); this.queue = eventQueue; + this.mutex = this; } /** @@ -53,8 +55,10 @@ public class DrainDispatcher extends AsyncDispatcher { @Override public void run() { while (!Thread.currentThread().isInterrupted()) { - // !drained if dispatch queued new events on this dispatcher - drained = queue.isEmpty(); + synchronized (mutex) { + // !drained if dispatch queued new events on this dispatcher + drained = queue.isEmpty(); + } Event event; try { event = queue.take(); @@ -75,8 +79,10 @@ public class DrainDispatcher extends AsyncDispatcher { return new EventHandler() { @Override public void handle(Event event) { - drained = false; - actual.handle(event); + synchronized (mutex) { + actual.handle(event); + drained = false; + } } }; } From 7f357d68cca8930235683b43460e56973efdb6a5 Mon Sep 17 00:00:00 2001 From: Chris Nauroth Date: Wed, 16 Jul 2014 18:50:10 +0000 Subject: [PATCH 10/49] HADOOP-10839. Add unregisterSource() to MetricsSystem API. Contributed by Shanyu Zhao. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1611134 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.txt | 3 +++ .../apache/hadoop/metrics2/MetricsSystem.java | 6 ++++++ .../hadoop/metrics2/impl/MetricsSystemImpl.java | 11 +++++++++++ .../metrics2/impl/TestMetricsSystemImpl.java | 17 +++++++++++++++++ 4 files changed, 37 insertions(+) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index d52558620d0..75dba1892a2 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -409,6 +409,9 @@ Release 2.6.0 - UNRELEASED HADOOP-10845. Add common tests for ACLs in combination with viewfs. (Stephen Chu via cnauroth) + HADOOP-10839. 
Add unregisterSource() to MetricsSystem API. + (Shanyu Zhao via cnauroth) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsSystem.java index e853319c4e8..a277abd6e13 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsSystem.java @@ -54,6 +54,12 @@ public abstract class MetricsSystem implements MetricsSystemMXBean { */ public abstract T register(String name, String desc, T source); + /** + * Unregister a metrics source + * @param name of the source. This is the name you use to call register() + */ + public abstract void unregisterSource(String name); + /** * Register a metrics source (deriving name and description from the object) * @param the actual type of the source object diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java index cf2dda4e380..722abd95c4a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java @@ -232,6 +232,17 @@ public class MetricsSystemImpl extends MetricsSystem implements MetricsSource { return source; } + @Override public synchronized + void unregisterSource(String name) { + if (sources.containsKey(name)) { + sources.get(name).stop(); + sources.remove(name); + } + if (allSources.containsKey(name)) { + allSources.remove(name); + } + } + synchronized void registerSource(String name, String desc, MetricsSource source) { checkNotNull(config, "config"); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestMetricsSystemImpl.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestMetricsSystemImpl.java index 63dcbb2afae..0122045d383 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestMetricsSystemImpl.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestMetricsSystemImpl.java @@ -380,6 +380,23 @@ public class TestMetricsSystemImpl { ms.shutdown(); } + @Test public void testUnregisterSource() { + MetricsSystem ms = new MetricsSystemImpl(); + TestSource ts1 = new TestSource("ts1"); + TestSource ts2 = new TestSource("ts2"); + ms.register("ts1", "", ts1); + ms.register("ts2", "", ts2); + MetricsSource s1 = ms.getSource("ts1"); + assertNotNull(s1); + // should work when metrics system is not started + ms.unregisterSource("ts1"); + s1 = ms.getSource("ts1"); + assertNull(s1); + MetricsSource s2 = ms.getSource("ts2"); + assertNotNull(s2); + ms.shutdown(); + } + private void checkMetricsRecords(List recs) { LOG.debug(recs); MetricsRecord r = recs.get(0); From e896de98f2dba9fbd58024b8da35c8b72a404724 Mon Sep 17 00:00:00 2001 From: Brandon Li Date: Wed, 16 Jul 2014 18:52:03 +0000 Subject: [PATCH 11/49] HDFS-6689. NFS doesn't return correct lookup access for direcories. 
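One intended use of the unregisterSource(String) method added above is replacing a metrics source that was registered under a fixed name, which previously was not possible without restarting the metrics system. A hedged usage sketch; the source name and the rebind() helper are illustrative, only the MetricsSystem calls come from the API in the patch:

    import org.apache.hadoop.metrics2.MetricsSource;
    import org.apache.hadoop.metrics2.MetricsSystem;
    import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;

    class RebindSourceExample {
      // Swap in a fresh source under an existing name: unregister, then register.
      static void rebind(MetricsSource fresh) {
        MetricsSystem ms = DefaultMetricsSystem.instance();
        ms.unregisterSource("my-source");                   // new API from this patch
        ms.register("my-source", "example source", fresh);
      }
    }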
Contributed by Brandon Li git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1611135 13f79535-47bb-0310-9956-ffa450edef68 --- .../java/org/apache/hadoop/hdfs/nfs/nfs3/Nfs3Utils.java | 2 ++ .../org/apache/hadoop/hdfs/nfs/nfs3/TestNfs3Utils.java | 7 +++++++ hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 ++ 3 files changed, 11 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/Nfs3Utils.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/Nfs3Utils.java index 5c30f16bc97..71b018506e5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/Nfs3Utils.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/Nfs3Utils.java @@ -154,6 +154,8 @@ public class Nfs3Utils { if (isSet(mode, Nfs3Constant.ACCESS_MODE_EXECUTE)) { if (type == NfsFileType.NFSREG.toValue()) { rtn |= Nfs3Constant.ACCESS3_EXECUTE; + } else { + rtn |= Nfs3Constant.ACCESS3_LOOKUP; } } return rtn; diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestNfs3Utils.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestNfs3Utils.java index b5f0cd4c539..77646af2c5e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestNfs3Utils.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestNfs3Utils.java @@ -68,5 +68,12 @@ public class TestNfs3Utils { 0, Nfs3Utils.getAccessRightsForUserGroup(3, 10, new int[] {5, 16, 4}, attr)); assertEquals("No access should be allowed for dir as mode is 700 even though AuxGID does match", 0, Nfs3Utils.getAccessRightsForUserGroup(3, 20, new int[] {5, 10}, attr)); + + Mockito.when(attr.getUid()).thenReturn(2); + Mockito.when(attr.getGid()).thenReturn(10); + Mockito.when(attr.getMode()).thenReturn(457); // 711 + Mockito.when(attr.getType()).thenReturn(NfsFileType.NFSDIR.toValue()); + assertEquals("Access should be allowed for dir as mode is 711 and GID matches", + 2 /* Lookup */, Nfs3Utils.getAccessRightsForUserGroup(3, 10, new int[] {5, 16, 11}, attr)); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 2087279454c..0472583e39a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -315,6 +315,8 @@ Release 2.6.0 - UNRELEASED HDFS-6456. NFS should throw error for invalid entry in dfs.nfs.exports.allowed.hosts (Abhiraj Butala via brandonli) + HDFS-6689. NFS doesn't return correct lookup access for direcories (brandonli) + Release 2.5.0 - UNRELEASED INCOMPATIBLE CHANGES From 43a12f3c0119555eee919e2b12ecb6c836f05934 Mon Sep 17 00:00:00 2001 From: Karthik Kambatla Date: Wed, 16 Jul 2014 21:24:51 +0000 Subject: [PATCH 12/49] MAPREDUCE-5952. LocalContainerLauncher#renameMapOutputForReduce incorrectly assumes a single dir for mapOutIndex. 
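The fix in the diff below stops deriving the index path by appending ".index" to the map output path: with several entries configured for the local dirs, file.out and file.out.index can be allocated on different disks, so both paths must come from the MapOutputFile abstraction. A minimal sketch of the safe pattern (the wrapper class is illustrative; the two accessors are the ones used in the patch):

    import java.io.IOException;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapred.MapOutputFile;

    class MapOutputPaths {
      // Ask the same allocator for both paths instead of assuming a shared parent dir.
      static Path[] dataAndIndex(MapOutputFile files) throws IOException {
        Path mapOut = files.getOutputFile();
        Path mapOutIndex = files.getOutputIndexFile();
        // Pre-patch derivation, wrong when the two files land on different local dirs:
        // Path mapOutIndex = new Path(mapOut.toString() + ".index");
        return new Path[] { mapOut, mapOutIndex };
      }
    }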
(Gera Shegalov via kasha) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1611196 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 + .../hadoop/mapred/LocalContainerLauncher.java | 79 ++++++++++--------- .../mapred/TestLocalContainerLauncher.java | 73 +++++++++++++++++ 3 files changed, 117 insertions(+), 38 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 11e388983f4..35ba7a8f481 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -307,6 +307,9 @@ Release 2.5.0 - UNRELEASED resource configuration for deciding uber-mode on map-only jobs. (Siqi Li via vinodkv) + MAPREDUCE-5952. LocalContainerLauncher#renameMapOutputForReduce incorrectly + assumes a single dir for mapOutIndex. (Gera Shegalov via kasha) + Release 2.4.1 - 2014-06-23 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/LocalContainerLauncher.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/LocalContainerLauncher.java index 6425144b6b5..c7898ed966f 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/LocalContainerLauncher.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/LocalContainerLauncher.java @@ -30,6 +30,7 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.LinkedBlockingQueue; +import com.google.common.annotations.VisibleForTesting; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.FSError; @@ -437,43 +438,6 @@ public class LocalContainerLauncher extends AbstractService implements } } - /** - * Within the _local_ filesystem (not HDFS), all activity takes place within - * a single subdir (${local.dir}/usercache/$user/appcache/$appId/$contId/), - * and all sub-MapTasks create the same filename ("file.out"). Rename that - * to something unique (e.g., "map_0.out") to avoid collisions. - * - * Longer-term, we'll modify [something] to use TaskAttemptID-based - * filenames instead of "file.out". (All of this is entirely internal, - * so there are no particular compatibility issues.) 
- */ - private MapOutputFile renameMapOutputForReduce(JobConf conf, - TaskAttemptId mapId, MapOutputFile subMapOutputFile) throws IOException { - FileSystem localFs = FileSystem.getLocal(conf); - // move map output to reduce input - Path mapOut = subMapOutputFile.getOutputFile(); - FileStatus mStatus = localFs.getFileStatus(mapOut); - Path reduceIn = subMapOutputFile.getInputFileForWrite( - TypeConverter.fromYarn(mapId).getTaskID(), mStatus.getLen()); - Path mapOutIndex = new Path(mapOut.toString() + ".index"); - Path reduceInIndex = new Path(reduceIn.toString() + ".index"); - if (LOG.isDebugEnabled()) { - LOG.debug("Renaming map output file for task attempt " - + mapId.toString() + " from original location " + mapOut.toString() - + " to destination " + reduceIn.toString()); - } - if (!localFs.mkdirs(reduceIn.getParent())) { - throw new IOException("Mkdirs failed to create " - + reduceIn.getParent().toString()); - } - if (!localFs.rename(mapOut, reduceIn)) - throw new IOException("Couldn't rename " + mapOut); - if (!localFs.rename(mapOutIndex, reduceInIndex)) - throw new IOException("Couldn't rename " + mapOutIndex); - - return new RenamedMapOutputFile(reduceIn); - } - /** * Also within the local filesystem, we need to restore the initial state * of the directory as much as possible. Compare current contents against @@ -506,7 +470,46 @@ public class LocalContainerLauncher extends AbstractService implements } } // end EventHandler - + + /** + * Within the _local_ filesystem (not HDFS), all activity takes place within + * a subdir inside one of the LOCAL_DIRS + * (${local.dir}/usercache/$user/appcache/$appId/$contId/), + * and all sub-MapTasks create the same filename ("file.out"). Rename that + * to something unique (e.g., "map_0.out") to avoid possible collisions. + * + * Longer-term, we'll modify [something] to use TaskAttemptID-based + * filenames instead of "file.out". (All of this is entirely internal, + * so there are no particular compatibility issues.) 
+ */ + @VisibleForTesting + protected static MapOutputFile renameMapOutputForReduce(JobConf conf, + TaskAttemptId mapId, MapOutputFile subMapOutputFile) throws IOException { + FileSystem localFs = FileSystem.getLocal(conf); + // move map output to reduce input + Path mapOut = subMapOutputFile.getOutputFile(); + FileStatus mStatus = localFs.getFileStatus(mapOut); + Path reduceIn = subMapOutputFile.getInputFileForWrite( + TypeConverter.fromYarn(mapId).getTaskID(), mStatus.getLen()); + Path mapOutIndex = subMapOutputFile.getOutputIndexFile(); + Path reduceInIndex = new Path(reduceIn.toString() + ".index"); + if (LOG.isDebugEnabled()) { + LOG.debug("Renaming map output file for task attempt " + + mapId.toString() + " from original location " + mapOut.toString() + + " to destination " + reduceIn.toString()); + } + if (!localFs.mkdirs(reduceIn.getParent())) { + throw new IOException("Mkdirs failed to create " + + reduceIn.getParent().toString()); + } + if (!localFs.rename(mapOut, reduceIn)) + throw new IOException("Couldn't rename " + mapOut); + if (!localFs.rename(mapOutIndex, reduceInIndex)) + throw new IOException("Couldn't rename " + mapOutIndex); + + return new RenamedMapOutputFile(reduceIn); + } + private static class RenamedMapOutputFile extends MapOutputFile { private Path path; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapred/TestLocalContainerLauncher.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapred/TestLocalContainerLauncher.java index 9a0662ee2c4..28a891850e4 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapred/TestLocalContainerLauncher.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapred/TestLocalContainerLauncher.java @@ -18,17 +18,26 @@ package org.apache.hadoop.mapred; +import static org.apache.hadoop.fs.CreateFlag.CREATE; import static org.mockito.Matchers.isA; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; +import java.io.File; +import java.io.IOException; +import java.util.EnumSet; import java.util.HashMap; import java.util.Map; import java.util.concurrent.CountDownLatch; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileContext; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapreduce.MRConfig; import org.apache.hadoop.mapreduce.TypeConverter; import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; @@ -46,6 +55,9 @@ import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.event.Event; import org.apache.hadoop.yarn.event.EventHandler; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; import org.junit.Test; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; @@ -53,6 +65,36 @@ import org.mockito.stubbing.Answer; public class TestLocalContainerLauncher { private static final Log LOG = LogFactory.getLog(TestLocalContainerLauncher.class); + private static File testWorkDir; + private static final String[] localDirs = new String[2]; + 
+ private static void delete(File dir) throws IOException { + Configuration conf = new Configuration(); + FileSystem fs = FileSystem.getLocal(conf); + Path p = fs.makeQualified(new Path(dir.getAbsolutePath())); + fs.delete(p, true); + } + + @BeforeClass + public static void setupTestDirs() throws IOException { + testWorkDir = new File("target", + TestLocalContainerLauncher.class.getCanonicalName()); + testWorkDir.delete(); + testWorkDir.mkdirs(); + testWorkDir = testWorkDir.getAbsoluteFile(); + for (int i = 0; i < localDirs.length; i++) { + final File dir = new File(testWorkDir, "local-" + i); + dir.mkdirs(); + localDirs[i] = dir.toString(); + } + } + + @AfterClass + public static void cleanupTestDirs() throws IOException { + if (testWorkDir != null) { + delete(testWorkDir); + } + } @SuppressWarnings("rawtypes") @Test(timeout=10000) @@ -141,4 +183,35 @@ public class TestLocalContainerLauncher { when(container.getNodeId()).thenReturn(nodeId); return container; } + + + @Test + public void testRenameMapOutputForReduce() throws Exception { + final JobConf conf = new JobConf(); + + final MROutputFiles mrOutputFiles = new MROutputFiles(); + mrOutputFiles.setConf(conf); + + // make sure both dirs are distinct + // + conf.set(MRConfig.LOCAL_DIR, localDirs[0].toString()); + final Path mapOut = mrOutputFiles.getOutputFileForWrite(1); + conf.set(MRConfig.LOCAL_DIR, localDirs[1].toString()); + final Path mapOutIdx = mrOutputFiles.getOutputIndexFileForWrite(1); + Assert.assertNotEquals("Paths must be different!", + mapOut.getParent(), mapOutIdx.getParent()); + + // make both dirs part of LOCAL_DIR + conf.setStrings(MRConfig.LOCAL_DIR, localDirs); + + final FileContext lfc = FileContext.getLocalFSFileContext(conf); + lfc.create(mapOut, EnumSet.of(CREATE)).close(); + lfc.create(mapOutIdx, EnumSet.of(CREATE)).close(); + + final JobId jobId = MRBuilderUtils.newJobId(12345L, 1, 2); + final TaskId tid = MRBuilderUtils.newTaskId(jobId, 0, TaskType.MAP); + final TaskAttemptId taid = MRBuilderUtils.newTaskAttemptId(tid, 0); + + LocalContainerLauncher.renameMapOutputForReduce(conf, taid, mrOutputFiles); + } } From e8a1ff7efea5bc8754ca6913fd29363efc3f0265 Mon Sep 17 00:00:00 2001 From: Andrew Wang Date: Wed, 16 Jul 2014 23:42:34 +0000 Subject: [PATCH 13/49] MAPREDUCE-5971. Move the default options for distcp -p to DistCpOptionSwitch. Contributed by Charles Lamb. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1611217 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 +++ .../main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java | 1 + .../src/main/java/org/apache/hadoop/tools/OptionsParser.java | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 35ba7a8f481..c9e74516ffe 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -153,6 +153,9 @@ Release 2.6.0 - UNRELEASED IMPROVEMENTS + MAPREDUCE-5971. Move the default options for distcp -p to + DistCpOptionSwitch. 
(clamb via wang) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java index 2f2eb7c838c..e77b6e183f0 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java @@ -162,6 +162,7 @@ public enum DistCpOptionSwitch { BANDWIDTH(DistCpConstants.CONF_LABEL_BANDWIDTH_MB, new Option("bandwidth", true, "Specify bandwidth per map in MB")); + static final String PRESERVE_STATUS_DEFAULT = "-prbugpc"; private final String confLabel; private final Option option; diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java index 09e85505227..4bbc30dea29 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java @@ -50,7 +50,7 @@ public class OptionsParser { protected String[] flatten(Options options, String[] arguments, boolean stopAtNonOption) { for (int index = 0; index < arguments.length; index++) { if (arguments[index].equals("-" + DistCpOptionSwitch.PRESERVE_STATUS.getSwitch())) { - arguments[index] = "-prbugpc"; + arguments[index] = DistCpOptionSwitch.PRESERVE_STATUS_DEFAULT; } } return super.flatten(options, arguments, stopAtNonOption); From c477a166e18e122b101c372b1c0a2f362e53866d Mon Sep 17 00:00:00 2001 From: Allen Wittenauer Date: Wed, 16 Jul 2014 23:55:32 +0000 Subject: [PATCH 14/49] HDFS-2538. option to disable fsck dots. Contributed by Mohammad Kamrul Islam. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1611220 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/src/site/apt/CommandsManual.apt.vm | 4 +++- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 ++ .../apache/hadoop/hdfs/server/namenode/NamenodeFsck.java | 9 +++++++-- .../main/java/org/apache/hadoop/hdfs/tools/DFSck.java | 6 ++++-- 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/site/apt/CommandsManual.apt.vm b/hadoop-common-project/hadoop-common/src/site/apt/CommandsManual.apt.vm index f4fabab7262..149c2202506 100644 --- a/hadoop-common-project/hadoop-common/src/site/apt/CommandsManual.apt.vm +++ b/hadoop-common-project/hadoop-common/src/site/apt/CommandsManual.apt.vm @@ -127,7 +127,7 @@ User Commands Runs a HDFS filesystem checking utility. See {{{../hadoop-hdfs/HdfsUserGuide.html#fsck}fsck}} for more info. - Usage: << [-move | -delete | -openforwrite] [-files [-blocks [-locations | -racks]]]>>> + Usage: << [-move | -delete | -openforwrite] [-files [-blocks [-locations | -racks]]] [-showprogress]>>> *------------------+---------------------------------------------+ || COMMAND_OPTION || Description @@ -148,6 +148,8 @@ User Commands *------------------+---------------------------------------------+ | -racks | Print out network topology for data-node locations. *------------------+---------------------------------------------+ +| -showprogress | Print out show progress in output. Default is OFF (no progress). 
+*------------------+---------------------------------------------+ * <<>> diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 0472583e39a..ee2bc2e67f9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -12,6 +12,8 @@ Trunk (Unreleased) HDFS-5570. Deprecate hftp / hsftp and replace them with webhdfs / swebhdfs. (wheat9) + HDFS-2538. option to disable fsck dots (Mohammad Kamrul Islam via aw) + NEW FEATURES HDFS-3125. Add JournalService to enable Journal Daemon. (suresh) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java index 542e60e4016..5cc8a4797e0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java @@ -126,6 +126,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory { private boolean showBlocks = false; private boolean showLocations = false; private boolean showRacks = false; + private boolean showprogress = false; private boolean showCorruptFileBlocks = false; /** @@ -203,6 +204,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory { else if (key.equals("blocks")) { this.showBlocks = true; } else if (key.equals("locations")) { this.showLocations = true; } else if (key.equals("racks")) { this.showRacks = true; } + else if (key.equals("showprogress")) { this.showprogress = true; } else if (key.equals("openforwrite")) {this.showOpenFiles = true; } else if (key.equals("listcorruptfileblocks")) { this.showCorruptFileBlocks = true; @@ -381,10 +383,13 @@ public class NamenodeFsck implements DataEncryptionKeyFactory { } else if (showFiles) { out.print(path + " " + fileLen + " bytes, " + blocks.locatedBlockCount() + " block(s): "); - } else { + } else if (showprogress) { out.print('.'); } - if (res.totalFiles % 100 == 0) { out.println(); out.flush(); } + if ((showprogress) && res.totalFiles % 100 == 0) { + out.println(); + out.flush(); + } int missing = 0; int corrupt = 0; long missize = 0; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java index db253b6f270..b91090d1677 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java @@ -77,7 +77,7 @@ public class DFSck extends Configured implements Tool { private static final String USAGE = "Usage: DFSck " + "[-list-corruptfileblocks | " + "[-move | -delete | -openforwrite] " - + "[-files [-blocks [-locations | -racks]]]]\n" + + "[-files [-blocks [-locations | -racks]]]] [-showprogress]\n" + "\t\tstart checking from this path\n" + "\t-move\tmove corrupted files to /lost+found\n" + "\t-delete\tdelete corrupted files\n" @@ -90,7 +90,8 @@ public class DFSck extends Configured implements Tool { + "blocks and files they belong to\n" + "\t-blocks\tprint out block report\n" + "\t-locations\tprint out locations for every block\n" - + "\t-racks\tprint out network topology for data-node locations\n\n" + + "\t-racks\tprint out network topology for data-node locations\n" + + "\t-showprogress\tshow progress in output. 
Default is OFF (no progress)\n\n" + "Please Note:\n" + "\t1. By default fsck ignores files opened for write, " + "use -openforwrite to report such files. They are usually " @@ -270,6 +271,7 @@ public class DFSck extends Configured implements Tool { else if (args[idx].equals("-blocks")) { url.append("&blocks=1"); } else if (args[idx].equals("-locations")) { url.append("&locations=1"); } else if (args[idx].equals("-racks")) { url.append("&racks=1"); } + else if (args[idx].equals("-showprogress")) { url.append("&showprogress=1"); } else if (args[idx].equals("-list-corruptfileblocks")) { url.append("&listcorruptfileblocks=1"); doListCorruptFileBlocks = true; From bda23181bf1931c37d2590e03da9acdc6688ad34 Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Thu, 17 Jul 2014 00:14:56 +0000 Subject: [PATCH 15/49] YARN-2219. Changed ResourceManager to avoid AMs and NMs getting exceptions after RM recovery but before scheduler learns about apps and app-attempts. Contributed by Jian He. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1611222 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 4 +++ .../resourcemanager/rmapp/RMAppImpl.java | 26 ++++++++-------- .../rmapp/attempt/RMAppAttemptImpl.java | 6 ++-- .../scheduler/capacity/CapacityScheduler.java | 30 ++++++++++++------- .../event/AppAddedSchedulerEvent.java | 10 +++++++ .../event/AppAttemptAddedSchedulerEvent.java | 12 ++++---- .../scheduler/fair/FairScheduler.java | 30 ++++++++++++------- .../scheduler/fifo/FifoScheduler.java | 30 ++++++++++++------- .../resourcemanager/TestFifoScheduler.java | 2 +- .../TestWorkPreservingRMRestart.java | 30 +++++++++++++++++++ .../scheduler/fair/FairSchedulerTestBase.java | 2 +- .../scheduler/fair/TestFairScheduler.java | 12 ++++---- 12 files changed, 131 insertions(+), 63 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 6f8567e205d..a07686ce0d0 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -62,6 +62,10 @@ Release 2.6.0 - UNRELEASED YARN-2264. Fixed a race condition in DrainDispatcher which may cause random test failures. (Li Lu via jianhe) + YARN-2219. Changed ResourceManager to avoid AMs and NMs getting exceptions + after RM recovery but before scheduler learns about apps and app-attempts. 
+ (Jian He via vinodkv) + Release 2.5.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java index 523e6beb512..efa1ee72808 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java @@ -205,12 +205,6 @@ public class RMAppImpl implements RMApp, Recoverable { .addTransition(RMAppState.ACCEPTED, RMAppState.ACCEPTED, RMAppEventType.APP_RUNNING_ON_NODE, new AppRunningOnNodeTransition()) - // ACCECPTED state can once again receive APP_ACCEPTED event, because on - // recovery the app returns ACCEPTED state and the app once again go - // through the scheduler and triggers one more APP_ACCEPTED event at - // ACCEPTED state. - .addTransition(RMAppState.ACCEPTED, RMAppState.ACCEPTED, - RMAppEventType.APP_ACCEPTED) // Transitions from RUNNING state .addTransition(RMAppState.RUNNING, RMAppState.RUNNING, @@ -789,8 +783,18 @@ public class RMAppImpl implements RMApp, Recoverable { return app.recoveredFinalState; } - // Notify scheduler about the app on recovery - new AddApplicationToSchedulerTransition().transition(app, event); + // No existent attempts means the attempt associated with this app was not + // started or started but not yet saved. + if (app.attempts.isEmpty()) { + app.scheduler.handle(new AppAddedSchedulerEvent(app.applicationId, + app.submissionContext.getQueue(), app.user)); + return RMAppState.SUBMITTED; + } + + // Add application to scheduler synchronously to guarantee scheduler + // knows applications before AM or NM re-registers. + app.scheduler.handle(new AppAddedSchedulerEvent(app.applicationId, + app.submissionContext.getQueue(), app.user, true)); // recover attempts app.recoverAppAttempts(); @@ -805,12 +809,6 @@ public class RMAppImpl implements RMApp, Recoverable { return RMAppState.ACCEPTED; } - // No existent attempts means the attempt associated with this app was not - // started or started but not yet saved. - if (app.attempts.isEmpty()) { - return RMAppState.SUBMITTED; - } - // YARN-1507 is saving the application state after the application is // accepted. So after YARN-1507, an app is saved meaning it is accepted. // Thus we return ACCECPTED state on recovery. 
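The RMAppImpl change above is the core of YARN-2219: on recovery the application (and, further below, the attempt) is handed to the scheduler synchronously before AMs and NMs re-register, and the scheduler suppresses the usual APP_ACCEPTED / ATTEMPT_ADDED notifications because the recovered app is restored to its previous state directly rather than being driven through those events again. The following is a minimal, self-contained Java sketch of that pattern; it is illustrative only and not part of the patch, and names such as RecoveryAwareSchedulerSketch are invented for this example.

    import java.util.HashMap;
    import java.util.Map;
    import java.util.logging.Logger;

    /**
     * Illustrative sketch (not part of the patch) of the recovery-aware
     * addApplication pattern introduced by YARN-2219. All names here are
     * invented for the example.
     */
    public class RecoveryAwareSchedulerSketch {
      private static final Logger LOG =
          Logger.getLogger(RecoveryAwareSchedulerSketch.class.getName());

      // Stand-in for the scheduler's applications map.
      private final Map<String, String> applications = new HashMap<String, String>();

      /**
       * Adds the application to the scheduler's bookkeeping. On the normal
       * submission path this also notifies the app (APP_ACCEPTED in the real
       * schedulers); on the recovery path the notification is skipped because
       * the recovered app is already at or past the ACCEPTED state.
       */
      public synchronized void addApplication(String applicationId, String queue,
          String user, boolean isAppRecovering) {
        applications.put(applicationId, queue + "/" + user);
        if (isAppRecovering) {
          LOG.fine(applicationId + " is recovering. Skip notifying APP_ACCEPTED");
        } else {
          // The real schedulers dispatch an RMAppEvent(applicationId, APP_ACCEPTED) here.
          LOG.info("Notifying APP_ACCEPTED for " + applicationId);
        }
      }

      public static void main(String[] args) {
        RecoveryAwareSchedulerSketch scheduler = new RecoveryAwareSchedulerSketch();
        scheduler.addApplication("application_0001", "default", "alice", false); // normal submit
        scheduler.addApplication("application_0002", "default", "bob", true);    // RM recovery
      }
    }

The addApplicationAttempt changes in the CapacityScheduler, FairScheduler and FifoScheduler hunks that follow use the same shape, keyed on an isAttemptRecovering flag instead of the previous shouldNotifyAttemptAdded flag.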
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java index 000227a8183..dbcf64fc391 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java @@ -926,8 +926,10 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { appAttempt.masterService .registerAppAttempt(appAttempt.applicationAttemptId); - appAttempt.eventHandler.handle(new AppAttemptAddedSchedulerEvent( - appAttempt.getAppAttemptId(), false, false)); + // Add attempt to scheduler synchronously to guarantee scheduler + // knows attempts before AM or NM re-registers. + appAttempt.scheduler.handle(new AppAttemptAddedSchedulerEvent( + appAttempt.getAppAttemptId(), false, true)); } /* diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index 649eb92019e..6d26519ff98 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -521,7 +521,7 @@ public class CapacityScheduler extends } private synchronized void addApplication(ApplicationId applicationId, - String queueName, String user) { + String queueName, String user, boolean isAppRecovering) { // santiy checks. CSQueue queue = getQueue(queueName); if (queue == null) { @@ -553,14 +553,20 @@ public class CapacityScheduler extends applications.put(applicationId, application); LOG.info("Accepted application " + applicationId + " from user: " + user + ", in queue: " + queueName); - rmContext.getDispatcher().getEventHandler() + if (isAppRecovering) { + if (LOG.isDebugEnabled()) { + LOG.debug(applicationId + " is recovering. 
Skip notifying APP_ACCEPTED"); + } + } else { + rmContext.getDispatcher().getEventHandler() .handle(new RMAppEvent(applicationId, RMAppEventType.APP_ACCEPTED)); + } } private synchronized void addApplicationAttempt( ApplicationAttemptId applicationAttemptId, boolean transferStateFromPreviousAttempt, - boolean shouldNotifyAttemptAdded) { + boolean isAttemptRecovering) { SchedulerApplication application = applications.get(applicationAttemptId.getApplicationId()); CSQueue queue = (CSQueue) application.getQueue(); @@ -578,14 +584,15 @@ public class CapacityScheduler extends LOG.info("Added Application Attempt " + applicationAttemptId + " to scheduler from user " + application.getUser() + " in queue " + queue.getQueueName()); - if (shouldNotifyAttemptAdded) { - rmContext.getDispatcher().getEventHandler().handle( - new RMAppAttemptEvent(applicationAttemptId, - RMAppAttemptEventType.ATTEMPT_ADDED)); - } else { + if (isAttemptRecovering) { if (LOG.isDebugEnabled()) { - LOG.debug("Skipping notifying ATTEMPT_ADDED"); + LOG.debug(applicationAttemptId + + " is recovering. Skipping notifying ATTEMPT_ADDED"); } + } else { + rmContext.getDispatcher().getEventHandler().handle( + new RMAppAttemptEvent(applicationAttemptId, + RMAppAttemptEventType.ATTEMPT_ADDED)); } } @@ -905,7 +912,8 @@ public class CapacityScheduler extends { AppAddedSchedulerEvent appAddedEvent = (AppAddedSchedulerEvent) event; addApplication(appAddedEvent.getApplicationId(), - appAddedEvent.getQueue(), appAddedEvent.getUser()); + appAddedEvent.getQueue(), appAddedEvent.getUser(), + appAddedEvent.getIsAppRecovering()); } break; case APP_REMOVED: @@ -921,7 +929,7 @@ public class CapacityScheduler extends (AppAttemptAddedSchedulerEvent) event; addApplicationAttempt(appAttemptAddedEvent.getApplicationAttemptId(), appAttemptAddedEvent.getTransferStateFromPreviousAttempt(), - appAttemptAddedEvent.getShouldNotifyAttemptAdded()); + appAttemptAddedEvent.getIsAttemptRecovering()); } break; case APP_ATTEMPT_REMOVED: diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/AppAddedSchedulerEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/AppAddedSchedulerEvent.java index d6fb36df78b..7e0b89e20f3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/AppAddedSchedulerEvent.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/AppAddedSchedulerEvent.java @@ -25,13 +25,20 @@ public class AppAddedSchedulerEvent extends SchedulerEvent { private final ApplicationId applicationId; private final String queue; private final String user; + private final boolean isAppRecovering; public AppAddedSchedulerEvent( ApplicationId applicationId, String queue, String user) { + this(applicationId, queue, user, false); + } + + public AppAddedSchedulerEvent(ApplicationId applicationId, String queue, + String user, boolean isAppRecovering) { super(SchedulerEventType.APP_ADDED); this.applicationId = applicationId; this.queue = queue; this.user = user; + this.isAppRecovering = isAppRecovering; } public ApplicationId getApplicationId() { @@ -46,4 +53,7 @@ public class AppAddedSchedulerEvent extends 
SchedulerEvent { return user; } + public boolean getIsAppRecovering() { + return isAppRecovering; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/AppAttemptAddedSchedulerEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/AppAttemptAddedSchedulerEvent.java index 64d308adea1..6e66d2a6601 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/AppAttemptAddedSchedulerEvent.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/AppAttemptAddedSchedulerEvent.java @@ -24,22 +24,22 @@ public class AppAttemptAddedSchedulerEvent extends SchedulerEvent { private final ApplicationAttemptId applicationAttemptId; private final boolean transferStateFromPreviousAttempt; - private final boolean shouldNotifyAttemptAdded; + private final boolean isAttemptRecovering; public AppAttemptAddedSchedulerEvent( ApplicationAttemptId applicationAttemptId, boolean transferStateFromPreviousAttempt) { - this(applicationAttemptId, transferStateFromPreviousAttempt, true); + this(applicationAttemptId, transferStateFromPreviousAttempt, false); } public AppAttemptAddedSchedulerEvent( ApplicationAttemptId applicationAttemptId, boolean transferStateFromPreviousAttempt, - boolean shouldNotifyAttemptAdded) { + boolean isAttemptRecovering) { super(SchedulerEventType.APP_ATTEMPT_ADDED); this.applicationAttemptId = applicationAttemptId; this.transferStateFromPreviousAttempt = transferStateFromPreviousAttempt; - this.shouldNotifyAttemptAdded = shouldNotifyAttemptAdded; + this.isAttemptRecovering = isAttemptRecovering; } public ApplicationAttemptId getApplicationAttemptId() { @@ -50,7 +50,7 @@ public class AppAttemptAddedSchedulerEvent extends SchedulerEvent { return transferStateFromPreviousAttempt; } - public boolean getShouldNotifyAttemptAdded() { - return shouldNotifyAttemptAdded; + public boolean getIsAttemptRecovering() { + return isAttemptRecovering; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index 7e867554f9b..3a847ce7589 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -566,7 +566,7 @@ public class FairScheduler extends * configured limits, but the app will not be marked as runnable. 
*/ protected synchronized void addApplication(ApplicationId applicationId, - String queueName, String user) { + String queueName, String user, boolean isAppRecovering) { if (queueName == null || queueName.isEmpty()) { String message = "Reject application " + applicationId + " submitted by user " + user + " with an empty queue name."; @@ -603,8 +603,14 @@ public class FairScheduler extends LOG.info("Accepted application " + applicationId + " from user: " + user + ", in queue: " + queueName + ", currently num of applications: " + applications.size()); - rmContext.getDispatcher().getEventHandler() + if (isAppRecovering) { + if (LOG.isDebugEnabled()) { + LOG.debug(applicationId + " is recovering. Skip notifying APP_ACCEPTED"); + } + } else { + rmContext.getDispatcher().getEventHandler() .handle(new RMAppEvent(applicationId, RMAppEventType.APP_ACCEPTED)); + } } /** @@ -613,7 +619,7 @@ public class FairScheduler extends protected synchronized void addApplicationAttempt( ApplicationAttemptId applicationAttemptId, boolean transferStateFromPreviousAttempt, - boolean shouldNotifyAttemptAdded) { + boolean isAttemptRecovering) { SchedulerApplication application = applications.get(applicationAttemptId.getApplicationId()); String user = application.getUser(); @@ -642,14 +648,15 @@ public class FairScheduler extends LOG.info("Added Application Attempt " + applicationAttemptId + " to scheduler from user: " + user); - if (shouldNotifyAttemptAdded) { - rmContext.getDispatcher().getEventHandler().handle( - new RMAppAttemptEvent(applicationAttemptId, - RMAppAttemptEventType.ATTEMPT_ADDED)); - } else { + if (isAttemptRecovering) { if (LOG.isDebugEnabled()) { - LOG.debug("Skipping notifying ATTEMPT_ADDED"); + LOG.debug(applicationAttemptId + + " is recovering. Skipping notifying ATTEMPT_ADDED"); } + } else { + rmContext.getDispatcher().getEventHandler().handle( + new RMAppAttemptEvent(applicationAttemptId, + RMAppAttemptEventType.ATTEMPT_ADDED)); } } @@ -1136,7 +1143,8 @@ public class FairScheduler extends } AppAddedSchedulerEvent appAddedEvent = (AppAddedSchedulerEvent) event; addApplication(appAddedEvent.getApplicationId(), - appAddedEvent.getQueue(), appAddedEvent.getUser()); + appAddedEvent.getQueue(), appAddedEvent.getUser(), + appAddedEvent.getIsAppRecovering()); break; case APP_REMOVED: if (!(event instanceof AppRemovedSchedulerEvent)) { @@ -1154,7 +1162,7 @@ public class FairScheduler extends (AppAttemptAddedSchedulerEvent) event; addApplicationAttempt(appAttemptAddedEvent.getApplicationAttemptId(), appAttemptAddedEvent.getTransferStateFromPreviousAttempt(), - appAttemptAddedEvent.getShouldNotifyAttemptAdded()); + appAttemptAddedEvent.getIsAttemptRecovering()); break; case APP_ATTEMPT_REMOVED: if (!(event instanceof AppAttemptRemovedSchedulerEvent)) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java index b017db7321e..571d0558c04 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java @@ -356,22 +356,28 @@ public class FifoScheduler extends @VisibleForTesting public synchronized void addApplication(ApplicationId applicationId, - String queue, String user) { + String queue, String user, boolean isAppRecovering) { SchedulerApplication application = new SchedulerApplication(DEFAULT_QUEUE, user); applications.put(applicationId, application); metrics.submitApp(user); LOG.info("Accepted application " + applicationId + " from user: " + user + ", currently num of applications: " + applications.size()); - rmContext.getDispatcher().getEventHandler() + if (isAppRecovering) { + if (LOG.isDebugEnabled()) { + LOG.debug(applicationId + " is recovering. Skip notifying APP_ACCEPTED"); + } + } else { + rmContext.getDispatcher().getEventHandler() .handle(new RMAppEvent(applicationId, RMAppEventType.APP_ACCEPTED)); + } } @VisibleForTesting public synchronized void addApplicationAttempt(ApplicationAttemptId appAttemptId, boolean transferStateFromPreviousAttempt, - boolean shouldNotifyAttemptAdded) { + boolean isAttemptRecovering) { SchedulerApplication application = applications.get(appAttemptId.getApplicationId()); String user = application.getUser(); @@ -389,14 +395,15 @@ public class FifoScheduler extends metrics.submitAppAttempt(user); LOG.info("Added Application Attempt " + appAttemptId + " to scheduler from user " + application.getUser()); - if (shouldNotifyAttemptAdded) { - rmContext.getDispatcher().getEventHandler().handle( - new RMAppAttemptEvent(appAttemptId, - RMAppAttemptEventType.ATTEMPT_ADDED)); - } else { + if (isAttemptRecovering) { if (LOG.isDebugEnabled()) { - LOG.debug("Skipping notifying ATTEMPT_ADDED"); + LOG.debug(appAttemptId + + " is recovering. 
Skipping notifying ATTEMPT_ADDED"); } + } else { + rmContext.getDispatcher().getEventHandler().handle( + new RMAppAttemptEvent(appAttemptId, + RMAppAttemptEventType.ATTEMPT_ADDED)); } } @@ -772,7 +779,8 @@ public class FifoScheduler extends { AppAddedSchedulerEvent appAddedEvent = (AppAddedSchedulerEvent) event; addApplication(appAddedEvent.getApplicationId(), - appAddedEvent.getQueue(), appAddedEvent.getUser()); + appAddedEvent.getQueue(), appAddedEvent.getUser(), + appAddedEvent.getIsAppRecovering()); } break; case APP_REMOVED: @@ -788,7 +796,7 @@ public class FifoScheduler extends (AppAttemptAddedSchedulerEvent) event; addApplicationAttempt(appAttemptAddedEvent.getApplicationAttemptId(), appAttemptAddedEvent.getTransferStateFromPreviousAttempt(), - appAttemptAddedEvent.getShouldNotifyAttemptAdded()); + appAttemptAddedEvent.getIsAttemptRecovering()); } break; case APP_ATTEMPT_REMOVED: diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestFifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestFifoScheduler.java index aa7f63144eb..b8b41333e66 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestFifoScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestFifoScheduler.java @@ -228,7 +228,7 @@ public class TestFifoScheduler { scheduler.handle(new NodeAddedSchedulerEvent(node)); ApplicationId appId = ApplicationId.newInstance(0, 1); - scheduler.addApplication(appId, "queue1", "user1"); + scheduler.addApplication(appId, "queue1", "user1", true); NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node); try { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java index 59b11ef8228..24a2f437953 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java @@ -610,6 +610,36 @@ public class TestWorkPreservingRMRestart { attempt0.getMasterContainer().getId()).isAMContainer()); } + @Test (timeout = 20000) + public void testRecoverSchedulerAppAndAttemptSynchronously() throws Exception { + // start RM + MemoryRMStateStore memStore = new MemoryRMStateStore(); + memStore.init(conf); + rm1 = new MockRM(conf, memStore); + rm1.start(); + MockNM nm1 = + new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService()); + nm1.registerNode(); + + // create app and launch the AM + RMApp app0 = rm1.submitApp(200); + MockAM am0 = MockRM.launchAndRegisterAM(app0, rm1, nm1); + + rm2 = new MockRM(conf, memStore); + rm2.start(); + nm1.setResourceTrackerService(rm2.getResourceTrackerService()); + // scheduler app/attempt is immediately 
available after RM is re-started. + Assert.assertNotNull(rm2.getResourceScheduler().getSchedulerAppInfo( + am0.getApplicationAttemptId())); + + // getTransferredContainers should not throw NPE. + ((AbstractYarnScheduler) rm2.getResourceScheduler()) + .getTransferredContainers(am0.getApplicationAttemptId()); + + List containers = createNMContainerStatusForApp(am0); + nm1.registerNode(containers, null); + waitForNumContainersToRecover(2, rm2, am0.getApplicationAttemptId()); + } private void asserteMetrics(QueueMetrics qm, int appsSubmitted, int appsPending, int appsRunning, int appsCompleted, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerTestBase.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerTestBase.java index 50f61d8d592..7d666dadb60 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerTestBase.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerTestBase.java @@ -147,7 +147,7 @@ public class FairSchedulerTestBase { int memory, int vcores, String queueId, String userId, int numContainers, int priority) { ApplicationAttemptId id = createAppAttemptId(this.APP_ID++, this.ATTEMPT_ID++); - scheduler.addApplication(id.getApplicationId(), queueId, userId); + scheduler.addApplication(id.getApplicationId(), queueId, userId, true); // This conditional is for testAclSubmitApplication where app is rejected // and no app is added. 
if (scheduler.getSchedulerApplications().containsKey(id.getApplicationId())) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index 20c386714dc..7025fc48682 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -793,13 +793,13 @@ public class TestFairScheduler extends FairSchedulerTestBase { scheduler.reinitialize(conf, resourceManager.getRMContext()); ApplicationAttemptId id11 = createAppAttemptId(1, 1); - scheduler.addApplication(id11.getApplicationId(), "root.queue1", "user1"); + scheduler.addApplication(id11.getApplicationId(), "root.queue1", "user1", true); scheduler.addApplicationAttempt(id11, false, true); ApplicationAttemptId id21 = createAppAttemptId(2, 1); - scheduler.addApplication(id21.getApplicationId(), "root.queue2", "user1"); + scheduler.addApplication(id21.getApplicationId(), "root.queue2", "user1", true); scheduler.addApplicationAttempt(id21, false, true); ApplicationAttemptId id22 = createAppAttemptId(2, 2); - scheduler.addApplication(id22.getApplicationId(), "root.queue2", "user1"); + scheduler.addApplication(id22.getApplicationId(), "root.queue2", "user1", true); scheduler.addApplicationAttempt(id22, false, true); int minReqSize = @@ -1561,7 +1561,7 @@ public class TestFairScheduler extends FairSchedulerTestBase { scheduler.handle(nodeEvent2); ApplicationAttemptId appId = createAppAttemptId(this.APP_ID++, this.ATTEMPT_ID++); - scheduler.addApplication(appId.getApplicationId(), "queue1", "user1"); + scheduler.addApplication(appId.getApplicationId(), "queue1", "user1", true); scheduler.addApplicationAttempt(appId, false, true); // 1 request with 2 nodes on the same rack. another request with 1 node on @@ -1843,7 +1843,7 @@ public class TestFairScheduler extends FairSchedulerTestBase { ApplicationAttemptId attId = ApplicationAttemptId.newInstance(applicationId, this.ATTEMPT_ID++); - scheduler.addApplication(attId.getApplicationId(), queue, user); + scheduler.addApplication(attId.getApplicationId(), queue, user, true); numTries = 0; while (application.getFinishTime() == 0 && numTries < MAX_TRIES) { @@ -2720,7 +2720,7 @@ public class TestFairScheduler extends FairSchedulerTestBase { // send application request ApplicationAttemptId appAttemptId = createAppAttemptId(this.APP_ID++, this.ATTEMPT_ID++); - fs.addApplication(appAttemptId.getApplicationId(), "queue11", "user11"); + fs.addApplication(appAttemptId.getApplicationId(), "queue11", "user11", true); fs.addApplicationAttempt(appAttemptId, false, true); List ask = new ArrayList(); ResourceRequest request = From ca52cb01c40f09585431ef1c1c1ba4044657e8c7 Mon Sep 17 00:00:00 2001 From: Andrew Wang Date: Thu, 17 Jul 2014 00:44:16 +0000 Subject: [PATCH 16/49] HDFS-6690. Deduplicate xattr names in memory. 
(wang) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1611226 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 + .../hdfs/server/namenode/XAttrStorage.java | 40 ++++++++++++++++--- 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index ee2bc2e67f9..d82a48cccaa 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -294,6 +294,8 @@ Release 2.6.0 - UNRELEASED OPTIMIZATIONS + HDFS-6690. Deduplicate xattr names in memory. (wang) + BUG FIXES HDFS-6617. Flake TestDFSZKFailoverController.testManualFailoverWithDFSHAAdmin diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/XAttrStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/XAttrStorage.java index fdb549648f2..7e843d207ee 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/XAttrStorage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/XAttrStorage.java @@ -19,24 +19,30 @@ package org.apache.hadoop.hdfs.server.namenode; import java.util.List; +import java.util.Map; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.XAttr; import org.apache.hadoop.hdfs.protocol.QuotaExceededException; -import org.apache.hadoop.hdfs.server.namenode.INode; - -import com.google.common.collect.ImmutableList; /** * XAttrStorage is used to read and set xattrs for an inode. */ @InterfaceAudience.Private public class XAttrStorage { - + + private static final Map internedNames = Maps.newHashMap(); + /** * Reads the existing extended attributes of an inode. If the * inode does not have an XAttr, then this method * returns an empty list. + *

+ * Must be called while holding the FSDirectory read lock. + * * @param inode INode to read * @param snapshotId * @return List XAttr list. @@ -48,6 +54,9 @@ public class XAttrStorage { /** * Reads the existing extended attributes of an inode. + *

+ * Must be called while holding the FSDirectory read lock. + * * @param inode INode to read. * @return List XAttr list. */ @@ -58,6 +67,9 @@ public class XAttrStorage { /** * Update xattrs of inode. + *

+ * Must be called while holding the FSDirectory write lock. + * * @param inode INode to update * @param xAttrs to update xAttrs. * @param snapshotId id of the latest snapshot of the inode @@ -70,8 +82,24 @@ public class XAttrStorage { } return; } - - ImmutableList newXAttrs = ImmutableList.copyOf(xAttrs); + // Dedupe the xAttr name and save them into a new interned list + List internedXAttrs = Lists.newArrayListWithCapacity(xAttrs.size()); + for (XAttr xAttr : xAttrs) { + final String name = xAttr.getName(); + String internedName = internedNames.get(name); + if (internedName == null) { + internedName = name; + internedNames.put(internedName, internedName); + } + XAttr internedXAttr = new XAttr.Builder() + .setName(internedName) + .setNameSpace(xAttr.getNameSpace()) + .setValue(xAttr.getValue()) + .build(); + internedXAttrs.add(internedXAttr); + } + // Save the list of interned xattrs + ImmutableList newXAttrs = ImmutableList.copyOf(internedXAttrs); if (inode.getXAttrFeature() != null) { inode.removeXAttrFeature(snapshotId); } From 9dff25e8904f66e71b3fabc4262fff533dbf2013 Mon Sep 17 00:00:00 2001 From: Andrew Wang Date: Thu, 17 Jul 2014 00:49:08 +0000 Subject: [PATCH 17/49] HADOOP-10816. KeyShell returns -1 on error to the shell, should be 1. (Mike Yoder via wang) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1611229 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 3 ++ .../apache/hadoop/crypto/key/KeyShell.java | 41 ++++++++++++------- .../hadoop/crypto/key/TestKeyShell.java | 16 ++++---- 3 files changed, 37 insertions(+), 23 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 75dba1892a2..d8da5bb5001 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -381,6 +381,9 @@ Trunk (Unreleased) HADOOP-10834. Typo in CredentialShell usage. (Benoy Antony via umamahesh) + HADOOP-10816. KeyShell returns -1 on error to the shell, should be 1. + (Mike Yoder via wang) + OPTIMIZATIONS HADOOP-7761. Improve the performance of raw comparisons. (todd) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyShell.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyShell.java index 80dd9a0326d..fb01e5f7c5b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyShell.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyShell.java @@ -57,6 +57,16 @@ public class KeyShell extends Configured implements Tool { private boolean userSuppliedProvider = false; + /** + * Primary entry point for the KeyShell; called via main(). + * + * @param args Command line arguments. + * @return 0 on success and 1 on failure. This value is passed back to + * the unix shell, so we must follow shell return code conventions: + * the return code is an unsigned character, and 0 means success, and + * small positive integers mean failure. 
+ * @throws Exception + */ @Override public int run(String[] args) throws Exception { int exitCode = 0; @@ -68,11 +78,11 @@ public class KeyShell extends Configured implements Tool { if (command.validate()) { command.execute(); } else { - exitCode = -1; + exitCode = 1; } } catch (Exception e) { e.printStackTrace(err); - return -1; + return 1; } return exitCode; } @@ -86,8 +96,8 @@ public class KeyShell extends Configured implements Tool { * % hadoop key list [-provider providerPath] * % hadoop key delete keyName [--provider providerPath] [-i] * - * @param args - * @return + * @param args Command line arguments. + * @return 0 on success, 1 on failure. * @throws IOException */ private int init(String[] args) throws IOException { @@ -105,7 +115,7 @@ public class KeyShell extends Configured implements Tool { command = new CreateCommand(keyName, options); if ("--help".equals(keyName)) { printKeyShellUsage(); - return -1; + return 1; } } else if (args[i].equals("delete")) { String keyName = "--help"; @@ -116,7 +126,7 @@ public class KeyShell extends Configured implements Tool { command = new DeleteCommand(keyName); if ("--help".equals(keyName)) { printKeyShellUsage(); - return -1; + return 1; } } else if (args[i].equals("roll")) { String keyName = "--help"; @@ -127,7 +137,7 @@ public class KeyShell extends Configured implements Tool { command = new RollCommand(keyName); if ("--help".equals(keyName)) { printKeyShellUsage(); - return -1; + return 1; } } else if ("list".equals(args[i])) { command = new ListCommand(); @@ -145,13 +155,13 @@ public class KeyShell extends Configured implements Tool { out.println("\nAttributes must be in attribute=value form, " + "or quoted\nlike \"attribute = value\"\n"); printKeyShellUsage(); - return -1; + return 1; } if (attributes.containsKey(attr)) { out.println("\nEach attribute must correspond to only one value:\n" + "atttribute \"" + attr + "\" was repeated\n" ); printKeyShellUsage(); - return -1; + return 1; } attributes.put(attr, val); } else if ("--provider".equals(args[i]) && moreTokens) { @@ -163,17 +173,17 @@ public class KeyShell extends Configured implements Tool { interactive = true; } else if ("--help".equals(args[i])) { printKeyShellUsage(); - return -1; + return 1; } else { printKeyShellUsage(); ToolRunner.printGenericCommandUsage(System.err); - return -1; + return 1; } } if (command == null) { printKeyShellUsage(); - return -1; + return 1; } if (!attributes.isEmpty()) { @@ -491,10 +501,11 @@ public class KeyShell extends Configured implements Tool { } /** - * Main program. + * main() entry point for the KeyShell. While strictly speaking the + * return is void, it will System.exit() with a return code: 0 is for + * success and 1 for failure. * - * @param args - * Command line arguments + * @param args Command line arguments. 
* @throws Exception */ public static void main(String[] args) throws Exception { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestKeyShell.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestKeyShell.java index b1882a660f4..154579b567d 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestKeyShell.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestKeyShell.java @@ -161,7 +161,7 @@ public class TestKeyShell { KeyShell ks = new KeyShell(); ks.setConf(new Configuration()); rc = ks.run(args1); - assertEquals(-1, rc); + assertEquals(1, rc); assertTrue(outContent.toString().contains("key1 has not been created.")); } @@ -174,7 +174,7 @@ public class TestKeyShell { KeyShell ks = new KeyShell(); ks.setConf(new Configuration()); rc = ks.run(args1); - assertEquals(-1, rc); + assertEquals(1, rc); assertTrue(outContent.toString().contains("key1 has not been created.")); } @@ -187,7 +187,7 @@ public class TestKeyShell { KeyShell ks = new KeyShell(); ks.setConf(new Configuration()); rc = ks.run(args1); - assertEquals(-1, rc); + assertEquals(1, rc); assertTrue(outContent.toString().contains("There are no valid " + "KeyProviders configured.")); } @@ -216,7 +216,7 @@ public class TestKeyShell { config.set(KeyProviderFactory.KEY_PROVIDER_PATH, "user:///"); ks.setConf(config); rc = ks.run(args1); - assertEquals(-1, rc); + assertEquals(1, rc); assertTrue(outContent.toString().contains("There are no valid " + "KeyProviders configured.")); } @@ -262,19 +262,19 @@ public class TestKeyShell { final String[] args2 = {"create", "keyattr2", "--provider", jceksProvider, "--attr", "=bar"}; rc = ks.run(args2); - assertEquals(-1, rc); + assertEquals(1, rc); /* Not in attribute = value form */ outContent.reset(); args2[5] = "foo"; rc = ks.run(args2); - assertEquals(-1, rc); + assertEquals(1, rc); /* No attribute or value */ outContent.reset(); args2[5] = "="; rc = ks.run(args2); - assertEquals(-1, rc); + assertEquals(1, rc); /* Legal: attribute is a, value is b=c */ outContent.reset(); @@ -308,7 +308,7 @@ public class TestKeyShell { "--attr", "foo=bar", "--attr", "foo=glarch"}; rc = ks.run(args4); - assertEquals(-1, rc); + assertEquals(1, rc); /* Clean up to be a good citizen */ deleteKey(ks, "keyattr1"); From 3c193811ca36f35ccda40a4c93c91b7476db45b1 Mon Sep 17 00:00:00 2001 From: Jian He Date: Thu, 17 Jul 2014 03:28:39 +0000 Subject: [PATCH 18/49] YARN-2219. 
Addendum patch for YARN-2219 git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1611240 13f79535-47bb-0310-9956-ffa450edef68 --- .../resourcemanager/TestFifoScheduler.java | 4 ++-- .../scheduler/fair/FairSchedulerTestBase.java | 8 +++---- .../scheduler/fair/TestFairScheduler.java | 22 +++++++++---------- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestFifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestFifoScheduler.java index b8b41333e66..420fc942ae2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestFifoScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestFifoScheduler.java @@ -228,7 +228,7 @@ public class TestFifoScheduler { scheduler.handle(new NodeAddedSchedulerEvent(node)); ApplicationId appId = ApplicationId.newInstance(0, 1); - scheduler.addApplication(appId, "queue1", "user1", true); + scheduler.addApplication(appId, "queue1", "user1", false); NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node); try { @@ -238,7 +238,7 @@ public class TestFifoScheduler { } ApplicationAttemptId attId = ApplicationAttemptId.newInstance(appId, 1); - scheduler.addApplicationAttempt(attId, false, true); + scheduler.addApplicationAttempt(attId, false, false); rm.stop(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerTestBase.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerTestBase.java index 7d666dadb60..61def878b20 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerTestBase.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerTestBase.java @@ -147,11 +147,11 @@ public class FairSchedulerTestBase { int memory, int vcores, String queueId, String userId, int numContainers, int priority) { ApplicationAttemptId id = createAppAttemptId(this.APP_ID++, this.ATTEMPT_ID++); - scheduler.addApplication(id.getApplicationId(), queueId, userId, true); + scheduler.addApplication(id.getApplicationId(), queueId, userId, false); // This conditional is for testAclSubmitApplication where app is rejected // and no app is added. 
if (scheduler.getSchedulerApplications().containsKey(id.getApplicationId())) { - scheduler.addApplicationAttempt(id, false, true); + scheduler.addApplicationAttempt(id, false, false); } List ask = new ArrayList(); ResourceRequest request = createResourceRequest(memory, vcores, ResourceRequest.ANY, @@ -172,11 +172,11 @@ public class FairSchedulerTestBase { String userId, List ask) { ApplicationAttemptId id = createAppAttemptId(this.APP_ID++, this.ATTEMPT_ID++); - scheduler.addApplication(id.getApplicationId(), queueId, userId); + scheduler.addApplication(id.getApplicationId(), queueId, userId, false); // This conditional is for testAclSubmitApplication where app is rejected // and no app is added. if (scheduler.getSchedulerApplications().containsKey(id.getApplicationId())) { - scheduler.addApplicationAttempt(id, false, true); + scheduler.addApplicationAttempt(id, false, false); } scheduler.allocate(id, ask, new ArrayList(), null, null); RMApp rmApp = mock(RMApp.class); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index 7025fc48682..ed492cec40e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -793,14 +793,14 @@ public class TestFairScheduler extends FairSchedulerTestBase { scheduler.reinitialize(conf, resourceManager.getRMContext()); ApplicationAttemptId id11 = createAppAttemptId(1, 1); - scheduler.addApplication(id11.getApplicationId(), "root.queue1", "user1", true); - scheduler.addApplicationAttempt(id11, false, true); + scheduler.addApplication(id11.getApplicationId(), "root.queue1", "user1", false); + scheduler.addApplicationAttempt(id11, false, false); ApplicationAttemptId id21 = createAppAttemptId(2, 1); - scheduler.addApplication(id21.getApplicationId(), "root.queue2", "user1", true); - scheduler.addApplicationAttempt(id21, false, true); + scheduler.addApplication(id21.getApplicationId(), "root.queue2", "user1", false); + scheduler.addApplicationAttempt(id21, false, false); ApplicationAttemptId id22 = createAppAttemptId(2, 2); - scheduler.addApplication(id22.getApplicationId(), "root.queue2", "user1", true); - scheduler.addApplicationAttempt(id22, false, true); + scheduler.addApplication(id22.getApplicationId(), "root.queue2", "user1", false); + scheduler.addApplicationAttempt(id22, false, false); int minReqSize = FairSchedulerConfiguration.DEFAULT_RM_SCHEDULER_INCREMENT_ALLOCATION_MB; @@ -1561,8 +1561,8 @@ public class TestFairScheduler extends FairSchedulerTestBase { scheduler.handle(nodeEvent2); ApplicationAttemptId appId = createAppAttemptId(this.APP_ID++, this.ATTEMPT_ID++); - scheduler.addApplication(appId.getApplicationId(), "queue1", "user1", true); - scheduler.addApplicationAttempt(appId, false, true); + scheduler.addApplication(appId.getApplicationId(), "queue1", "user1", false); + scheduler.addApplicationAttempt(appId, false, false); // 1 request with 2 nodes on the same rack. 
another request with 1 node on // a different rack @@ -1843,7 +1843,7 @@ public class TestFairScheduler extends FairSchedulerTestBase { ApplicationAttemptId attId = ApplicationAttemptId.newInstance(applicationId, this.ATTEMPT_ID++); - scheduler.addApplication(attId.getApplicationId(), queue, user, true); + scheduler.addApplication(attId.getApplicationId(), queue, user, false); numTries = 0; while (application.getFinishTime() == 0 && numTries < MAX_TRIES) { @@ -2720,8 +2720,8 @@ public class TestFairScheduler extends FairSchedulerTestBase { // send application request ApplicationAttemptId appAttemptId = createAppAttemptId(this.APP_ID++, this.ATTEMPT_ID++); - fs.addApplication(appAttemptId.getApplicationId(), "queue11", "user11", true); - fs.addApplicationAttempt(appAttemptId, false, true); + fs.addApplication(appAttemptId.getApplicationId(), "queue11", "user11", false); + fs.addApplicationAttempt(appAttemptId, false, false); List ask = new ArrayList(); ResourceRequest request = createResourceRequest(1024, 1, ResourceRequest.ANY, 1, 1, true); From 0a02b5a19bcca8d0c49d2adfc3909aebdf9d606e Mon Sep 17 00:00:00 2001 From: Chris Nauroth Date: Thu, 17 Jul 2014 05:04:53 +0000 Subject: [PATCH 19/49] HADOOP-10840. Fix OutOfMemoryError caused by metrics system in Azure File System. Contributed by Shanyu Zhao. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1611247 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 3 +++ .../fs/azure/NativeAzureFileSystem.java | 27 ++++++++++++++----- .../metrics/AzureFileSystemMetricsSystem.java | 19 ++++++++----- .../fs/azure/AzureBlobStorageTestAccount.java | 4 +-- .../azure/NativeAzureFileSystemBaseTest.java | 7 +++++ 5 files changed, 43 insertions(+), 17 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index d8da5bb5001..be9cdc6f037 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -384,6 +384,9 @@ Trunk (Unreleased) HADOOP-10816. KeyShell returns -1 on error to the shell, should be 1. (Mike Yoder via wang) + HADOOP-10840. Fix OutOfMemoryError caused by metrics system in Azure File + System. (Shanyu Zhao via cnauroth) + OPTIMIZATIONS HADOOP-7761. Improve the performance of raw comparisons. (todd) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java index f9d7377dfae..577711f4ce0 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java @@ -373,6 +373,8 @@ public class NativeAzureFileSystem extends FileSystem { private Path workingDir; private long blockSize = MAX_AZURE_BLOCK_SIZE; private AzureFileSystemInstrumentation instrumentation; + private String metricsSourceName; + private boolean isClosed = false; private static boolean suppressRetryPolicy = false; // A counter to create unique (within-process) names for my metrics sources. 
private static AtomicInteger metricsSourceNameCounter = new AtomicInteger(); @@ -482,11 +484,10 @@ public class NativeAzureFileSystem extends FileSystem { // Make sure the metrics system is available before interacting with Azure AzureFileSystemMetricsSystem.fileSystemStarted(); - String sourceName = newMetricsSourceName(), - sourceDesc = "Azure Storage Volume File System metrics"; - instrumentation = DefaultMetricsSystem.instance().register(sourceName, - sourceDesc, new AzureFileSystemInstrumentation(conf)); - AzureFileSystemMetricsSystem.registerSource(sourceName, sourceDesc, + metricsSourceName = newMetricsSourceName(); + String sourceDesc = "Azure Storage Volume File System metrics"; + instrumentation = new AzureFileSystemInstrumentation(conf); + AzureFileSystemMetricsSystem.registerSource(metricsSourceName, sourceDesc, instrumentation); store.initialize(uri, conf, instrumentation); @@ -502,7 +503,6 @@ public class NativeAzureFileSystem extends FileSystem { LOG.debug(" blockSize = " + conf.getLong(AZURE_BLOCK_SIZE_PROPERTY_NAME, MAX_AZURE_BLOCK_SIZE)); } - } private NativeFileSystemStore createDefaultStore(Configuration conf) { @@ -1337,7 +1337,11 @@ public class NativeAzureFileSystem extends FileSystem { } @Override - public void close() throws IOException { + public synchronized void close() throws IOException { + if (isClosed) { + return; + } + // Call the base close() to close any resources there. super.close(); // Close the store @@ -1349,12 +1353,14 @@ public class NativeAzureFileSystem extends FileSystem { long startTime = System.currentTimeMillis(); + AzureFileSystemMetricsSystem.unregisterSource(metricsSourceName); AzureFileSystemMetricsSystem.fileSystemClosed(); if (LOG.isDebugEnabled()) { LOG.debug("Submitting metrics when file system closed took " + (System.currentTimeMillis() - startTime) + " ms."); } + isClosed = true; } /** @@ -1498,6 +1504,13 @@ public class NativeAzureFileSystem extends FileSystem { handleFilesWithDanglingTempData(root, new DanglingFileDeleter()); } + @Override + protected void finalize() throws Throwable { + LOG.debug("finalize() called."); + close(); + super.finalize(); + } + /** * Encode the key with a random prefix for load balancing in Azure storage. * Upload data to a random temporary file then do storage side renaming to diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/metrics/AzureFileSystemMetricsSystem.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/metrics/AzureFileSystemMetricsSystem.java index a5f29c1f33d..322795ab827 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/metrics/AzureFileSystemMetricsSystem.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/metrics/AzureFileSystemMetricsSystem.java @@ -44,21 +44,26 @@ public final class AzureFileSystemMetricsSystem { } public static synchronized void fileSystemClosed() { - if (instance != null) { - instance.publishMetricsNow(); - } if (numFileSystems == 1) { + instance.publishMetricsNow(); instance.stop(); instance.shutdown(); instance = null; } numFileSystems--; } - + public static void registerSource(String name, String desc, MetricsSource source) { - // Register the source with the name appended with -WasbSystem - // so that the name is globally unique. 
- instance.register(name + "-WasbSystem", desc, source); + //caller has to use unique name to register source + instance.register(name, desc, source); + } + + public static synchronized void unregisterSource(String name) { + if (instance != null) { + //publish metrics before unregister a metrics source + instance.publishMetricsNow(); + instance.unregisterSource(name); + } } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/AzureBlobStorageTestAccount.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/AzureBlobStorageTestAccount.java index 02738e7efb5..80e8e4351a5 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/AzureBlobStorageTestAccount.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/AzureBlobStorageTestAccount.java @@ -324,9 +324,7 @@ public final class AzureBlobStorageTestAccount { String sourceName = NativeAzureFileSystem.newMetricsSourceName(); String sourceDesc = "Azure Storage Volume File System metrics"; - AzureFileSystemInstrumentation instrumentation = - DefaultMetricsSystem.instance().register(sourceName, - sourceDesc, new AzureFileSystemInstrumentation(conf)); + AzureFileSystemInstrumentation instrumentation = new AzureFileSystemInstrumentation(conf); AzureFileSystemMetricsSystem.registerSource( sourceName, sourceDesc, instrumentation); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/NativeAzureFileSystemBaseTest.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/NativeAzureFileSystemBaseTest.java index bc7e344540a..e731b21d506 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/NativeAzureFileSystemBaseTest.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/NativeAzureFileSystemBaseTest.java @@ -516,6 +516,13 @@ public abstract class NativeAzureFileSystemBaseTest { assertNotNull(status); } + @Test + public void testCloseFileSystemTwice() throws Exception { + //make sure close() can be called multiple times without doing any harm + fs.close(); + fs.close(); + } + private boolean testModifiedTime(Path testPath, long time) throws Exception { FileStatus fileStatus = fs.getFileStatus(testPath); final long errorMargin = modifiedTimeErrorMargin; From f4151bbf4f54dc33836c76e6860aa043a9626e48 Mon Sep 17 00:00:00 2001 From: Jing Zhao Date: Thu, 17 Jul 2014 17:37:22 +0000 Subject: [PATCH 20/49] HDFS-6478. RemoteException can't be retried properly for non-HA scenario. Contributed by Ming Ma. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1611410 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 ++ .../apache/hadoop/hdfs/NameNodeProxies.java | 51 +++++++++---------- ...atanodeProtocolClientSideTranslatorPB.java | 29 +---------- .../NamenodeProtocolTranslatorPB.java | 8 ++- .../apache/hadoop/hdfs/TestFileCreation.java | 17 +++++-- .../hadoop/hdfs/TestIsMethodSupported.java | 34 ++++++++----- 6 files changed, 71 insertions(+), 71 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index d82a48cccaa..0e44c1a95a7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -321,6 +321,9 @@ Release 2.6.0 - UNRELEASED HDFS-6689. NFS doesn't return correct lookup access for direcories (brandonli) + HDFS-6478. RemoteException can't be retried properly for non-HA scenario. 
+ (Ming Ma via jing9) + Release 2.5.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java index 2bcb2a16222..ab8f3dc7bd7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java @@ -333,19 +333,18 @@ public class NameNodeProxies { address, conf, ugi, NamenodeProtocolPB.class, 0); if (withRetries) { // create the proxy with retries RetryPolicy timeoutPolicy = RetryPolicies.exponentialBackoffRetry(5, 200, - TimeUnit.MILLISECONDS); - Map, RetryPolicy> exceptionToPolicyMap - = new HashMap, RetryPolicy>(); - RetryPolicy methodPolicy = RetryPolicies.retryByException(timeoutPolicy, - exceptionToPolicyMap); - Map methodNameToPolicyMap - = new HashMap(); - methodNameToPolicyMap.put("getBlocks", methodPolicy); - methodNameToPolicyMap.put("getAccessKeys", methodPolicy); - proxy = (NamenodeProtocolPB) RetryProxy.create(NamenodeProtocolPB.class, - proxy, methodNameToPolicyMap); + TimeUnit.MILLISECONDS); + Map methodNameToPolicyMap + = new HashMap(); + methodNameToPolicyMap.put("getBlocks", timeoutPolicy); + methodNameToPolicyMap.put("getAccessKeys", timeoutPolicy); + NamenodeProtocol translatorProxy = + new NamenodeProtocolTranslatorPB(proxy); + return (NamenodeProtocol) RetryProxy.create( + NamenodeProtocol.class, translatorProxy, methodNameToPolicyMap); + } else { + return new NamenodeProtocolTranslatorPB(proxy); } - return new NamenodeProtocolTranslatorPB(proxy); } private static ClientProtocol createNNProxyWithClientProtocol( @@ -379,29 +378,27 @@ public class NameNodeProxies { = new HashMap, RetryPolicy>(); remoteExceptionToPolicyMap.put(AlreadyBeingCreatedException.class, createPolicy); - - Map, RetryPolicy> exceptionToPolicyMap - = new HashMap, RetryPolicy>(); - exceptionToPolicyMap.put(RemoteException.class, RetryPolicies - .retryByRemoteException(defaultPolicy, - remoteExceptionToPolicyMap)); - RetryPolicy methodPolicy = RetryPolicies.retryByException( - defaultPolicy, exceptionToPolicyMap); + + RetryPolicy methodPolicy = RetryPolicies.retryByRemoteException( + defaultPolicy, remoteExceptionToPolicyMap); Map methodNameToPolicyMap = new HashMap(); methodNameToPolicyMap.put("create", methodPolicy); - - proxy = (ClientNamenodeProtocolPB) RetryProxy.create( - ClientNamenodeProtocolPB.class, - new DefaultFailoverProxyProvider( - ClientNamenodeProtocolPB.class, proxy), + + ClientProtocol translatorProxy = + new ClientNamenodeProtocolTranslatorPB(proxy); + return (ClientProtocol) RetryProxy.create( + ClientProtocol.class, + new DefaultFailoverProxyProvider( + ClientProtocol.class, translatorProxy), methodNameToPolicyMap, defaultPolicy); + } else { + return new ClientNamenodeProtocolTranslatorPB(proxy); } - return new ClientNamenodeProtocolTranslatorPB(proxy); } - + private static Object createNameNodeProxy(InetSocketAddress address, Configuration conf, UserGroupInformation ugi, Class xface, int rpcTimeout) throws IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolClientSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolClientSideTranslatorPB.java index 2c039aed236..5775d6e2634 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolClientSideTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolClientSideTranslatorPB.java @@ -97,7 +97,7 @@ public class DatanodeProtocolClientSideTranslatorPB implements RPC.setProtocolEngine(conf, DatanodeProtocolPB.class, ProtobufRpcEngine.class); UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); - rpcProxy = createNamenodeWithRetry(createNamenode(nameNodeAddr, conf, ugi)); + rpcProxy = createNamenode(nameNodeAddr, conf, ugi); } private static DatanodeProtocolPB createNamenode( @@ -109,33 +109,6 @@ public class DatanodeProtocolClientSideTranslatorPB implements org.apache.hadoop.ipc.Client.getPingInterval(conf), null).getProxy(); } - /** Create a {@link NameNode} proxy */ - static DatanodeProtocolPB createNamenodeWithRetry( - DatanodeProtocolPB rpcNamenode) { - RetryPolicy createPolicy = RetryPolicies - .retryUpToMaximumCountWithFixedSleep(5, - HdfsConstants.LEASE_SOFTLIMIT_PERIOD, TimeUnit.MILLISECONDS); - - Map, RetryPolicy> remoteExceptionToPolicyMap = - new HashMap, RetryPolicy>(); - remoteExceptionToPolicyMap.put(AlreadyBeingCreatedException.class, - createPolicy); - - Map, RetryPolicy> exceptionToPolicyMap = - new HashMap, RetryPolicy>(); - exceptionToPolicyMap.put(RemoteException.class, RetryPolicies - .retryByRemoteException(RetryPolicies.TRY_ONCE_THEN_FAIL, - remoteExceptionToPolicyMap)); - RetryPolicy methodPolicy = RetryPolicies.retryByException( - RetryPolicies.TRY_ONCE_THEN_FAIL, exceptionToPolicyMap); - Map methodNameToPolicyMap = new HashMap(); - - methodNameToPolicyMap.put("create", methodPolicy); - - return (DatanodeProtocolPB) RetryProxy.create(DatanodeProtocolPB.class, - rpcNamenode, methodNameToPolicyMap); - } - @Override public void close() throws IOException { RPC.stopProxy(rpcProxy); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java index 87f105c7d8c..98e99f0e061 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java @@ -47,6 +47,7 @@ import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest; import org.apache.hadoop.ipc.ProtobufHelper; import org.apache.hadoop.ipc.ProtocolMetaInterface; +import org.apache.hadoop.ipc.ProtocolTranslator; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RpcClientUtil; @@ -61,7 +62,7 @@ import com.google.protobuf.ServiceException; @InterfaceAudience.Private @InterfaceStability.Stable public class NamenodeProtocolTranslatorPB implements NamenodeProtocol, - ProtocolMetaInterface, Closeable { + ProtocolMetaInterface, Closeable, ProtocolTranslator { /** RpcController is not used and hence is set to null */ private final static RpcController NULL_CONTROLLER = null; @@ -88,6 +89,11 @@ public class NamenodeProtocolTranslatorPB implements NamenodeProtocol, RPC.stopProxy(rpcProxy); } + @Override + public Object getUnderlyingProxyObject() { + return rpcProxy; + } + @Override public BlocksWithLocations getBlocks(DatanodeInfo datanode, long size) throws IOException { diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCreation.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCreation.java index 64697bf9d82..809e592db7b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCreation.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCreation.java @@ -30,6 +30,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHEC import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_KEY; +import static org.apache.hadoop.test.MetricsAsserts.assertCounter; +import static org.apache.hadoop.test.MetricsAsserts.getMetrics; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -79,6 +81,7 @@ import org.apache.hadoop.hdfs.server.namenode.LeaseManager; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; import org.apache.hadoop.io.EnumSetWritable; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.metrics2.MetricsRecordBuilder; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.Time; @@ -97,6 +100,8 @@ public class TestFileCreation { ((Log4JLogger)LogFactory.getLog(FSNamesystem.class)).getLogger().setLevel(Level.ALL); ((Log4JLogger)DFSClient.LOG).getLogger().setLevel(Level.ALL); } + private static final String RPC_DETAILED_METRICS = + "RpcDetailedActivityForPort"; static final long seed = 0xDEADBEEFL; static final int blockSize = 8192; @@ -371,7 +376,7 @@ public class TestFileCreation { conf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false); final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build(); FileSystem fs = cluster.getFileSystem(); - + UserGroupInformation otherUgi = UserGroupInformation.createUserForTesting( "testuser", new String[]{"testgroup"}); FileSystem fs2 = otherUgi.doAs(new PrivilegedExceptionAction() { @@ -380,12 +385,16 @@ public class TestFileCreation { return FileSystem.get(cluster.getConfiguration(0)); } }); - + + String metricsName = RPC_DETAILED_METRICS + cluster.getNameNodePort(); + try { Path p = new Path("/testfile"); FSDataOutputStream stm1 = fs.create(p); stm1.write(1); + assertCounter("CreateNumOps", 1L, getMetrics(metricsName)); + // Create file again without overwrite try { fs2.create(p, false); @@ -394,7 +403,9 @@ public class TestFileCreation { GenericTestUtils.assertExceptionContains("already being created by", abce); } - + // NameNodeProxies' createNNProxyWithClientProtocol has 5 retries. 
+ assertCounter("AlreadyBeingCreatedExceptionNumOps", + 6L, getMetrics(metricsName)); FSDataOutputStream stm2 = fs2.create(p, true); stm2.write(2); stm2.close(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestIsMethodSupported.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestIsMethodSupported.java index c6bd7ba2848..2e4a08bf0ba 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestIsMethodSupported.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestIsMethodSupported.java @@ -25,14 +25,16 @@ import java.net.InetSocketAddress; import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.protocolPB.ClientDatanodeProtocolTranslatorPB; -import org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB; +import org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolPB; import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB; import org.apache.hadoop.hdfs.protocolPB.InterDatanodeProtocolTranslatorPB; import org.apache.hadoop.hdfs.protocolPB.JournalProtocolTranslatorPB; -import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolTranslatorPB; +import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolPB; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.protocol.JournalProtocol; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; +import org.apache.hadoop.ipc.RPC; +import org.apache.hadoop.ipc.RpcClientUtil; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.RefreshUserMappingsProtocol; import org.apache.hadoop.security.UserGroupInformation; @@ -76,16 +78,22 @@ public class TestIsMethodSupported { @Test public void testNamenodeProtocol() throws IOException { - NamenodeProtocolTranslatorPB translator = - (NamenodeProtocolTranslatorPB) NameNodeProxies.createNonHAProxy(conf, + NamenodeProtocol np = + NameNodeProxies.createNonHAProxy(conf, nnAddress, NamenodeProtocol.class, UserGroupInformation.getCurrentUser(), true).getProxy(); - boolean exists = translator.isMethodSupported("rollEditLog"); + + boolean exists = RpcClientUtil.isMethodSupported(np, + NamenodeProtocolPB.class, RPC.RpcKind.RPC_PROTOCOL_BUFFER, + RPC.getProtocolVersion(NamenodeProtocolPB.class), "rollEditLog"); + assertTrue(exists); - exists = translator.isMethodSupported("bogusMethod"); + exists = RpcClientUtil.isMethodSupported(np, + NamenodeProtocolPB.class, RPC.RpcKind.RPC_PROTOCOL_BUFFER, + RPC.getProtocolVersion(NamenodeProtocolPB.class), "bogusMethod"); assertFalse(exists); } - + @Test public void testDatanodeProtocol() throws IOException { DatanodeProtocolClientSideTranslatorPB translator = @@ -107,16 +115,18 @@ public class TestIsMethodSupported { NetUtils.getDefaultSocketFactory(conf)); assertTrue(translator.isMethodSupported("refreshNamenodes")); } - + @Test public void testClientNamenodeProtocol() throws IOException { - ClientNamenodeProtocolTranslatorPB translator = - (ClientNamenodeProtocolTranslatorPB) NameNodeProxies.createNonHAProxy( + ClientProtocol cp = + NameNodeProxies.createNonHAProxy( conf, nnAddress, ClientProtocol.class, UserGroupInformation.getCurrentUser(), true).getProxy(); - assertTrue(translator.isMethodSupported("mkdirs")); + RpcClientUtil.isMethodSupported(cp, + ClientNamenodeProtocolPB.class, RPC.RpcKind.RPC_PROTOCOL_BUFFER, + RPC.getProtocolVersion(ClientNamenodeProtocolPB.class), "mkdirs"); } - + @Test public void tesJournalProtocol() 
throws IOException { JournalProtocolTranslatorPB translator = (JournalProtocolTranslatorPB) From 9aaa714a999f998b6acc9904beb414d327c06f8b Mon Sep 17 00:00:00 2001 From: Owen O'Malley Date: Thu, 17 Jul 2014 17:45:12 +0000 Subject: [PATCH 21/49] HADOOP-10732. Fix locking in credential update. (Ted Yu via omalley) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1611415 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.txt | 8 +++++--- .../hadoop/security/alias/JavaKeyStoreProvider.java | 3 +++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index be9cdc6f037..daadcbad627 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -158,9 +158,6 @@ Trunk (Unreleased) HADOOP-10485. Remove dead classes in hadoop-streaming. (wheat9) - HADOOP-10607. Create API to separate credential/password storage from - applications. (Larry McCay via omalley) - HADOOP-10696. Add optional attributes to KeyProvider Options and Metadata. (tucu) @@ -418,6 +415,11 @@ Release 2.6.0 - UNRELEASED HADOOP-10839. Add unregisterSource() to MetricsSystem API. (Shanyu Zhao via cnauroth) + HADOOP-10607. Create an API to separate credentials/password storage + from applications (Larry McCay via omalley) + + HADOOP-10732. Fix locking in credential update. (Ted Yu via omalley) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/JavaKeyStoreProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/JavaKeyStoreProvider.java index 61958fe413e..551c4ca14ff 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/JavaKeyStoreProvider.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/JavaKeyStoreProvider.java @@ -230,6 +230,7 @@ public class JavaKeyStoreProvider extends CredentialProvider { CredentialEntry innerSetCredential(String alias, char[] material) throws IOException { + writeLock.lock(); try { keyStore.setKeyEntry(alias, new SecretKeySpec( new String(material).getBytes("UTF-8"), "AES"), @@ -237,6 +238,8 @@ public class JavaKeyStoreProvider extends CredentialProvider { } catch (KeyStoreException e) { throw new IOException("Can't store credential " + alias + " in " + this, e); + } finally { + writeLock.unlock(); } changed = true; return new CredentialEntry(alias, material); From 1e7ce76bbab08a153b285739de51c3de924e3580 Mon Sep 17 00:00:00 2001 From: Owen O'Malley Date: Thu, 17 Jul 2014 18:09:41 +0000 Subject: [PATCH 22/49] HADOOP-10733. Fix potential null dereference in CredShell. (Ted Yu via omalley) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1611419 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 3 +++ .../security/alias/CredentialShell.java | 4 ++-- .../hadoop/security/alias/TestCredShell.java | 20 +++++++++++++++++-- 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index daadcbad627..d0705587227 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -420,6 +420,9 @@ Release 2.6.0 - UNRELEASED HADOOP-10732. Fix locking in credential update. (Ted Yu via omalley) + HADOOP-10733. 
Fix potential null dereference in CredShell. (Ted Yu via + omalley) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/CredentialShell.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/CredentialShell.java index a89c3c792ae..bb35ce51d48 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/CredentialShell.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/CredentialShell.java @@ -373,12 +373,12 @@ public class CredentialShell extends Configured implements Tool { char[] newPassword2 = c.readPassword("Enter password again: "); noMatch = !Arrays.equals(newPassword1, newPassword2); if (noMatch) { - Arrays.fill(newPassword1, ' '); + if (newPassword1 != null) Arrays.fill(newPassword1, ' '); c.format("Passwords don't match. Try again.%n"); } else { cred = newPassword1; } - Arrays.fill(newPassword2, ' '); + if (newPassword2 != null) Arrays.fill(newPassword2, ' '); } while (noMatch); return cred; } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/alias/TestCredShell.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/alias/TestCredShell.java index 34758be95e7..c48b69f2149 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/alias/TestCredShell.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/alias/TestCredShell.java @@ -127,6 +127,22 @@ public class TestCredShell { "CredentialProviders configured.")); } + @Test + public void testPromptForCredentialWithEmptyPasswd() throws Exception { + String[] args1 = {"create", "credential1", "--provider", + "jceks://file" + tmpDir + "/credstore.jceks"}; + ArrayList passwords = new ArrayList(); + passwords.add(null); + passwords.add("p@ssw0rd"); + int rc = 0; + CredentialShell shell = new CredentialShell(); + shell.setConf(new Configuration()); + shell.setPasswordReader(new MockPasswordReader(passwords)); + rc = shell.run(args1); + assertEquals(outContent.toString(), -1, rc); + assertTrue(outContent.toString().contains("Passwords don't match")); + } + @Test public void testPromptForCredential() throws Exception { String[] args1 = {"create", "credential1", "--provider", @@ -142,7 +158,7 @@ public class TestCredShell { assertEquals(0, rc); assertTrue(outContent.toString().contains("credential1 has been successfully " + "created.")); - + String[] args2 = {"delete", "credential1", "--provider", "jceks://file" + tmpDir + "/credstore.jceks"}; rc = shell.run(args2); @@ -162,7 +178,7 @@ public class TestCredShell { public char[] readPassword(String prompt) { if (passwords.size() == 0) return null; String pass = passwords.remove(0); - return pass.toCharArray(); + return pass == null ? null : pass.toCharArray(); } @Override From ef9e24f826c271f0456cba0077ac60839affd5dd Mon Sep 17 00:00:00 2001 From: Colin McCabe Date: Thu, 17 Jul 2014 18:17:18 +0000 Subject: [PATCH 23/49] HADOOP-10591. 
Compression codecs must used pooled direct buffers or deallocate direct buffers when stream is closed (cmccabe) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1611423 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 3 + .../apache/hadoop/io/compress/BZip2Codec.java | 6 +- .../hadoop/io/compress/CompressionCodec.java | 55 +++++++++++++++++++ .../io/compress/CompressionInputStream.java | 10 ++++ .../io/compress/CompressionOutputStream.java | 18 +++++- .../hadoop/io/compress/DefaultCodec.java | 14 ++--- .../apache/hadoop/io/compress/GzipCodec.java | 14 +++-- .../apache/hadoop/io/compress/Lz4Codec.java | 6 +- .../hadoop/io/compress/SnappyCodec.java | 6 +- 9 files changed, 108 insertions(+), 24 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index d0705587227..449e853113f 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -441,6 +441,9 @@ Release 2.6.0 - UNRELEASED HADOOP-9921. daemon scripts should remove pid file on stop call after stop or process is found not running ( vinayakumarb ) + HADOOP-10591. Compression codecs must used pooled direct buffers or + deallocate direct buffers when stream is closed (cmccabe) + Release 2.5.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java index 42e96cfdc50..37b97f2a641 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java @@ -100,7 +100,8 @@ public class BZip2Codec implements Configurable, SplittableCompressionCodec { @Override public CompressionOutputStream createOutputStream(OutputStream out) throws IOException { - return createOutputStream(out, createCompressor()); + return CompressionCodec.Util. + createOutputStreamWithCodecPool(this, conf, out); } /** @@ -153,7 +154,8 @@ public class BZip2Codec implements Configurable, SplittableCompressionCodec { @Override public CompressionInputStream createInputStream(InputStream in) throws IOException { - return createInputStream(in, createDecompressor()); + return CompressionCodec.Util. + createInputStreamWithCodecPool(this, conf, in); } /** diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionCodec.java index af2ff20b39d..f37aadfcb57 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionCodec.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionCodec.java @@ -24,6 +24,7 @@ import java.io.OutputStream; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; /** * This class encapsulates a streaming compression/decompression pair. @@ -113,4 +114,58 @@ public interface CompressionCodec { * @return the extension including the '.' */ String getDefaultExtension(); + + static class Util { + /** + * Create an output stream with a codec taken from the global CodecPool. + * + * @param codec The codec to use to create the output stream. 
+ * @param conf The configuration to use if we need to create a new codec. + * @param out The output stream to wrap. + * @return The new output stream + * @throws IOException + */ + static CompressionOutputStream createOutputStreamWithCodecPool( + CompressionCodec codec, Configuration conf, OutputStream out) + throws IOException { + Compressor compressor = CodecPool.getCompressor(codec, conf); + CompressionOutputStream stream = null; + try { + stream = codec.createOutputStream(out, compressor); + } finally { + if (stream == null) { + CodecPool.returnCompressor(compressor); + } else { + stream.setTrackedCompressor(compressor); + } + } + return stream; + } + + /** + * Create an input stream with a codec taken from the global CodecPool. + * + * @param codec The codec to use to create the input stream. + * @param conf The configuration to use if we need to create a new codec. + * @param in The input stream to wrap. + * @return The new input stream + * @throws IOException + */ + static CompressionInputStream createInputStreamWithCodecPool( + CompressionCodec codec, Configuration conf, InputStream in) + throws IOException { + Decompressor decompressor = CodecPool.getDecompressor(codec); + CompressionInputStream stream = null; + try { + stream = codec.createInputStream(in, decompressor); + } finally { + if (stream == null) { + CodecPool.returnDecompressor(decompressor); + } else { + stream.setTrackedDecompressor(decompressor); + } + } + return stream; + } + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionInputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionInputStream.java index 4491819d72c..cf3ac401cdd 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionInputStream.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionInputStream.java @@ -41,6 +41,8 @@ public abstract class CompressionInputStream extends InputStream implements Seek protected final InputStream in; protected long maxAvailableData = 0L; + private Decompressor trackedDecompressor; + /** * Create a compression input stream that reads * the decompressed bytes from the given stream. @@ -58,6 +60,10 @@ public abstract class CompressionInputStream extends InputStream implements Seek @Override public void close() throws IOException { in.close(); + if (trackedDecompressor != null) { + CodecPool.returnDecompressor(trackedDecompressor); + trackedDecompressor = null; + } } /** @@ -112,4 +118,8 @@ public abstract class CompressionInputStream extends InputStream implements Seek public boolean seekToNewSource(long targetPos) throws UnsupportedOperationException { throw new UnsupportedOperationException(); } + + void setTrackedDecompressor(Decompressor decompressor) { + trackedDecompressor = decompressor; + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionOutputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionOutputStream.java index 9bd6b84f988..00e272a9cc5 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionOutputStream.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionOutputStream.java @@ -34,7 +34,13 @@ public abstract class CompressionOutputStream extends OutputStream { * The output stream to be compressed. 
*/ protected final OutputStream out; - + + /** + * If non-null, this is the Compressor object that we should call + * CodecPool#returnCompressor on when this stream is closed. + */ + private Compressor trackedCompressor; + /** * Create a compression output stream that writes * the compressed bytes to the given stream. @@ -43,11 +49,19 @@ public abstract class CompressionOutputStream extends OutputStream { protected CompressionOutputStream(OutputStream out) { this.out = out; } - + + void setTrackedCompressor(Compressor compressor) { + trackedCompressor = compressor; + } + @Override public void close() throws IOException { finish(); out.close(); + if (trackedCompressor != null) { + CodecPool.returnCompressor(trackedCompressor); + trackedCompressor = null; + } } @Override diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/DefaultCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/DefaultCodec.java index dc02dcaf429..0e6f02cc9f4 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/DefaultCodec.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/DefaultCodec.java @@ -51,14 +51,8 @@ public class DefaultCodec implements Configurable, CompressionCodec, DirectDecom @Override public CompressionOutputStream createOutputStream(OutputStream out) throws IOException { - // This may leak memory if called in a loop. The createCompressor() call - // may cause allocation of an untracked direct-backed buffer if native - // libs are being used (even if you close the stream). A Compressor - // object should be reused between successive calls. - LOG.warn("DefaultCodec.createOutputStream() may leak memory. " - + "Create a compressor first."); - return new CompressorStream(out, createCompressor(), - conf.getInt("io.file.buffer.size", 4*1024)); + return CompressionCodec.Util. + createOutputStreamWithCodecPool(this, conf, out); } @Override @@ -82,8 +76,8 @@ public class DefaultCodec implements Configurable, CompressionCodec, DirectDecom @Override public CompressionInputStream createInputStream(InputStream in) throws IOException { - return new DecompressorStream(in, createDecompressor(), - conf.getInt("io.file.buffer.size", 4*1024)); + return CompressionCodec.Util. + createInputStreamWithCodecPool(this, conf, in); } @Override diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/GzipCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/GzipCodec.java index 487f29bec6d..c493f1705dd 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/GzipCodec.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/GzipCodec.java @@ -159,10 +159,11 @@ public class GzipCodec extends DefaultCodec { @Override public CompressionOutputStream createOutputStream(OutputStream out) throws IOException { - return (ZlibFactory.isNativeZlibLoaded(conf)) ? - new CompressorStream(out, createCompressor(), - conf.getInt("io.file.buffer.size", 4*1024)) : - new GzipOutputStream(out); + if (!ZlibFactory.isNativeZlibLoaded(conf)) { + return new GzipOutputStream(out); + } + return CompressionCodec.Util. 
+ createOutputStreamWithCodecPool(this, conf, out); } @Override @@ -192,8 +193,9 @@ public class GzipCodec extends DefaultCodec { @Override public CompressionInputStream createInputStream(InputStream in) - throws IOException { - return createInputStream(in, null); + throws IOException { + return CompressionCodec.Util. + createInputStreamWithCodecPool(this, conf, in); } @Override diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/Lz4Codec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/Lz4Codec.java index 4b0ea796b71..61462c08ddc 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/Lz4Codec.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/Lz4Codec.java @@ -84,7 +84,8 @@ public class Lz4Codec implements Configurable, CompressionCodec { @Override public CompressionOutputStream createOutputStream(OutputStream out) throws IOException { - return createOutputStream(out, createCompressor()); + return CompressionCodec.Util. + createOutputStreamWithCodecPool(this, conf, out); } /** @@ -157,7 +158,8 @@ public class Lz4Codec implements Configurable, CompressionCodec { @Override public CompressionInputStream createInputStream(InputStream in) throws IOException { - return createInputStream(in, createDecompressor()); + return CompressionCodec.Util. + createInputStreamWithCodecPool(this, conf, in); } /** diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/SnappyCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/SnappyCodec.java index 402f8c8e99f..8d2fa1a6fb4 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/SnappyCodec.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/SnappyCodec.java @@ -95,7 +95,8 @@ public class SnappyCodec implements Configurable, CompressionCodec, DirectDecomp @Override public CompressionOutputStream createOutputStream(OutputStream out) throws IOException { - return createOutputStream(out, createCompressor()); + return CompressionCodec.Util. + createOutputStreamWithCodecPool(this, conf, out); } /** @@ -158,7 +159,8 @@ public class SnappyCodec implements Configurable, CompressionCodec, DirectDecomp @Override public CompressionInputStream createInputStream(InputStream in) throws IOException { - return createInputStream(in, createDecompressor()); + return CompressionCodec.Util. + createInputStreamWithCodecPool(this, conf, in); } /** From 875592220fb250ff9d0bba73c8ace9858fd369fd Mon Sep 17 00:00:00 2001 From: Jian He Date: Thu, 17 Jul 2014 18:46:20 +0000 Subject: [PATCH 24/49] MAPREDUCE-5910. Make MR AM resync with RM in case of work-preserving RM-restart. 
Contributed by Rohith git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1611434 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 + .../v2/app/local/LocalContainerAllocator.java | 6 + .../mapreduce/v2/app/rm/RMCommunicator.java | 34 ++- .../v2/app/rm/RMContainerAllocator.java | 12 + .../v2/app/rm/RMContainerRequestor.java | 36 ++- .../v2/app/rm/TestRMContainerAllocator.java | 244 ++++++++++++++++++ 6 files changed, 319 insertions(+), 16 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index c9e74516ffe..7da3de0f8ff 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -17,6 +17,9 @@ Trunk (Unreleased) MAPREDUCE-5232. Add a configuration to be able to log classpath and other system properties on mapreduce JVMs startup. (Sangjin Lee via vinodkv) + MAPREDUCE-5910. Make MR AM resync with RM in case of work-preserving + RM-restart. (Rohith via jianhe) + IMPROVEMENTS MAPREDUCE-3481. [Gridmix] Improve Gridmix STRESS mode. (amarrk) diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/local/LocalContainerAllocator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/local/LocalContainerAllocator.java index 426dc212f52..110e9c850d7 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/local/LocalContainerAllocator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/local/LocalContainerAllocator.java @@ -64,6 +64,7 @@ public class LocalContainerAllocator extends RMCommunicator private int nmPort; private int nmHttpPort; private ContainerId containerId; + protected int lastResponseID; private final RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null); @@ -119,6 +120,11 @@ public class LocalContainerAllocator extends RMCommunicator if (allocateResponse.getAMCommand() != null) { switch(allocateResponse.getAMCommand()) { case AM_RESYNC: + LOG.info("ApplicationMaster is out of sync with ResourceManager," + + " hence resyncing."); + this.lastResponseID = 0; + register(); + break; case AM_SHUTDOWN: LOG.info("Event from RM: shutting down Application Master"); // This can happen if the RM has been restarted. 
If it is in that state, diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java index e435009c6a7..4b32c045238 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java @@ -52,6 +52,7 @@ import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.client.ClientRMProxy; import org.apache.hadoop.yarn.event.EventHandler; +import org.apache.hadoop.yarn.exceptions.ApplicationMasterNotRegisteredException; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.factories.RecordFactory; @@ -216,20 +217,27 @@ public abstract class RMCommunicator extends AbstractService FinishApplicationMasterRequest request = FinishApplicationMasterRequest.newInstance(finishState, sb.toString(), historyUrl); - while (true) { - FinishApplicationMasterResponse response = - scheduler.finishApplicationMaster(request); - if (response.getIsUnregistered()) { - // When excepting ClientService, other services are already stopped, - // it is safe to let clients know the final states. ClientService - // should wait for some time so clients have enough time to know the - // final states. - RunningAppContext raContext = (RunningAppContext) context; - raContext.markSuccessfulUnregistration(); - break; + try { + while (true) { + FinishApplicationMasterResponse response = + scheduler.finishApplicationMaster(request); + if (response.getIsUnregistered()) { + // When excepting ClientService, other services are already stopped, + // it is safe to let clients know the final states. ClientService + // should wait for some time so clients have enough time to know the + // final states. + RunningAppContext raContext = (RunningAppContext) context; + raContext.markSuccessfulUnregistration(); + break; + } + LOG.info("Waiting for application to be successfully unregistered."); + Thread.sleep(rmPollInterval); } - LOG.info("Waiting for application to be successfully unregistered."); - Thread.sleep(rmPollInterval); + } catch (ApplicationMasterNotRegisteredException e) { + // RM might have restarted or failed over and so lost the fact that AM had + // registered before. 
+ register(); + doUnregistration(); } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java index 64872cfe671..307cdfe759c 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java @@ -389,6 +389,7 @@ public class RMContainerAllocator extends RMContainerRequestor removed = true; assignedRequests.remove(aId); containersReleased++; + pendingRelease.add(containerId); release(containerId); } } @@ -641,6 +642,15 @@ public class RMContainerAllocator extends RMContainerRequestor if (response.getAMCommand() != null) { switch(response.getAMCommand()) { case AM_RESYNC: + LOG.info("ApplicationMaster is out of sync with ResourceManager," + + " hence resyncing."); + lastResponseID = 0; + + // Registering to allow RM to discover an active AM for this + // application + register(); + addOutstandingRequestOnResync(); + break; case AM_SHUTDOWN: // This can happen if the RM has been restarted. If it is in that state, // this application must clean itself up. @@ -700,6 +710,7 @@ public class RMContainerAllocator extends RMContainerRequestor LOG.error("Container complete event for unknown container id " + cont.getContainerId()); } else { + pendingRelease.remove(cont.getContainerId()); assignedRequests.remove(attemptID); // send the container completed event to Task attempt @@ -991,6 +1002,7 @@ public class RMContainerAllocator extends RMContainerRequestor private void containerNotAssigned(Container allocated) { containersReleased++; + pendingRelease.add(allocated.getId()); release(allocated.getId()); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerRequestor.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerRequestor.java index 18242119451..943c0af0d95 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerRequestor.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerRequestor.java @@ -40,6 +40,7 @@ import org.apache.hadoop.mapreduce.v2.app.AppContext; import org.apache.hadoop.mapreduce.v2.app.client.ClientService; import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest; import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; +import org.apache.hadoop.yarn.api.records.AMCommand; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; @@ -58,7 +59,7 @@ public abstract class RMContainerRequestor extends RMCommunicator { private static final Log LOG = LogFactory.getLog(RMContainerRequestor.class); - private int lastResponseID; + protected int lastResponseID; private Resource availableResources; private final RecordFactory recordFactory = @@ -77,8 +78,11 @@ public 
abstract class RMContainerRequestor extends RMCommunicator { // numContainers dont end up as duplicates private final Set ask = new TreeSet( new org.apache.hadoop.yarn.api.records.ResourceRequest.ResourceRequestComparator()); - private final Set release = new TreeSet(); - + private final Set release = new TreeSet(); + // pendingRelease holds history or release requests.request is removed only if + // RM sends completedContainer. + // How it different from release? --> release is for per allocate() request. + protected Set pendingRelease = new TreeSet(); private boolean nodeBlacklistingEnabled; private int blacklistDisablePercent; private AtomicBoolean ignoreBlacklisting = new AtomicBoolean(false); @@ -186,6 +190,10 @@ public abstract class RMContainerRequestor extends RMCommunicator { } catch (YarnException e) { throw new IOException(e); } + + if (isResyncCommand(allocateResponse)) { + return allocateResponse; + } lastResponseID = allocateResponse.getResponseId(); availableResources = allocateResponse.getAvailableResources(); lastClusterNmCount = clusterNmCount; @@ -214,6 +222,28 @@ public abstract class RMContainerRequestor extends RMCommunicator { return allocateResponse; } + protected boolean isResyncCommand(AllocateResponse allocateResponse) { + return allocateResponse.getAMCommand() != null + && allocateResponse.getAMCommand() == AMCommand.AM_RESYNC; + } + + protected void addOutstandingRequestOnResync() { + for (Map> rr : remoteRequestsTable + .values()) { + for (Map capabalities : rr.values()) { + for (ResourceRequest request : capabalities.values()) { + addResourceRequestToAsk(request); + } + } + } + if (!ignoreBlacklisting.get()) { + blacklistAdditions.addAll(blacklistedNodes); + } + if (!pendingRelease.isEmpty()) { + release.addAll(pendingRelease); + } + } + // May be incorrect if there's multiple NodeManagers running on a single host. // knownNodeCount is based on node managers, not hosts. blacklisting is // currently based on hosts. 
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/rm/TestRMContainerAllocator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/rm/TestRMContainerAllocator.java index 74edce22777..e554281f37e 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/rm/TestRMContainerAllocator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/rm/TestRMContainerAllocator.java @@ -78,6 +78,7 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.yarn.api.ApplicationMasterProtocol; +import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; @@ -87,6 +88,7 @@ import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; +import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.DrainDispatcher; import org.apache.hadoop.yarn.event.Event; @@ -95,9 +97,13 @@ import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.security.AMRMTokenIdentifier; +import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse; +import org.apache.hadoop.yarn.server.api.records.NodeAction; import org.apache.hadoop.yarn.server.resourcemanager.MockNM; import org.apache.hadoop.yarn.server.resourcemanager.MockRM; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; @@ -618,6 +624,10 @@ public class TestRMContainerAllocator { super(conf); } + public MyResourceManager(Configuration conf, RMStateStore store) { + super(conf, store); + } + @Override public void serviceStart() throws Exception { super.serviceStart(); @@ -1426,6 +1436,13 @@ public class TestRMContainerAllocator { rm.getMyFifoScheduler().lastBlacklistRemovals.size()); } + private static void assertAsksAndReleases(int expectedAsk, + int expectedRelease, MyResourceManager rm) { + Assert.assertEquals(expectedAsk, rm.getMyFifoScheduler().lastAsk.size()); + Assert.assertEquals(expectedRelease, + rm.getMyFifoScheduler().lastRelease.size()); + } + private static class MyFifoScheduler extends FifoScheduler { public MyFifoScheduler(RMContext rmContext) { @@ -1440,6 +1457,7 @@ public class TestRMContainerAllocator { } List lastAsk = null; + List lastRelease = null; List lastBlacklistAdditions; List lastBlacklistRemovals; @@ -1458,6 +1476,7 @@ public 
class TestRMContainerAllocator { askCopy.add(reqCopy); } lastAsk = ask; + lastRelease = release; lastBlacklistAdditions = blacklistAdditions; lastBlacklistRemovals = blacklistRemovals; return super.allocate( @@ -1505,6 +1524,20 @@ public class TestRMContainerAllocator { return new ContainerFailedEvent(attemptId, host); } + private ContainerAllocatorEvent createDeallocateEvent(JobId jobId, + int taskAttemptId, boolean reduce) { + TaskId taskId; + if (reduce) { + taskId = MRBuilderUtils.newTaskId(jobId, 0, TaskType.REDUCE); + } else { + taskId = MRBuilderUtils.newTaskId(jobId, 0, TaskType.MAP); + } + TaskAttemptId attemptId = + MRBuilderUtils.newTaskAttemptId(taskId, taskAttemptId); + return new ContainerAllocatorEvent(attemptId, + ContainerAllocator.EventType.CONTAINER_DEALLOCATE); + } + private void checkAssignments(ContainerRequestEvent[] requests, List assignments, boolean checkHostMatch) { @@ -1557,6 +1590,7 @@ public class TestRMContainerAllocator { = new ArrayList(); private MyResourceManager rm; private boolean isUnregistered = false; + private AllocateResponse allocateResponse; private static AppContext createAppContext( ApplicationAttemptId appAttemptId, Job job) { AppContext context = mock(AppContext.class); @@ -1668,6 +1702,10 @@ public class TestRMContainerAllocator { super.handleEvent(f); } + public void sendDeallocate(ContainerAllocatorEvent f) { + super.handleEvent(f); + } + // API to be used by tests public List schedule() throws Exception { @@ -1713,6 +1751,20 @@ public class TestRMContainerAllocator { public boolean isUnregistered() { return isUnregistered; } + + public void updateSchedulerProxy(MyResourceManager rm) { + scheduler = rm.getApplicationMasterService(); + } + + @Override + protected AllocateResponse makeRemoteRequest() throws IOException { + allocateResponse = super.makeRemoteRequest(); + return allocateResponse; + } + + public boolean isResyncCommand() { + return super.isResyncCommand(allocateResponse); + } } @Test @@ -2022,6 +2074,198 @@ public class TestRMContainerAllocator { Assert.assertTrue(allocator.isUnregistered()); } + // Step-1 : AM send allocate request for 2 ContainerRequests and 1 + // blackListeNode + // Step-2 : 2 containers are allocated by RM. + // Step-3 : AM Send 1 containerRequest(event3) and 1 releaseRequests to + // RM + // Step-4 : On RM restart, AM(does not know RM is restarted) sends + // additional containerRequest(event4) and blacklisted nodes. 
+ // Intern RM send resync command + // Step-5 : On Resync,AM sends all outstanding + // asks,release,blacklistAaddition + // and another containerRequest(event5) + // Step-6 : RM allocates containers i.e event3,event4 and cRequest5 + @Test + public void testRMContainerAllocatorResendsRequestsOnRMRestart() + throws Exception { + + Configuration conf = new Configuration(); + conf.set(YarnConfiguration.RECOVERY_ENABLED, "true"); + conf.set(YarnConfiguration.RM_STORE, MemoryRMStateStore.class.getName()); + conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, + YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS); + conf.setBoolean(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_ENABLED, true); + + conf.setBoolean(MRJobConfig.MR_AM_JOB_NODE_BLACKLISTING_ENABLE, true); + conf.setInt(MRJobConfig.MAX_TASK_FAILURES_PER_TRACKER, 1); + conf.setInt( + MRJobConfig.MR_AM_IGNORE_BLACKLISTING_BLACKLISTED_NODE_PERECENT, -1); + + MemoryRMStateStore memStore = new MemoryRMStateStore(); + memStore.init(conf); + + MyResourceManager rm1 = new MyResourceManager(conf, memStore); + rm1.start(); + DrainDispatcher dispatcher = + (DrainDispatcher) rm1.getRMContext().getDispatcher(); + + // Submit the application + RMApp app = rm1.submitApp(1024); + dispatcher.await(); + + MockNM nm1 = new MockNM("h1:1234", 15120, rm1.getResourceTrackerService()); + nm1.registerNode(); + nm1.nodeHeartbeat(true); // Node heartbeat + dispatcher.await(); + + ApplicationAttemptId appAttemptId = + app.getCurrentAppAttempt().getAppAttemptId(); + rm1.sendAMLaunched(appAttemptId); + dispatcher.await(); + + JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0); + Job mockJob = mock(Job.class); + when(mockJob.getReport()).thenReturn( + MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0, + 0, 0, 0, 0, 0, 0, "jobfile", null, false, "")); + MyContainerAllocator allocator = + new MyContainerAllocator(rm1, conf, appAttemptId, mockJob); + + // Step-1 : AM send allocate request for 2 ContainerRequests and 1 + // blackListeNode + // create the container request + // send MAP request + ContainerRequestEvent event1 = + createReq(jobId, 1, 1024, new String[] { "h1" }); + allocator.sendRequest(event1); + + ContainerRequestEvent event2 = + createReq(jobId, 2, 2048, new String[] { "h1", "h2" }); + allocator.sendRequest(event2); + + // Send events to blacklist h2 + ContainerFailedEvent f1 = createFailEvent(jobId, 1, "h2", false); + allocator.sendFailure(f1); + + // send allocate request and 1 blacklisted nodes + List assignedContainers = + allocator.schedule(); + dispatcher.await(); + Assert.assertEquals("No of assignments must be 0", 0, + assignedContainers.size()); + // Why ask is 3, not 4? --> ask from blacklisted node h2 is removed + assertAsksAndReleases(3, 0, rm1); + assertBlacklistAdditionsAndRemovals(1, 0, rm1); + + nm1.nodeHeartbeat(true); // Node heartbeat + dispatcher.await(); + + // Step-2 : 2 containers are allocated by RM. 
+ assignedContainers = allocator.schedule(); + dispatcher.await(); + Assert.assertEquals("No of assignments must be 2", 2, + assignedContainers.size()); + assertAsksAndReleases(0, 0, rm1); + assertBlacklistAdditionsAndRemovals(0, 0, rm1); + + assignedContainers = allocator.schedule(); + Assert.assertEquals("No of assignments must be 0", 0, + assignedContainers.size()); + assertAsksAndReleases(3, 0, rm1); + assertBlacklistAdditionsAndRemovals(0, 0, rm1); + + // Step-3 : AM Send 1 containerRequest(event3) and 1 releaseRequests to + // RM + // send container request + ContainerRequestEvent event3 = + createReq(jobId, 3, 1000, new String[] { "h1" }); + allocator.sendRequest(event3); + + // send deallocate request + ContainerAllocatorEvent deallocate1 = + createDeallocateEvent(jobId, 1, false); + allocator.sendDeallocate(deallocate1); + + assignedContainers = allocator.schedule(); + Assert.assertEquals("No of assignments must be 0", 0, + assignedContainers.size()); + assertAsksAndReleases(3, 1, rm1); + assertBlacklistAdditionsAndRemovals(0, 0, rm1); + + // Phase-2 start 2nd RM is up + MyResourceManager rm2 = new MyResourceManager(conf, memStore); + rm2.start(); + nm1.setResourceTrackerService(rm2.getResourceTrackerService()); + allocator.updateSchedulerProxy(rm2); + dispatcher = (DrainDispatcher) rm2.getRMContext().getDispatcher(); + + // NM should be rebooted on heartbeat, even first heartbeat for nm2 + NodeHeartbeatResponse hbResponse = nm1.nodeHeartbeat(true); + Assert.assertEquals(NodeAction.RESYNC, hbResponse.getNodeAction()); + + // new NM to represent NM re-register + nm1 = new MockNM("h1:1234", 10240, rm2.getResourceTrackerService()); + nm1.registerNode(); + nm1.nodeHeartbeat(true); + dispatcher.await(); + + // Step-4 : On RM restart, AM(does not know RM is restarted) sends + // additional containerRequest(event4) and blacklisted nodes. + // Intern RM send resync command + + // send deallocate request, release=1 + ContainerAllocatorEvent deallocate2 = + createDeallocateEvent(jobId, 2, false); + allocator.sendDeallocate(deallocate2); + + // Send events to blacklist nodes h3 + ContainerFailedEvent f2 = createFailEvent(jobId, 1, "h3", false); + allocator.sendFailure(f2); + + ContainerRequestEvent event4 = + createReq(jobId, 4, 2000, new String[] { "h1", "h2" }); + allocator.sendRequest(event4); + + // send allocate request to 2nd RM and get resync command + allocator.schedule(); + dispatcher.await(); + Assert.assertTrue("Last allocate response is not RESYNC", + allocator.isResyncCommand()); + + // Step-5 : On Resync,AM sends all outstanding + // asks,release,blacklistAaddition + // and another containerRequest(event5) + ContainerRequestEvent event5 = + createReq(jobId, 5, 3000, new String[] { "h1", "h2", "h3" }); + allocator.sendRequest(event5); + + // send all outstanding request again. 
+ assignedContainers = allocator.schedule(); + dispatcher.await(); + assertAsksAndReleases(3, 2, rm2); + assertBlacklistAdditionsAndRemovals(2, 0, rm2); + + nm1.nodeHeartbeat(true); + dispatcher.await(); + + // Step-6 : RM allocates containers i.e event3,event4 and cRequest5 + assignedContainers = allocator.schedule(); + dispatcher.await(); + + Assert.assertEquals("Number of container should be 3", 3, + assignedContainers.size()); + + for (TaskAttemptContainerAssignedEvent assig : assignedContainers) { + Assert.assertTrue("Assigned count not correct", + "h1".equals(assig.getContainer().getNodeId().getHost())); + } + + rm1.stop(); + rm2.stop(); + + } + public static void main(String[] args) throws Exception { TestRMContainerAllocator t = new TestRMContainerAllocator(); t.testSimple(); From ce1659f070a76b853eebba9488349a289415297f Mon Sep 17 00:00:00 2001 From: Vinayakumar B Date: Thu, 17 Jul 2014 19:17:51 +0000 Subject: [PATCH 25/49] HDFS-6693. TestDFSAdminWithHA fails on windows ( Contributed by Vinayakumar B ) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1611441 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 + .../hadoop/hdfs/tools/TestDFSAdminWithHA.java | 42 ++++++++++++------- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 0e44c1a95a7..405ecdbe29a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -324,6 +324,8 @@ Release 2.6.0 - UNRELEASED HDFS-6478. RemoteException can't be retried properly for non-HA scenario. (Ming Ma via jing9) + HDFS-6693. TestDFSAdminWithHA fails on windows ( vinayakumarb ) + Release 2.5.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdminWithHA.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdminWithHA.java index 40826134d96..6859e436a4a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdminWithHA.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdminWithHA.java @@ -21,6 +21,7 @@ import java.io.ByteArrayOutputStream; import java.io.PrintStream; import com.google.common.base.Charsets; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; @@ -46,6 +47,7 @@ public class TestDFSAdminWithHA { private PrintStream originErr; private static final String NSID = "ns1"; + private static String newLine = System.getProperty("line.separator"); private void assertOutputMatches(String string) { String errOutput = new String(out.toByteArray(), Charsets.UTF_8); @@ -99,6 +101,14 @@ public class TestDFSAdminWithHA { System.err.flush(); System.setOut(originOut); System.setErr(originErr); + if (admin != null) { + admin.close(); + } + if (cluster != null) { + cluster.shutdown(); + } + out.reset(); + err.reset(); } @Test(timeout = 30000) @@ -108,25 +118,25 @@ public class TestDFSAdminWithHA { int exitCode = admin.run(new String[] {"-safemode", "enter"}); assertEquals(err.toString().trim(), 0, exitCode); String message = "Safe mode is ON in.*"; - assertOutputMatches(message + "\n" + message + "\n"); + assertOutputMatches(message + newLine + message + newLine); // Get safemode exitCode = admin.run(new String[] {"-safemode", "get"}); 
assertEquals(err.toString().trim(), 0, exitCode); message = "Safe mode is ON in.*"; - assertOutputMatches(message + "\n" + message + "\n"); + assertOutputMatches(message + newLine + message + newLine); // Leave safemode exitCode = admin.run(new String[] {"-safemode", "leave"}); assertEquals(err.toString().trim(), 0, exitCode); message = "Safe mode is OFF in.*"; - assertOutputMatches(message + "\n" + message + "\n"); + assertOutputMatches(message + newLine + message + newLine); // Get safemode exitCode = admin.run(new String[] {"-safemode", "get"}); assertEquals(err.toString().trim(), 0, exitCode); message = "Safe mode is OFF in.*"; - assertOutputMatches(message + "\n" + message + "\n"); + assertOutputMatches(message + newLine + message + newLine); } @Test (timeout = 30000) @@ -136,12 +146,12 @@ public class TestDFSAdminWithHA { int exitCode = admin.run(new String[] {"-safemode", "enter"}); assertEquals(err.toString().trim(), 0, exitCode); String message = "Safe mode is ON in.*"; - assertOutputMatches(message + "\n" + message + "\n"); + assertOutputMatches(message + newLine + message + newLine); exitCode = admin.run(new String[] {"-saveNamespace"}); assertEquals(err.toString().trim(), 0, exitCode); message = "Save namespace successful for.*"; - assertOutputMatches(message + "\n" + message + "\n"); + assertOutputMatches(message + newLine + message + newLine); } @Test (timeout = 30000) @@ -151,17 +161,17 @@ public class TestDFSAdminWithHA { assertEquals(err.toString().trim(), 0, exitCode); String message = "restoreFailedStorage is set to false for.*"; // Default is false - assertOutputMatches(message + "\n" + message + "\n"); + assertOutputMatches(message + newLine + message + newLine); exitCode = admin.run(new String[] {"-restoreFailedStorage", "true"}); assertEquals(err.toString().trim(), 0, exitCode); message = "restoreFailedStorage is set to true for.*"; - assertOutputMatches(message + "\n" + message + "\n"); + assertOutputMatches(message + newLine + message + newLine); exitCode = admin.run(new String[] {"-restoreFailedStorage", "false"}); assertEquals(err.toString().trim(), 0, exitCode); message = "restoreFailedStorage is set to false for.*"; - assertOutputMatches(message + "\n" + message + "\n"); + assertOutputMatches(message + newLine + message + newLine); } @Test (timeout = 30000) @@ -170,7 +180,7 @@ public class TestDFSAdminWithHA { int exitCode = admin.run(new String[] {"-refreshNodes"}); assertEquals(err.toString().trim(), 0, exitCode); String message = "Refresh nodes successful for.*"; - assertOutputMatches(message + "\n" + message + "\n"); + assertOutputMatches(message + newLine + message + newLine); } @Test (timeout = 30000) @@ -179,7 +189,7 @@ public class TestDFSAdminWithHA { int exitCode = admin.run(new String[] {"-setBalancerBandwidth", "10"}); assertEquals(err.toString().trim(), 0, exitCode); String message = "Balancer bandwidth is set to 10 for.*"; - assertOutputMatches(message + "\n" + message + "\n"); + assertOutputMatches(message + newLine + message + newLine); } @Test (timeout = 30000) @@ -189,7 +199,7 @@ public class TestDFSAdminWithHA { assertEquals(err.toString().trim(), 0, exitCode); String message = "Created metasave file dfs.meta in the log directory" + " of namenode.*"; - assertOutputMatches(message + "\n" + message + "\n"); + assertOutputMatches(message + newLine + message + newLine); } @Test (timeout = 30000) @@ -198,7 +208,7 @@ public class TestDFSAdminWithHA { int exitCode = admin.run(new String[] {"-refreshServiceAcl"}); assertEquals(err.toString().trim(), 
0, exitCode); String message = "Refresh service acl successful for.*"; - assertOutputMatches(message + "\n" + message + "\n"); + assertOutputMatches(message + newLine + message + newLine); } @Test (timeout = 30000) @@ -207,7 +217,7 @@ public class TestDFSAdminWithHA { int exitCode = admin.run(new String[] {"-refreshUserToGroupsMappings"}); assertEquals(err.toString().trim(), 0, exitCode); String message = "Refresh user to groups mapping successful for.*"; - assertOutputMatches(message + "\n" + message + "\n"); + assertOutputMatches(message + newLine + message + newLine); } @Test (timeout = 30000) @@ -217,7 +227,7 @@ public class TestDFSAdminWithHA { new String[] {"-refreshSuperUserGroupsConfiguration"}); assertEquals(err.toString().trim(), 0, exitCode); String message = "Refresh super user groups configuration successful for.*"; - assertOutputMatches(message + "\n" + message + "\n"); + assertOutputMatches(message + newLine + message + newLine); } @Test (timeout = 30000) @@ -226,6 +236,6 @@ public class TestDFSAdminWithHA { int exitCode = admin.run(new String[] {"-refreshCallQueue"}); assertEquals(err.toString().trim(), 0, exitCode); String message = "Refresh call queue successful for.*"; - assertOutputMatches(message + "\n" + message + "\n"); + assertOutputMatches(message + newLine + message + newLine); } } From dae1b6cc2aaa734f9fd52569d366c45fd56679e8 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Thu, 17 Jul 2014 21:56:22 +0000 Subject: [PATCH 26/49] HADOOP-10610. Upgrade S3n s3.fs.buffer.dir to support multi directories. Contributed by Ted Malaska. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1611489 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.txt | 3 +++ .../apache/hadoop/fs/s3native/NativeS3FileSystem.java | 9 +++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 449e853113f..9e3e826ea77 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -423,6 +423,9 @@ Release 2.6.0 - UNRELEASED HADOOP-10733. Fix potential null dereference in CredShell. (Ted Yu via omalley) + HADOOP-10610. Upgrade S3n s3.fs.buffer.dir to support multi directories. 
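
For illustration of what the fs.s3.buffer.dir change enables, a minimal sketch (the class name and directory paths below are hypothetical; the allocator call mirrors the new newBackupFile() code): a comma-separated list of local directories can be configured, and LocalDirAllocator spreads the temporary buffer files across them instead of requiring a single directory.

    import java.io.File;
    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.LocalDirAllocator;

    public class S3BufferDirSketch {
      public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // hypothetical buffer directories; any writable local paths work
        conf.set("fs.s3.buffer.dir", "/data/1/s3buf,/data/2/s3buf");
        LocalDirAllocator alloc = new LocalDirAllocator("fs.s3.buffer.dir");
        // picks a configured directory with enough free space for the temp file
        File tmp = alloc.createTmpFileForWrite("output-",
            LocalDirAllocator.SIZE_UNKNOWN, conf);
        System.out.println("buffer file: " + tmp.getAbsolutePath());
      }
    }
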
+ (Ted Malaska via atm) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/NativeS3FileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/NativeS3FileSystem.java index e978e7067ef..acc5500d7f5 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/NativeS3FileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/NativeS3FileSystem.java @@ -50,6 +50,7 @@ import org.apache.hadoop.fs.FSInputStream; import org.apache.hadoop.fs.FileAlreadyExistsException; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalDirAllocator; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.s3.S3Exception; @@ -225,6 +226,7 @@ public class NativeS3FileSystem extends FileSystem { private OutputStream backupStream; private MessageDigest digest; private boolean closed; + private LocalDirAllocator lDirAlloc; public NativeS3FsOutputStream(Configuration conf, NativeFileSystemStore store, String key, Progressable progress, @@ -246,11 +248,10 @@ public class NativeS3FileSystem extends FileSystem { } private File newBackupFile() throws IOException { - File dir = new File(conf.get("fs.s3.buffer.dir")); - if (!dir.mkdirs() && !dir.exists()) { - throw new IOException("Cannot create S3 buffer directory: " + dir); + if (lDirAlloc == null) { + lDirAlloc = new LocalDirAllocator("fs.s3.buffer.dir"); } - File result = File.createTempFile("output-", ".tmp", dir); + File result = lDirAlloc.createTmpFileForWrite("output-", LocalDirAllocator.SIZE_UNKNOWN, conf); result.deleteOnExit(); return result; } From 7ba5913797c49d5001ad95558eadd119c3361060 Mon Sep 17 00:00:00 2001 From: Jing Zhao Date: Thu, 17 Jul 2014 23:11:27 +0000 Subject: [PATCH 27/49] HDFS-6667. In HDFS HA mode, Distcp/SLive with webhdfs on secure cluster fails with Client cannot authenticate via:[TOKEN, KERBEROS] error. Contributed by Jing Zhao. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1611508 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 ++ .../java/org/apache/hadoop/hdfs/HAUtil.java | 35 +++++++++++-------- .../apache/hadoop/hdfs/NameNodeProxies.java | 6 ++-- .../hadoop/hdfs/protocol/HdfsConstants.java | 2 +- .../web/resources/DatanodeWebHdfsMethods.java | 3 +- .../hadoop/hdfs/web/WebHdfsFileSystem.java | 2 +- .../ha/TestDelegationTokensWithHA.java | 10 ++++-- 7 files changed, 39 insertions(+), 22 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 405ecdbe29a..00d1e93b68d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -326,6 +326,9 @@ Release 2.6.0 - UNRELEASED HDFS-6693. TestDFSAdminWithHA fails on windows ( vinayakumarb ) + HDFS-6667. In HDFS HA mode, Distcp/SLive with webhdfs on secure cluster fails + with Client cannot authenticate via:[TOKEN, KERBEROS] error. 
(jing9) + Release 2.5.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java index 250d41c5cba..90acedea12c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java @@ -26,7 +26,6 @@ import static org.apache.hadoop.hdfs.protocol.HdfsConstants.HA_DT_SERVICE_PREFIX import java.io.IOException; import java.net.InetSocketAddress; import java.net.URI; -import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Collection; import java.util.List; @@ -38,14 +37,13 @@ import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hdfs.NameNodeProxies; import org.apache.hadoop.hdfs.NameNodeProxies.ProxyAndInfo; import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSelector; -import org.apache.hadoop.hdfs.server.namenode.ha.AbstractNNFailoverProxyProvider; import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.hdfs.server.namenode.ha.AbstractNNFailoverProxyProvider; import org.apache.hadoop.io.Text; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RemoteException; @@ -259,12 +257,11 @@ public class HAUtil { /** * Parse the file system URI out of the provided token. */ - public static URI getServiceUriFromToken(final String scheme, - Token token) { + public static URI getServiceUriFromToken(final String scheme, Token token) { String tokStr = token.getService().toString(); - - if (tokStr.startsWith(HA_DT_SERVICE_PREFIX)) { - tokStr = tokStr.replaceFirst(HA_DT_SERVICE_PREFIX, ""); + final String prefix = buildTokenServicePrefixForLogicalUri(scheme); + if (tokStr.startsWith(prefix)) { + tokStr = tokStr.replaceFirst(prefix, ""); } return URI.create(scheme + "://" + tokStr); } @@ -273,10 +270,13 @@ public class HAUtil { * Get the service name used in the delegation token for the given logical * HA service. * @param uri the logical URI of the cluster + * @param scheme the scheme of the corresponding FileSystem * @return the service name */ - public static Text buildTokenServiceForLogicalUri(URI uri) { - return new Text(HA_DT_SERVICE_PREFIX + uri.getHost()); + public static Text buildTokenServiceForLogicalUri(final URI uri, + final String scheme) { + return new Text(buildTokenServicePrefixForLogicalUri(scheme) + + uri.getHost()); } /** @@ -286,7 +286,11 @@ public class HAUtil { public static boolean isTokenForLogicalUri(Token token) { return token.getService().toString().startsWith(HA_DT_SERVICE_PREFIX); } - + + public static String buildTokenServicePrefixForLogicalUri(String scheme) { + return HA_DT_SERVICE_PREFIX + scheme + ":"; + } + /** * Locate a delegation token associated with the given HA cluster URI, and if * one is found, clone it to also represent the underlying namenode address. 
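
As a quick sketch of what the scheme-aware service prefix produces (the nameservice name and class name below are made up): the service string is now built as "ha-" + scheme + ":" + host, so hdfs and webhdfs delegation tokens for the same logical URI get distinct service names.

    import java.net.URI;
    import org.apache.hadoop.hdfs.HAUtil;
    import org.apache.hadoop.hdfs.protocol.HdfsConstants;
    import org.apache.hadoop.io.Text;

    public class TokenServiceSketch {
      public static void main(String[] args) {
        URI logicalUri = URI.create("hdfs://mycluster");  // hypothetical nameservice
        // yields "ha-hdfs:mycluster"
        Text hdfsService = HAUtil.buildTokenServiceForLogicalUri(
            logicalUri, HdfsConstants.HDFS_URI_SCHEME);
        // yields "ha-webhdfs:mycluster"
        Text webhdfsService = HAUtil.buildTokenServiceForLogicalUri(
            URI.create("webhdfs://mycluster"), "webhdfs");
        System.out.println(hdfsService + " / " + webhdfsService);
      }
    }
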
@@ -298,7 +302,9 @@ public class HAUtil { public static void cloneDelegationTokenForLogicalUri( UserGroupInformation ugi, URI haUri, Collection nnAddrs) { - Text haService = HAUtil.buildTokenServiceForLogicalUri(haUri); + // this cloning logic is only used by hdfs + Text haService = HAUtil.buildTokenServiceForLogicalUri(haUri, + HdfsConstants.HDFS_URI_SCHEME); Token haToken = tokenSelector.selectToken(haService, ugi.getTokens()); if (haToken != null) { @@ -309,8 +315,9 @@ public class HAUtil { Token specificToken = new Token.PrivateToken(haToken); SecurityUtil.setTokenService(specificToken, singleNNAddr); - Text alias = - new Text(HA_DT_SERVICE_PREFIX + "//" + specificToken.getService()); + Text alias = new Text( + buildTokenServicePrefixForLogicalUri(HdfsConstants.HDFS_URI_SCHEME) + + "//" + specificToken.getService()); ugi.addToken(alias, specificToken); LOG.debug("Mapped HA service delegation token for logical URI " + haUri + " to namenode " + singleNNAddr); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java index ab8f3dc7bd7..17653345ef9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java @@ -163,7 +163,8 @@ public class NameNodeProxies { Text dtService; if (failoverProxyProvider.useLogicalURI()) { - dtService = HAUtil.buildTokenServiceForLogicalUri(nameNodeUri); + dtService = HAUtil.buildTokenServiceForLogicalUri(nameNodeUri, + HdfsConstants.HDFS_URI_SCHEME); } else { dtService = SecurityUtil.buildTokenService( NameNode.getAddress(nameNodeUri)); @@ -224,7 +225,8 @@ public class NameNodeProxies { new Class[] { xface }, dummyHandler); Text dtService; if (failoverProxyProvider.useLogicalURI()) { - dtService = HAUtil.buildTokenServiceForLogicalUri(nameNodeUri); + dtService = HAUtil.buildTokenServiceForLogicalUri(nameNodeUri, + HdfsConstants.HDFS_URI_SCHEME); } else { dtService = SecurityUtil.buildTokenService( NameNode.getAddress(nameNodeUri)); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsConstants.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsConstants.java index 7cc8c318803..77fe543784b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsConstants.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsConstants.java @@ -124,7 +124,7 @@ public class HdfsConstants { * of a delgation token, indicating that the URI is a logical (HA) * URI. 
*/ - public static final String HA_DT_SERVICE_PREFIX = "ha-hdfs:"; + public static final String HA_DT_SERVICE_PREFIX = "ha-"; /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/resources/DatanodeWebHdfsMethods.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/resources/DatanodeWebHdfsMethods.java index 83de6ebe41b..51731c8d013 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/resources/DatanodeWebHdfsMethods.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/resources/DatanodeWebHdfsMethods.java @@ -128,7 +128,8 @@ public class DatanodeWebHdfsMethods { "://" + nnId); boolean isLogical = HAUtil.isLogicalUri(conf, nnUri); if (isLogical) { - token.setService(HAUtil.buildTokenServiceForLogicalUri(nnUri)); + token.setService(HAUtil.buildTokenServiceForLogicalUri(nnUri, + HdfsConstants.HDFS_URI_SCHEME)); } else { token.setService(SecurityUtil.buildTokenService(nnUri)); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java index 94c666a3a11..6eb09f61340 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java @@ -158,7 +158,7 @@ public class WebHdfsFileSystem extends FileSystem // getCanonicalUri() in order to handle the case where no port is // specified in the URI this.tokenServiceName = isLogicalUri ? - HAUtil.buildTokenServiceForLogicalUri(uri) + HAUtil.buildTokenServiceForLogicalUri(uri, getScheme()) : SecurityUtil.buildTokenService(getCanonicalUri()); if (!isHA) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDelegationTokensWithHA.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDelegationTokensWithHA.java index 46059520f88..b2cc9197aa8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDelegationTokensWithHA.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDelegationTokensWithHA.java @@ -50,6 +50,7 @@ import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSelector; @@ -299,7 +300,8 @@ public class TestDelegationTokensWithHA { UserGroupInformation ugi = UserGroupInformation.createRemoteUser("test"); URI haUri = new URI("hdfs://my-ha-uri/"); - token.setService(HAUtil.buildTokenServiceForLogicalUri(haUri)); + token.setService(HAUtil.buildTokenServiceForLogicalUri(haUri, + HdfsConstants.HDFS_URI_SCHEME)); ugi.addToken(token); Collection nnAddrs = new HashSet(); @@ -355,7 +357,8 @@ public class TestDelegationTokensWithHA { @Test public void testDFSGetCanonicalServiceName() throws Exception { URI hAUri = HATestUtil.getLogicalUri(cluster); - String haService = 
HAUtil.buildTokenServiceForLogicalUri(hAUri).toString(); + String haService = HAUtil.buildTokenServiceForLogicalUri(hAUri, + HdfsConstants.HDFS_URI_SCHEME).toString(); assertEquals(haService, dfs.getCanonicalServiceName()); final String renewer = UserGroupInformation.getCurrentUser().getShortUserName(); final Token token = @@ -371,7 +374,8 @@ public class TestDelegationTokensWithHA { Configuration conf = dfs.getConf(); URI haUri = HATestUtil.getLogicalUri(cluster); AbstractFileSystem afs = AbstractFileSystem.createFileSystem(haUri, conf); - String haService = HAUtil.buildTokenServiceForLogicalUri(haUri).toString(); + String haService = HAUtil.buildTokenServiceForLogicalUri(haUri, + HdfsConstants.HDFS_URI_SCHEME).toString(); assertEquals(haService, afs.getCanonicalServiceName()); Token token = afs.getDelegationTokens( UserGroupInformation.getCurrentUser().getShortUserName()).get(0); From 403ec8ea80d59f209823a7370dc8185fa2c1c368 Mon Sep 17 00:00:00 2001 From: Junping Du Date: Thu, 17 Jul 2014 23:33:22 +0000 Subject: [PATCH 28/49] YARN-1341. Recover NMTokens upon nodemanager restart. (Contributed by Jason Lowe) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1611512 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 + .../security/BaseNMTokenSecretManager.java | 2 +- .../yarn/server/nodemanager/NodeManager.java | 13 +- .../recovery/NMLeveldbStateStoreService.java | 99 +++++++++++ .../recovery/NMNullStateStoreService.java | 28 ++++ .../recovery/NMStateStoreService.java | 35 ++++ .../security/NMTokenSecretManagerInNM.java | 116 +++++++++++-- .../recovery/NMMemoryStateStoreService.java | 49 ++++++ .../TestNMLeveldbStateStoreService.java | 81 +++++++++ .../TestNMTokenSecretManagerInNM.java | 154 ++++++++++++++++++ 10 files changed, 561 insertions(+), 19 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/security/TestNMTokenSecretManagerInNM.java diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index a07686ce0d0..4665703b286 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -46,6 +46,9 @@ Release 2.6.0 - UNRELEASED YARN-2228. Augmented TimelineServer to load pseudo authentication filter when authentication = simple. (Zhijie Shen via vinodkv) + YARN-1341. Recover NMTokens upon nodemanager restart. 
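
Before the diff itself, a condensed sketch of the recovery flow this patch introduces, modelled on the new TestNMTokenSecretManagerInNM; it uses the in-memory test state store, while a real NodeManager wires in the leveldb store (the class name below is made up).

    import org.apache.hadoop.yarn.conf.YarnConfiguration;
    import org.apache.hadoop.yarn.server.nodemanager.recovery.NMMemoryStateStoreService;
    import org.apache.hadoop.yarn.server.nodemanager.security.NMTokenSecretManagerInNM;

    public class NMTokenRecoverySketch {
      public static void main(String[] args) throws Exception {
        YarnConfiguration conf = new YarnConfiguration();
        conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
        NMMemoryStateStoreService store = new NMMemoryStateStoreService();
        store.init(conf);
        store.start();

        // master keys set on this secret manager are written through to the store
        NMTokenSecretManagerInNM secretMgr = new NMTokenSecretManagerInNM(store);

        // after a restart, a fresh instance replays the persisted
        // current/previous/per-attempt master keys from the store
        secretMgr = new NMTokenSecretManagerInNM(store);
        secretMgr.recover(store.loadNMTokenState());

        store.close();
      }
    }
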
(Jason Lowe via + junping_du) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/security/BaseNMTokenSecretManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/security/BaseNMTokenSecretManager.java index 01da1af8aa3..759b6f23a57 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/security/BaseNMTokenSecretManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/security/BaseNMTokenSecretManager.java @@ -42,7 +42,7 @@ public class BaseNMTokenSecretManager extends private static Log LOG = LogFactory .getLog(BaseNMTokenSecretManager.class); - private int serialNo = new SecureRandom().nextInt(); + protected int serialNo = new SecureRandom().nextInt(); protected final ReadWriteLock readWriteLock = new ReentrantReadWriteLock(); protected final Lock readLock = readWriteLock.readLock(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java index 1109b087c17..65988a211d6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java @@ -169,6 +169,15 @@ public class NodeManager extends CompositeService } } + private void recoverTokens(NMTokenSecretManagerInNM nmTokenSecretManager, + NMContainerTokenSecretManager containerTokenSecretManager) + throws IOException { + if (nmStore.canRecover()) { + nmTokenSecretManager.recover(nmStore.loadNMTokenState()); + // TODO: recover containerTokenSecretManager + } + } + @Override protected void serviceInit(Configuration conf) throws Exception { @@ -184,7 +193,9 @@ public class NodeManager extends CompositeService new NMContainerTokenSecretManager(conf); NMTokenSecretManagerInNM nmTokenSecretManager = - new NMTokenSecretManagerInNM(); + new NMTokenSecretManagerInNM(nmStore); + + recoverTokens(nmTokenSecretManager, containerTokenSecretManager); this.aclsManager = new ApplicationACLsManager(conf); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java index dc9aa886ad9..1954fee4425 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java @@ -35,11 +35,15 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import 
org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto; +import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.MasterKeyProto; import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.DeletionServiceDeleteTaskProto; import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LocalizedResourceProto; +import org.apache.hadoop.yarn.server.api.records.MasterKey; +import org.apache.hadoop.yarn.server.api.records.impl.pb.MasterKeyPBImpl; import org.apache.hadoop.yarn.server.utils.LeveldbIterator; import org.apache.hadoop.yarn.util.ConverterUtils; import org.fusesource.leveldbjni.JniDBFactory; @@ -72,6 +76,14 @@ public class NMLeveldbStateStoreService extends NMStateStoreService { private static final String LOCALIZATION_FILECACHE_SUFFIX = "filecache/"; private static final String LOCALIZATION_APPCACHE_SUFFIX = "appcache/"; + private static final String CURRENT_MASTER_KEY_SUFFIX = "CurrentMasterKey"; + private static final String PREV_MASTER_KEY_SUFFIX = "PreviousMasterKey"; + private static final String NM_TOKENS_KEY_PREFIX = "NMTokens/"; + private static final String NM_TOKENS_CURRENT_MASTER_KEY = + NM_TOKENS_KEY_PREFIX + CURRENT_MASTER_KEY_SUFFIX; + private static final String NM_TOKENS_PREV_MASTER_KEY = + NM_TOKENS_KEY_PREFIX + PREV_MASTER_KEY_SUFFIX; + private DB db; public NMLeveldbStateStoreService() { @@ -367,6 +379,93 @@ public class NMLeveldbStateStoreService extends NMStateStoreService { } + @Override + public RecoveredNMTokenState loadNMTokenState() throws IOException { + RecoveredNMTokenState state = new RecoveredNMTokenState(); + state.applicationMasterKeys = + new HashMap(); + LeveldbIterator iter = null; + try { + iter = new LeveldbIterator(db); + iter.seek(bytes(NM_TOKENS_KEY_PREFIX)); + while (iter.hasNext()) { + Entry entry = iter.next(); + String fullKey = asString(entry.getKey()); + if (!fullKey.startsWith(NM_TOKENS_KEY_PREFIX)) { + break; + } + String key = fullKey.substring(NM_TOKENS_KEY_PREFIX.length()); + if (key.equals(CURRENT_MASTER_KEY_SUFFIX)) { + state.currentMasterKey = parseMasterKey(entry.getValue()); + } else if (key.equals(PREV_MASTER_KEY_SUFFIX)) { + state.previousMasterKey = parseMasterKey(entry.getValue()); + } else if (key.startsWith( + ApplicationAttemptId.appAttemptIdStrPrefix)) { + ApplicationAttemptId attempt; + try { + attempt = ConverterUtils.toApplicationAttemptId(key); + } catch (IllegalArgumentException e) { + throw new IOException("Bad application master key state for " + + fullKey, e); + } + state.applicationMasterKeys.put(attempt, + parseMasterKey(entry.getValue())); + } + } + } catch (DBException e) { + throw new IOException(e.getMessage(), e); + } finally { + if (iter != null) { + iter.close(); + } + } + return state; + } + + @Override + public void storeNMTokenCurrentMasterKey(MasterKey key) + throws IOException { + storeMasterKey(NM_TOKENS_CURRENT_MASTER_KEY, key); + } + + @Override + public void storeNMTokenPreviousMasterKey(MasterKey key) + throws IOException { + storeMasterKey(NM_TOKENS_PREV_MASTER_KEY, key); + } + + @Override + public void storeNMTokenApplicationMasterKey( + ApplicationAttemptId attempt, MasterKey key) throws IOException { + storeMasterKey(NM_TOKENS_KEY_PREFIX + attempt, key); + } + + @Override + public void 
removeNMTokenApplicationMasterKey( + ApplicationAttemptId attempt) throws IOException { + String key = NM_TOKENS_KEY_PREFIX + attempt; + try { + db.delete(bytes(key)); + } catch (DBException e) { + throw new IOException(e.getMessage(), e); + } + } + + private MasterKey parseMasterKey(byte[] keyData) throws IOException { + return new MasterKeyPBImpl(MasterKeyProto.parseFrom(keyData)); + } + + private void storeMasterKey(String dbKey, MasterKey key) + throws IOException { + MasterKeyPBImpl pb = (MasterKeyPBImpl) key; + try { + db.put(bytes(dbKey), pb.getProto().toByteArray()); + } catch (DBException e) { + throw new IOException(e.getMessage(), e); + } + } + + @Override protected void initStorage(Configuration conf) throws IOException { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java index dfe4f096bf3..5d9e0ea15a8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java @@ -22,10 +22,12 @@ import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto; import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.DeletionServiceDeleteTaskProto; import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LocalizedResourceProto; +import org.apache.hadoop.yarn.server.api.records.MasterKey; // The state store to use when state isn't being stored public class NMNullStateStoreService extends NMStateStoreService { @@ -77,6 +79,32 @@ public class NMNullStateStoreService extends NMStateStoreService { public void removeDeletionTask(int taskId) throws IOException { } + @Override + public RecoveredNMTokenState loadNMTokenState() throws IOException { + throw new UnsupportedOperationException( + "Recovery not supported by this state store"); + } + + @Override + public void storeNMTokenCurrentMasterKey(MasterKey key) + throws IOException { + } + + @Override + public void storeNMTokenPreviousMasterKey(MasterKey key) + throws IOException { + } + + @Override + public void storeNMTokenApplicationMasterKey(ApplicationAttemptId attempt, + MasterKey key) throws IOException { + } + + @Override + public void removeNMTokenApplicationMasterKey(ApplicationAttemptId attempt) + throws IOException { + } + @Override protected void initStorage(Configuration conf) throws IOException { } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java index f2e594528be..8a5944dbd14 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java @@ -29,10 +29,12 @@ import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.service.AbstractService; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto; import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.DeletionServiceDeleteTaskProto; import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LocalizedResourceProto; +import org.apache.hadoop.yarn.server.api.records.MasterKey; @Private @Unstable @@ -100,6 +102,24 @@ public abstract class NMStateStoreService extends AbstractService { } } + public static class RecoveredNMTokenState { + MasterKey currentMasterKey; + MasterKey previousMasterKey; + Map applicationMasterKeys; + + public MasterKey getCurrentMasterKey() { + return currentMasterKey; + } + + public MasterKey getPreviousMasterKey() { + return previousMasterKey; + } + + public Map getApplicationMasterKeys() { + return applicationMasterKeys; + } + } + /** Initialize the state storage */ @Override public void serviceInit(Configuration conf) throws IOException { @@ -173,6 +193,21 @@ public abstract class NMStateStoreService extends AbstractService { public abstract void removeDeletionTask(int taskId) throws IOException; + public abstract RecoveredNMTokenState loadNMTokenState() throws IOException; + + public abstract void storeNMTokenCurrentMasterKey(MasterKey key) + throws IOException; + + public abstract void storeNMTokenPreviousMasterKey(MasterKey key) + throws IOException; + + public abstract void storeNMTokenApplicationMasterKey( + ApplicationAttemptId attempt, MasterKey key) throws IOException; + + public abstract void removeNMTokenApplicationMasterKey( + ApplicationAttemptId attempt) throws IOException; + + protected abstract void initStorage(Configuration conf) throws IOException; protected abstract void startStorage() throws IOException; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/security/NMTokenSecretManagerInNM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/security/NMTokenSecretManagerInNM.java index 9569fdc08c5..a9b9b994add 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/security/NMTokenSecretManagerInNM.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/security/NMTokenSecretManagerInNM.java @@ -18,6 +18,7 @@ package org.apache.hadoop.yarn.server.nodemanager.security; +import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -31,6 +32,9 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.NodeId; import 
org.apache.hadoop.yarn.security.NMTokenIdentifier; import org.apache.hadoop.yarn.server.api.records.MasterKey; +import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService; +import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService; +import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.RecoveredNMTokenState; import org.apache.hadoop.yarn.server.security.BaseNMTokenSecretManager; import org.apache.hadoop.yarn.server.security.MasterKeyData; @@ -45,16 +49,78 @@ public class NMTokenSecretManagerInNM extends BaseNMTokenSecretManager { private final Map oldMasterKeys; private final Map> appToAppAttemptMap; + private final NMStateStoreService stateStore; private NodeId nodeId; - public NMTokenSecretManagerInNM() { + this(new NMNullStateStoreService()); + } + + public NMTokenSecretManagerInNM(NMStateStoreService stateStore) { this.oldMasterKeys = new HashMap(); appToAppAttemptMap = new HashMap>(); + this.stateStore = stateStore; } + public synchronized void recover(RecoveredNMTokenState state) + throws IOException { + MasterKey key = state.getCurrentMasterKey(); + if (key != null) { + super.currentMasterKey = + new MasterKeyData(key, createSecretKey(key.getBytes().array())); + } + + key = state.getPreviousMasterKey(); + if (key != null) { + previousMasterKey = + new MasterKeyData(key, createSecretKey(key.getBytes().array())); + } + + // restore the serial number from the current master key + if (super.currentMasterKey != null) { + super.serialNo = super.currentMasterKey.getMasterKey().getKeyId() + 1; + } + + for (Map.Entry entry : + state.getApplicationMasterKeys().entrySet()) { + key = entry.getValue(); + oldMasterKeys.put(entry.getKey(), + new MasterKeyData(key, createSecretKey(key.getBytes().array()))); + } + + // reconstruct app to app attempts map + appToAppAttemptMap.clear(); + for (ApplicationAttemptId attempt : oldMasterKeys.keySet()) { + ApplicationId app = attempt.getApplicationId(); + List attempts = appToAppAttemptMap.get(app); + if (attempts == null) { + attempts = new ArrayList(); + appToAppAttemptMap.put(app, attempts); + } + attempts.add(attempt); + } + } + + private void updateCurrentMasterKey(MasterKeyData key) { + super.currentMasterKey = key; + try { + stateStore.storeNMTokenCurrentMasterKey(key.getMasterKey()); + } catch (IOException e) { + LOG.error("Unable to update current master key in state store", e); + } + } + + private void updatePreviousMasterKey(MasterKeyData key) { + previousMasterKey = key; + try { + stateStore.storeNMTokenPreviousMasterKey(key.getMasterKey()); + } catch (IOException e) { + LOG.error("Unable to update previous master key in state store", e); + } + } + /** * Used by NodeManagers to create a token-secret-manager with the key * obtained from the RM. 
This can happen during registration or when the RM @@ -62,20 +128,16 @@ public class NMTokenSecretManagerInNM extends BaseNMTokenSecretManager { */ @Private public synchronized void setMasterKey(MasterKey masterKey) { - LOG.info("Rolling master-key for nm-tokens, got key with id :" - + masterKey.getKeyId()); - if (super.currentMasterKey == null) { - super.currentMasterKey = - new MasterKeyData(masterKey, createSecretKey(masterKey.getBytes() - .array())); - } else { - if (super.currentMasterKey.getMasterKey().getKeyId() != masterKey - .getKeyId()) { - this.previousMasterKey = super.currentMasterKey; - super.currentMasterKey = - new MasterKeyData(masterKey, createSecretKey(masterKey.getBytes() - .array())); + // Update keys only if the key has changed. + if (super.currentMasterKey == null || super.currentMasterKey.getMasterKey() + .getKeyId() != masterKey.getKeyId()) { + LOG.info("Rolling master-key for container-tokens, got key with id " + + masterKey.getKeyId()); + if (super.currentMasterKey != null) { + updatePreviousMasterKey(super.currentMasterKey); } + updateCurrentMasterKey(new MasterKeyData(masterKey, + createSecretKey(masterKey.getBytes().array()))); } } @@ -128,7 +190,7 @@ public class NMTokenSecretManagerInNM extends BaseNMTokenSecretManager { LOG.debug("Removing application attempts NMToken keys for application " + appId); for (ApplicationAttemptId appAttemptId : appAttemptList) { - this.oldMasterKeys.remove(appAttemptId); + removeAppAttemptKey(appAttemptId); } appToAppAttemptMap.remove(appId); } else { @@ -164,11 +226,11 @@ public class NMTokenSecretManagerInNM extends BaseNMTokenSecretManager { + identifier.getApplicationAttemptId().toString()); if (identifier.getKeyId() == currentMasterKey.getMasterKey() .getKeyId()) { - oldMasterKeys.put(appAttemptId, currentMasterKey); + updateAppAttemptKey(appAttemptId, currentMasterKey); } else if (previousMasterKey != null && identifier.getKeyId() == previousMasterKey.getMasterKey() .getKeyId()) { - oldMasterKeys.put(appAttemptId, previousMasterKey); + updateAppAttemptKey(appAttemptId, previousMasterKey); } else { throw new InvalidToken( "Older NMToken should not be used while starting the container."); @@ -193,4 +255,24 @@ public class NMTokenSecretManagerInNM extends BaseNMTokenSecretManager { public synchronized NodeId getNodeId() { return this.nodeId; } + + private void updateAppAttemptKey(ApplicationAttemptId attempt, + MasterKeyData key) { + this.oldMasterKeys.put(attempt, key); + try { + stateStore.storeNMTokenApplicationMasterKey(attempt, + key.getMasterKey()); + } catch (IOException e) { + LOG.error("Unable to store master key for application " + attempt, e); + } + } + + private void removeAppAttemptKey(ApplicationAttemptId attempt) { + this.oldMasterKeys.remove(attempt); + try { + stateStore.removeNMTokenApplicationMasterKey(attempt); + } catch (IOException e) { + LOG.error("Unable to remove master key for application " + attempt, e); + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java index 0c8a8439b47..9909d9db9e5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java 
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java @@ -25,14 +25,18 @@ import java.util.Map; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto; import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.DeletionServiceDeleteTaskProto; import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LocalizedResourceProto; +import org.apache.hadoop.yarn.server.api.records.MasterKey; +import org.apache.hadoop.yarn.server.api.records.impl.pb.MasterKeyPBImpl; public class NMMemoryStateStoreService extends NMStateStoreService { private Map trackerStates; private Map deleteTasks; + private RecoveredNMTokenState nmTokenState; public NMMemoryStateStoreService() { super(NMMemoryStateStoreService.class.getName()); @@ -113,8 +117,12 @@ public class NMMemoryStateStoreService extends NMStateStoreService { @Override protected void initStorage(Configuration conf) { + nmTokenState = new RecoveredNMTokenState(); + nmTokenState.applicationMasterKeys = + new HashMap(); trackerStates = new HashMap(); deleteTasks = new HashMap(); + } @Override @@ -148,6 +156,47 @@ public class NMMemoryStateStoreService extends NMStateStoreService { } + @Override + public RecoveredNMTokenState loadNMTokenState() throws IOException { + // return a copy so caller can't modify our state + RecoveredNMTokenState result = new RecoveredNMTokenState(); + result.currentMasterKey = nmTokenState.currentMasterKey; + result.previousMasterKey = nmTokenState.previousMasterKey; + result.applicationMasterKeys = + new HashMap( + nmTokenState.applicationMasterKeys); + return result; + } + + @Override + public void storeNMTokenCurrentMasterKey(MasterKey key) + throws IOException { + MasterKeyPBImpl keypb = (MasterKeyPBImpl) key; + nmTokenState.currentMasterKey = new MasterKeyPBImpl(keypb.getProto()); + } + + @Override + public void storeNMTokenPreviousMasterKey(MasterKey key) + throws IOException { + MasterKeyPBImpl keypb = (MasterKeyPBImpl) key; + nmTokenState.previousMasterKey = new MasterKeyPBImpl(keypb.getProto()); + } + + @Override + public void storeNMTokenApplicationMasterKey(ApplicationAttemptId attempt, + MasterKey key) throws IOException { + MasterKeyPBImpl keypb = (MasterKeyPBImpl) key; + nmTokenState.applicationMasterKeys.put(attempt, + new MasterKeyPBImpl(keypb.getProto())); + } + + @Override + public void removeNMTokenApplicationMasterKey(ApplicationAttemptId attempt) + throws IOException { + nmTokenState.applicationMasterKeys.remove(attempt); + } + + private static class TrackerState { Map inProgressMap = new HashMap(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java index 494b27fff73..89d1c237bd2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java @@ -20,6 +20,7 @@ package org.apache.hadoop.yarn.server.nodemanager.recovery; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import java.io.File; @@ -28,6 +29,7 @@ import java.util.Map; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.hadoop.yarn.api.records.LocalResourceType; @@ -37,10 +39,13 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto; import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.DeletionServiceDeleteTaskProto; import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LocalizedResourceProto; +import org.apache.hadoop.yarn.server.api.records.MasterKey; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.LocalResourceTrackerState; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.RecoveredDeletionServiceState; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.RecoveredLocalizationState; +import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.RecoveredNMTokenState; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.RecoveredUserResources; +import org.apache.hadoop.yarn.server.security.BaseNMTokenSecretManager; import org.apache.hadoop.yarn.util.ConverterUtils; import org.junit.After; import org.junit.Before; @@ -460,4 +465,80 @@ public class TestNMLeveldbStateStoreService { state = stateStore.loadDeletionServiceState(); assertTrue(state.getTasks().isEmpty()); } + + @Test + public void testNMTokenStorage() throws IOException { + // test empty when no state + RecoveredNMTokenState state = stateStore.loadNMTokenState(); + assertNull(state.getCurrentMasterKey()); + assertNull(state.getPreviousMasterKey()); + assertTrue(state.getApplicationMasterKeys().isEmpty()); + + // store a master key and verify recovered + NMTokenSecretManagerForTest secretMgr = new NMTokenSecretManagerForTest(); + MasterKey currentKey = secretMgr.generateKey(); + stateStore.storeNMTokenCurrentMasterKey(currentKey); + restartStateStore(); + state = stateStore.loadNMTokenState(); + assertEquals(currentKey, state.getCurrentMasterKey()); + assertNull(state.getPreviousMasterKey()); + assertTrue(state.getApplicationMasterKeys().isEmpty()); + + // store a previous key and verify recovered + MasterKey prevKey = secretMgr.generateKey(); + stateStore.storeNMTokenPreviousMasterKey(prevKey); + restartStateStore(); + state = stateStore.loadNMTokenState(); + assertEquals(currentKey, state.getCurrentMasterKey()); + assertEquals(prevKey, state.getPreviousMasterKey()); + assertTrue(state.getApplicationMasterKeys().isEmpty()); + + // store a few application keys and verify recovered + ApplicationAttemptId attempt1 = ApplicationAttemptId.newInstance( + ApplicationId.newInstance(1, 1), 1); + MasterKey attemptKey1 = secretMgr.generateKey(); + stateStore.storeNMTokenApplicationMasterKey(attempt1, attemptKey1); + ApplicationAttemptId attempt2 = 
ApplicationAttemptId.newInstance( + ApplicationId.newInstance(2, 3), 4); + MasterKey attemptKey2 = secretMgr.generateKey(); + stateStore.storeNMTokenApplicationMasterKey(attempt2, attemptKey2); + restartStateStore(); + state = stateStore.loadNMTokenState(); + assertEquals(currentKey, state.getCurrentMasterKey()); + assertEquals(prevKey, state.getPreviousMasterKey()); + Map loadedAppKeys = + state.getApplicationMasterKeys(); + assertEquals(2, loadedAppKeys.size()); + assertEquals(attemptKey1, loadedAppKeys.get(attempt1)); + assertEquals(attemptKey2, loadedAppKeys.get(attempt2)); + + // add/update/remove keys and verify recovered + ApplicationAttemptId attempt3 = ApplicationAttemptId.newInstance( + ApplicationId.newInstance(5, 6), 7); + MasterKey attemptKey3 = secretMgr.generateKey(); + stateStore.storeNMTokenApplicationMasterKey(attempt3, attemptKey3); + stateStore.removeNMTokenApplicationMasterKey(attempt1); + attemptKey2 = prevKey; + stateStore.storeNMTokenApplicationMasterKey(attempt2, attemptKey2); + prevKey = currentKey; + stateStore.storeNMTokenPreviousMasterKey(prevKey); + currentKey = secretMgr.generateKey(); + stateStore.storeNMTokenCurrentMasterKey(currentKey); + restartStateStore(); + state = stateStore.loadNMTokenState(); + assertEquals(currentKey, state.getCurrentMasterKey()); + assertEquals(prevKey, state.getPreviousMasterKey()); + loadedAppKeys = state.getApplicationMasterKeys(); + assertEquals(2, loadedAppKeys.size()); + assertNull(loadedAppKeys.get(attempt1)); + assertEquals(attemptKey2, loadedAppKeys.get(attempt2)); + assertEquals(attemptKey3, loadedAppKeys.get(attempt3)); + } + + private static class NMTokenSecretManagerForTest extends + BaseNMTokenSecretManager { + public MasterKey generateKey() { + return createNewMasterKey().getMasterKey(); + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/security/TestNMTokenSecretManagerInNM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/security/TestNMTokenSecretManagerInNM.java new file mode 100644 index 00000000000..1f1fc51e568 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/security/TestNMTokenSecretManagerInNM.java @@ -0,0 +1,154 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.server.nodemanager.security; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.IOException; + +import org.apache.hadoop.io.Text; +import org.apache.hadoop.security.token.SecretManager.InvalidToken; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.security.NMTokenIdentifier; +import org.apache.hadoop.yarn.server.api.records.MasterKey; +import org.apache.hadoop.yarn.server.nodemanager.recovery.NMMemoryStateStoreService; +import org.apache.hadoop.yarn.server.security.BaseNMTokenSecretManager; +import org.apache.hadoop.yarn.util.ConverterUtils; +import org.junit.Test; + +public class TestNMTokenSecretManagerInNM { + + @Test + public void testRecovery() throws IOException { + YarnConfiguration conf = new YarnConfiguration(); + conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true); + final NodeId nodeId = NodeId.newInstance("somehost", 1234); + final ApplicationAttemptId attempt1 = + ApplicationAttemptId.newInstance(ApplicationId.newInstance(1, 1), 1); + final ApplicationAttemptId attempt2 = + ApplicationAttemptId.newInstance(ApplicationId.newInstance(2, 2), 2); + NMTokenKeyGeneratorForTest keygen = new NMTokenKeyGeneratorForTest(); + NMMemoryStateStoreService stateStore = new NMMemoryStateStoreService(); + stateStore.init(conf); + stateStore.start(); + NMTokenSecretManagerInNM secretMgr = + new NMTokenSecretManagerInNM(stateStore); + secretMgr.setNodeId(nodeId); + MasterKey currentKey = keygen.generateKey(); + secretMgr.setMasterKey(currentKey); + NMTokenIdentifier attemptToken1 = + getNMTokenId(secretMgr.createNMToken(attempt1, nodeId, "user1")); + NMTokenIdentifier attemptToken2 = + getNMTokenId(secretMgr.createNMToken(attempt2, nodeId, "user2")); + secretMgr.appAttemptStartContainer(attemptToken1); + secretMgr.appAttemptStartContainer(attemptToken2); + assertTrue(secretMgr.isAppAttemptNMTokenKeyPresent(attempt1)); + assertTrue(secretMgr.isAppAttemptNMTokenKeyPresent(attempt2)); + assertNotNull(secretMgr.retrievePassword(attemptToken1)); + assertNotNull(secretMgr.retrievePassword(attemptToken2)); + + // restart and verify key is still there and token still valid + secretMgr = new NMTokenSecretManagerInNM(stateStore); + secretMgr.recover(stateStore.loadNMTokenState()); + secretMgr.setNodeId(nodeId); + assertEquals(currentKey, secretMgr.getCurrentKey()); + assertTrue(secretMgr.isAppAttemptNMTokenKeyPresent(attempt1)); + assertTrue(secretMgr.isAppAttemptNMTokenKeyPresent(attempt2)); + assertNotNull(secretMgr.retrievePassword(attemptToken1)); + assertNotNull(secretMgr.retrievePassword(attemptToken2)); + + // roll master key and remove an app + currentKey = keygen.generateKey(); + secretMgr.setMasterKey(currentKey); + secretMgr.appFinished(attempt1.getApplicationId()); + + // restart and verify attempt1 key is still valid due to prev key persist + secretMgr = new NMTokenSecretManagerInNM(stateStore); + secretMgr.recover(stateStore.loadNMTokenState()); + secretMgr.setNodeId(nodeId); + assertEquals(currentKey, secretMgr.getCurrentKey()); + assertFalse(secretMgr.isAppAttemptNMTokenKeyPresent(attempt1)); + 
assertTrue(secretMgr.isAppAttemptNMTokenKeyPresent(attempt2)); + assertNotNull(secretMgr.retrievePassword(attemptToken1)); + assertNotNull(secretMgr.retrievePassword(attemptToken2)); + + // roll master key again, restart, and verify attempt1 key is bad but + // attempt2 is still good due to app key persist + currentKey = keygen.generateKey(); + secretMgr.setMasterKey(currentKey); + secretMgr = new NMTokenSecretManagerInNM(stateStore); + secretMgr.recover(stateStore.loadNMTokenState()); + secretMgr.setNodeId(nodeId); + assertEquals(currentKey, secretMgr.getCurrentKey()); + assertFalse(secretMgr.isAppAttemptNMTokenKeyPresent(attempt1)); + assertTrue(secretMgr.isAppAttemptNMTokenKeyPresent(attempt2)); + try { + secretMgr.retrievePassword(attemptToken1); + fail("attempt token should not still be valid"); + } catch (InvalidToken e) { + // expected + } + assertNotNull(secretMgr.retrievePassword(attemptToken2)); + + // remove last attempt, restart, verify both tokens are now bad + secretMgr.appFinished(attempt2.getApplicationId()); + secretMgr = new NMTokenSecretManagerInNM(stateStore); + secretMgr.recover(stateStore.loadNMTokenState()); + secretMgr.setNodeId(nodeId); + assertEquals(currentKey, secretMgr.getCurrentKey()); + assertFalse(secretMgr.isAppAttemptNMTokenKeyPresent(attempt1)); + assertFalse(secretMgr.isAppAttemptNMTokenKeyPresent(attempt2)); + try { + secretMgr.retrievePassword(attemptToken1); + fail("attempt token should not still be valid"); + } catch (InvalidToken e) { + // expected + } + try { + secretMgr.retrievePassword(attemptToken2); + fail("attempt token should not still be valid"); + } catch (InvalidToken e) { + // expected + } + + stateStore.close(); + } + + private NMTokenIdentifier getNMTokenId( + org.apache.hadoop.yarn.api.records.Token token) throws IOException { + Token convertedToken = + ConverterUtils.convertFromYarn(token, (Text) null); + return convertedToken.decodeIdentifier(); + } + + private static class NMTokenKeyGeneratorForTest extends + BaseNMTokenSecretManager { + public MasterKey generateKey() { + return createNewMasterKey().getMasterKey(); + } + } +} From 2c5ceee6abaa47678bccdfb425a507586d6a2d31 Mon Sep 17 00:00:00 2001 From: Vinayakumar B Date: Fri, 18 Jul 2014 04:27:09 +0000 Subject: [PATCH 29/49] HDFS-6655. Add 'header banner' to 'explorer.html' also in Namenode UI ( Contributed by Vinayakumar B ) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1611538 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 ++ .../src/main/webapps/hdfs/explorer.html | 31 +++++++++++++++++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 00d1e93b68d..39b83c0b46d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -292,6 +292,9 @@ Release 2.6.0 - UNRELEASED HDFS-5624. Add HDFS tests for ACLs in combination with viewfs. (Stephen Chu via cnauroth) + HDFS-6655. Add 'header banner' to 'explorer.html' also in Namenode UI + (vinayakumarb) + OPTIMIZATIONS HDFS-6690. Deduplicate xattr names in memory. 
(wang) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/explorer.html b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/explorer.html index 0c5fed24395..e1fdfa341e8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/explorer.html +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/explorer.html @@ -24,6 +24,29 @@ Browsing HDFS + +