Merge r1606557 from trunk. YARN-2052. Embedded an epoch number in container id to ensure the uniqueness of container id after RM restarts. Contributed by Tsuyoshi OZAWA

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1606558 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jian He 2014-06-29 18:26:08 +00:00
parent 56294d8457
commit a385b77df7
21 changed files with 302 additions and 18 deletions

View File

@ -30,6 +30,9 @@ Release 2.5.0 - UNRELEASED
YARN-1365. Changed ApplicationMasterService to allow an app to re-register YARN-1365. Changed ApplicationMasterService to allow an app to re-register
after RM restart. (Anubhav Dhoot via jianhe) after RM restart. (Anubhav Dhoot via jianhe)
YARN-2052. Embedded an epoch number in container id to ensure the uniqueness
of container id after RM restarts. (Tsuyoshi OZAWA via jianhe)
IMPROVEMENTS IMPROVEMENTS
YARN-1479. Invalid NaN values in Hadoop REST API JSON response (Chen He via YARN-1479. Invalid NaN values in Hadoop REST API JSON response (Chen He via

View File

@ -135,6 +135,10 @@ message RMStateVersionProto {
optional int32 minor_version = 2; optional int32 minor_version = 2;
} }
message EpochProto {
optional int64 epoch = 1;
}
////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////
///////////// RM Failover related records //////////////////////// ///////////// RM Failover related records ////////////////////////
////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////

View File

@ -101,4 +101,6 @@ void setRMApplicationHistoryWriter(
ConfigurationProvider getConfigurationProvider(); ConfigurationProvider getConfigurationProvider();
boolean isWorkPreservingRecoveryEnabled(); boolean isWorkPreservingRecoveryEnabled();
int getEpoch();
} }

View File

@ -82,6 +82,7 @@ public class RMContextImpl implements RMContext {
private ApplicationMasterService applicationMasterService; private ApplicationMasterService applicationMasterService;
private RMApplicationHistoryWriter rmApplicationHistoryWriter; private RMApplicationHistoryWriter rmApplicationHistoryWriter;
private ConfigurationProvider configurationProvider; private ConfigurationProvider configurationProvider;
private int epoch;
/** /**
* Default constructor. To be used in conjunction with setter methods for * Default constructor. To be used in conjunction with setter methods for
@ -359,4 +360,13 @@ public void setConfigurationProvider(
ConfigurationProvider configurationProvider) { ConfigurationProvider configurationProvider) {
this.configurationProvider = configurationProvider; this.configurationProvider = configurationProvider;
} }
@Override
public int getEpoch() {
return this.epoch;
}
void setEpoch(int epoch) {
this.epoch = epoch;
}
} }

View File

@ -482,6 +482,9 @@ protected void serviceStart() throws Exception {
if(recoveryEnabled) { if(recoveryEnabled) {
try { try {
rmStore.checkVersion(); rmStore.checkVersion();
if (rmContext.isWorkPreservingRecoveryEnabled()) {
rmContext.setEpoch(rmStore.getAndIncrementEpoch());
}
RMState state = rmStore.loadState(); RMState state = rmStore.loadState();
recover(state); recover(state);
} catch (Exception e) { } catch (Exception e) {

View File

@ -43,15 +43,19 @@
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.EpochProto;
import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.ApplicationAttemptStateDataProto; import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.ApplicationAttemptStateDataProto;
import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.ApplicationStateDataProto; import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.ApplicationStateDataProto;
import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.RMStateVersionProto; import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.RMStateVersionProto;
import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier; import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData; import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData; import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.Epoch;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.RMStateVersion; import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.RMStateVersion;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.ApplicationAttemptStateDataPBImpl; import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.ApplicationAttemptStateDataPBImpl;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.ApplicationStateDataPBImpl; import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.ApplicationStateDataPBImpl;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.EpochPBImpl;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.RMStateVersionPBImpl; import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.RMStateVersionPBImpl;
import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.hadoop.yarn.util.ConverterUtils;
@ -71,7 +75,7 @@ public class FileSystemRMStateStore extends RMStateStore {
protected static final String ROOT_DIR_NAME = "FSRMStateRoot"; protected static final String ROOT_DIR_NAME = "FSRMStateRoot";
protected static final RMStateVersion CURRENT_VERSION_INFO = RMStateVersion protected static final RMStateVersion CURRENT_VERSION_INFO = RMStateVersion
.newInstance(1, 0); .newInstance(1, 1);
protected FileSystem fs; protected FileSystem fs;
@ -145,7 +149,30 @@ protected synchronized void storeVersion() throws Exception {
writeFile(versionNodePath, data); writeFile(versionNodePath, data);
} }
} }
@Override
public synchronized int getAndIncrementEpoch() throws Exception {
Path epochNodePath = getNodePath(rootDirPath, EPOCH_NODE);
int currentEpoch = 0;
if (fs.exists(epochNodePath)) {
// load current epoch
FileStatus status = fs.getFileStatus(epochNodePath);
byte[] data = readFile(epochNodePath, status.getLen());
Epoch epoch = new EpochPBImpl(EpochProto.parseFrom(data));
currentEpoch = epoch.getEpoch();
// increment epoch and store it
byte[] storeData = Epoch.newInstance(currentEpoch + 1).getProto()
.toByteArray();
updateFile(epochNodePath, storeData);
} else {
// initialize epoch file with 1 for the next time.
byte[] storeData = Epoch.newInstance(currentEpoch + 1).getProto()
.toByteArray();
writeFile(epochNodePath, storeData);
}
return currentEpoch;
}
@Override @Override
public synchronized RMState loadState() throws Exception { public synchronized RMState loadState() throws Exception {
RMState rmState = new RMState(); RMState rmState = new RMState();

View File

@ -43,6 +43,8 @@
public class MemoryRMStateStore extends RMStateStore { public class MemoryRMStateStore extends RMStateStore {
RMState state = new RMState(); RMState state = new RMState();
private int epoch = 0;
@VisibleForTesting @VisibleForTesting
public RMState getState() { public RMState getState() {
return state; return state;
@ -52,6 +54,13 @@ public RMState getState() {
public void checkVersion() throws Exception { public void checkVersion() throws Exception {
} }
@Override
public synchronized int getAndIncrementEpoch() throws Exception {
int currentEpoch = epoch;
epoch = epoch + 1;
return currentEpoch;
}
@Override @Override
public synchronized RMState loadState() throws Exception { public synchronized RMState loadState() throws Exception {
// return a copy of the state to allow for modification of the real state // return a copy of the state to allow for modification of the real state

View File

@ -47,6 +47,11 @@ protected void closeInternal() throws Exception {
// Do nothing // Do nothing
} }
@Override
public synchronized int getAndIncrementEpoch() throws Exception {
return 0;
}
@Override @Override
public RMState loadState() throws Exception { public RMState loadState() throws Exception {
throw new UnsupportedOperationException("Cannot load state from null store"); throw new UnsupportedOperationException("Cannot load state from null store");

View File

@ -45,6 +45,7 @@
import org.apache.hadoop.yarn.event.AsyncDispatcher; import org.apache.hadoop.yarn.event.AsyncDispatcher;
import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos;
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier; import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier; import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier;
import org.apache.hadoop.yarn.server.resourcemanager.RMFatalEvent; import org.apache.hadoop.yarn.server.resourcemanager.RMFatalEvent;
@ -85,6 +86,7 @@ public abstract class RMStateStore extends AbstractService {
protected static final String DELEGATION_TOKEN_SEQUENCE_NUMBER_PREFIX = protected static final String DELEGATION_TOKEN_SEQUENCE_NUMBER_PREFIX =
"RMDTSequenceNumber_"; "RMDTSequenceNumber_";
protected static final String VERSION_NODE = "RMVersionNode"; protected static final String VERSION_NODE = "RMVersionNode";
protected static final String EPOCH_NODE = "EpochNode";
public static final Log LOG = LogFactory.getLog(RMStateStore.class); public static final Log LOG = LogFactory.getLog(RMStateStore.class);
@ -520,6 +522,12 @@ public void checkVersion() throws Exception {
*/ */
protected abstract RMStateVersion getCurrentVersion(); protected abstract RMStateVersion getCurrentVersion();
/**
* Get the current epoch of RM and increment the value.
*/
public abstract int getAndIncrementEpoch() throws Exception;
/** /**
* Blocking API * Blocking API
* The derived class must recover state from the store and return a new * The derived class must recover state from the store and return a new

View File

@ -44,16 +44,21 @@
import org.apache.hadoop.yarn.conf.HAUtil; import org.apache.hadoop.yarn.conf.HAUtil;
import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos;
import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.ApplicationAttemptStateDataProto; import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.ApplicationAttemptStateDataProto;
import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.ApplicationStateDataProto; import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.ApplicationStateDataProto;
import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.RMStateVersionProto; import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.RMStateVersionProto;
import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.EpochProto;
import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier; import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier;
import org.apache.hadoop.yarn.server.resourcemanager.RMZKUtils; import org.apache.hadoop.yarn.server.resourcemanager.RMZKUtils;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData; import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData; import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.Epoch;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.RMStateVersion; import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.RMStateVersion;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.ApplicationAttemptStateDataPBImpl; import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.ApplicationAttemptStateDataPBImpl;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.ApplicationStateDataPBImpl; import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.ApplicationStateDataPBImpl;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.EpochPBImpl;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.RMStateVersionPBImpl; import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.RMStateVersionPBImpl;
import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.CreateMode;
@ -81,7 +86,7 @@ public class ZKRMStateStore extends RMStateStore {
protected static final String ROOT_ZNODE_NAME = "ZKRMStateRoot"; protected static final String ROOT_ZNODE_NAME = "ZKRMStateRoot";
protected static final RMStateVersion CURRENT_VERSION_INFO = RMStateVersion protected static final RMStateVersion CURRENT_VERSION_INFO = RMStateVersion
.newInstance(1, 0); .newInstance(1, 1);
private static final String RM_DELEGATION_TOKENS_ROOT_ZNODE_NAME = private static final String RM_DELEGATION_TOKENS_ROOT_ZNODE_NAME =
"RMDelegationTokensRoot"; "RMDelegationTokensRoot";
private static final String RM_DT_SEQUENTIAL_NUMBER_ZNODE_NAME = private static final String RM_DT_SEQUENTIAL_NUMBER_ZNODE_NAME =
@ -102,6 +107,7 @@ public class ZKRMStateStore extends RMStateStore {
* *
* ROOT_DIR_PATH * ROOT_DIR_PATH
* |--- VERSION_INFO * |--- VERSION_INFO
* |--- EPOCH_NODE
* |--- RM_ZK_FENCING_LOCK * |--- RM_ZK_FENCING_LOCK
* |--- RM_APP_ROOT * |--- RM_APP_ROOT
* | |----- (#ApplicationId1) * | |----- (#ApplicationId1)
@ -391,6 +397,28 @@ protected synchronized RMStateVersion loadVersion() throws Exception {
return null; return null;
} }
@Override
public synchronized int getAndIncrementEpoch() throws Exception {
String epochNodePath = getNodePath(zkRootNodePath, EPOCH_NODE);
int currentEpoch = 0;
if (existsWithRetries(epochNodePath, true) != null) {
// load current epoch
byte[] data = getDataWithRetries(epochNodePath, true);
Epoch epoch = new EpochPBImpl(EpochProto.parseFrom(data));
currentEpoch = epoch.getEpoch();
// increment epoch and store it
byte[] storeData = Epoch.newInstance(currentEpoch + 1).getProto()
.toByteArray();
setDataWithRetries(epochNodePath, storeData, -1);
} else {
// initialize epoch node with 1 for the next time.
byte[] storeData = Epoch.newInstance(currentEpoch + 1).getProto()
.toByteArray();
createWithRetries(epochNodePath, storeData, zkAcl, CreateMode.PERSISTENT);
}
return currentEpoch;
}
@Override @Override
public synchronized RMState loadState() throws Exception { public synchronized RMState loadState() throws Exception {
RMState rmState = new RMState(); RMState rmState = new RMState();

View File

@ -0,0 +1,74 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.resourcemanager.recovery.records;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.EpochProto;
import org.apache.hadoop.yarn.util.Records;
/**
* The epoch information of RM for work-preserving restart.
* Epoch is incremented each time RM restart. It's used for assuring
* uniqueness of <code>ContainerId</code>.
*/
@Private
@Unstable
public abstract class Epoch {
public static Epoch newInstance(int sequenceNumber) {
Epoch epoch = Records.newRecord(Epoch.class);
epoch.setEpoch(sequenceNumber);
return epoch;
}
public abstract int getEpoch();
public abstract void setEpoch(int sequenceNumber);
public abstract EpochProto getProto();
public String toString() {
return String.valueOf(getEpoch());
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + getEpoch();
return result;
}
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
Epoch other = (Epoch) obj;
if (this.getEpoch() == other.getEpoch()) {
return true;
} else {
return false;
}
}
}

View File

@ -0,0 +1,67 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb;
import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.EpochProto;
import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.EpochProtoOrBuilder;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.Epoch;
public class EpochPBImpl extends Epoch {
EpochProto proto = EpochProto.getDefaultInstance();
EpochProto.Builder builder = null;
boolean viaProto = false;
public EpochPBImpl() {
builder = EpochProto.newBuilder();
}
public EpochPBImpl(EpochProto proto) {
this.proto = proto;
viaProto = true;
}
public EpochProto getProto() {
proto = viaProto ? proto : builder.build();
viaProto = true;
return proto;
}
private void maybeInitBuilder() {
if (viaProto || builder == null) {
builder = EpochProto.newBuilder(proto);
}
viaProto = false;
}
@Override
public int getEpoch() {
EpochProtoOrBuilder p = viaProto ? proto : builder;
return (int) (p.getEpoch() & 0xffffffff);
}
@Override
public void setEpoch(int sequentialNumber) {
maybeInitBuilder();
builder.setEpoch(sequentialNumber);
}
}

View File

@ -57,7 +57,10 @@ public class AppSchedulingInfo {
private final String queueName; private final String queueName;
Queue queue; Queue queue;
final String user; final String user;
private final AtomicInteger containerIdCounter = new AtomicInteger(0); // TODO making containerIdCounter long
private final AtomicInteger containerIdCounter;
private final int EPOCH_BIT_MASK = 0x3ff;
private final int EPOCH_BIT_SHIFT = 22;
final Set<Priority> priorities = new TreeSet<Priority>( final Set<Priority> priorities = new TreeSet<Priority>(
new org.apache.hadoop.yarn.server.resourcemanager.resource.Priority.Comparator()); new org.apache.hadoop.yarn.server.resourcemanager.resource.Priority.Comparator());
@ -70,15 +73,19 @@ public class AppSchedulingInfo {
/* Allocated by scheduler */ /* Allocated by scheduler */
boolean pending = true; // for app metrics boolean pending = true; // for app metrics
public AppSchedulingInfo(ApplicationAttemptId appAttemptId, public AppSchedulingInfo(ApplicationAttemptId appAttemptId,
String user, Queue queue, ActiveUsersManager activeUsersManager) { String user, Queue queue, ActiveUsersManager activeUsersManager,
int epoch) {
this.applicationAttemptId = appAttemptId; this.applicationAttemptId = appAttemptId;
this.applicationId = appAttemptId.getApplicationId(); this.applicationId = appAttemptId.getApplicationId();
this.queue = queue; this.queue = queue;
this.queueName = queue.getQueueName(); this.queueName = queue.getQueueName();
this.user = user; this.user = user;
this.activeUsersManager = activeUsersManager; this.activeUsersManager = activeUsersManager;
this.containerIdCounter = new AtomicInteger(
(epoch & EPOCH_BIT_MASK) << EPOCH_BIT_SHIFT);
} }
public ApplicationId getApplicationId() { public ApplicationId getApplicationId() {
@ -413,9 +420,6 @@ public synchronized void transferStateFromPreviousAppSchedulingInfo(
} }
public synchronized void recoverContainer(RMContainer rmContainer) { public synchronized void recoverContainer(RMContainer rmContainer) {
// ContainerIdCounter on recovery will be addressed in YARN-2052
this.containerIdCounter.incrementAndGet();
QueueMetrics metrics = queue.getMetrics(); QueueMetrics metrics = queue.getMetrics();
if (pending) { if (pending) {
// If there was any container to recover, the application was // If there was any container to recover, the application was

View File

@ -17,6 +17,7 @@
*/ */
package org.apache.hadoop.yarn.server.resourcemanager.scheduler; package org.apache.hadoop.yarn.server.resourcemanager.scheduler;
import com.google.common.base.Preconditions;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.HashMap; import java.util.HashMap;
@ -40,6 +41,7 @@
import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
@ -106,13 +108,14 @@ public class SchedulerApplicationAttempt {
public SchedulerApplicationAttempt(ApplicationAttemptId applicationAttemptId, public SchedulerApplicationAttempt(ApplicationAttemptId applicationAttemptId,
String user, Queue queue, ActiveUsersManager activeUsersManager, String user, Queue queue, ActiveUsersManager activeUsersManager,
RMContext rmContext) { RMContext rmContext) {
Preconditions.checkNotNull("RMContext should not be null", rmContext);
this.rmContext = rmContext; this.rmContext = rmContext;
this.appSchedulingInfo = this.appSchedulingInfo =
new AppSchedulingInfo(applicationAttemptId, user, queue, new AppSchedulingInfo(applicationAttemptId, user, queue,
activeUsersManager); activeUsersManager, rmContext.getEpoch());
this.queue = queue; this.queue = queue;
if (rmContext != null && rmContext.getRMApps() != null && if (rmContext.getRMApps() != null &&
rmContext.getRMApps() rmContext.getRMApps()
.containsKey(applicationAttemptId.getApplicationId())) { .containsKey(applicationAttemptId.getApplicationId())) {
ApplicationSubmissionContext appSubmissionContext = ApplicationSubmissionContext appSubmissionContext =

View File

@ -218,7 +218,7 @@ public void testSchedulerRecovery() throws Exception {
assertEquals(availableResources, schedulerAttempt.getHeadroom()); assertEquals(availableResources, schedulerAttempt.getHeadroom());
// *********** check appSchedulingInfo state *********** // *********** check appSchedulingInfo state ***********
assertEquals(4, schedulerAttempt.getNewContainerId()); assertEquals((1 << 22) + 1, schedulerAttempt.getNewContainerId());
} }
private void checkCSQueue(MockRM rm, private void checkCSQueue(MockRM rm,

View File

@ -495,6 +495,21 @@ public void testCheckVersion(RMStateStoreHelper stateStoreHelper)
Assert.assertTrue(t instanceof RMStateVersionIncompatibleException); Assert.assertTrue(t instanceof RMStateVersionIncompatibleException);
} }
} }
public void testEpoch(RMStateStoreHelper stateStoreHelper)
throws Exception {
RMStateStore store = stateStoreHelper.getRMStateStore();
store.setRMDispatcher(new TestDispatcher());
int firstTimeEpoch = store.getAndIncrementEpoch();
Assert.assertEquals(0, firstTimeEpoch);
int secondTimeEpoch = store.getAndIncrementEpoch();
Assert.assertEquals(1, secondTimeEpoch);
int thirdTimeEpoch = store.getAndIncrementEpoch();
Assert.assertEquals(2, thirdTimeEpoch);
}
public void testAppDeletion(RMStateStoreHelper stateStoreHelper) public void testAppDeletion(RMStateStoreHelper stateStoreHelper)
throws Exception { throws Exception {

View File

@ -158,6 +158,7 @@ public void testFSRMStateStore() throws Exception {
.getFileSystem(conf).exists(tempAppAttemptFile)); .getFileSystem(conf).exists(tempAppAttemptFile));
testRMDTSecretManagerStateStore(fsTester); testRMDTSecretManagerStateStore(fsTester);
testCheckVersion(fsTester); testCheckVersion(fsTester);
testEpoch(fsTester);
testAppDeletion(fsTester); testAppDeletion(fsTester);
} finally { } finally {
cluster.shutdown(); cluster.shutdown();

View File

@ -120,6 +120,7 @@ public void testZKRMStateStoreRealZK() throws Exception {
testRMAppStateStore(zkTester); testRMAppStateStore(zkTester);
testRMDTSecretManagerStateStore(zkTester); testRMDTSecretManagerStateStore(zkTester);
testCheckVersion(zkTester); testCheckVersion(zkTester);
testEpoch(zkTester);
testAppDeletion(zkTester); testAppDeletion(zkTester);
} }

View File

@ -17,6 +17,7 @@
*/ */
package org.apache.hadoop.yarn.server.resourcemanager.scheduler; package org.apache.hadoop.yarn.server.resourcemanager.scheduler;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.mockito.Mockito.*; import static org.mockito.Mockito.*;
@ -61,10 +62,15 @@ public void testMove() {
QueueMetrics newMetrics = newQueue.getMetrics(); QueueMetrics newMetrics = newQueue.getMetrics();
ApplicationAttemptId appAttId = createAppAttemptId(0, 0); ApplicationAttemptId appAttId = createAppAttemptId(0, 0);
RMContext rmContext = mock(RMContext.class);
when(rmContext.getEpoch()).thenReturn(3);
SchedulerApplicationAttempt app = new SchedulerApplicationAttempt(appAttId, SchedulerApplicationAttempt app = new SchedulerApplicationAttempt(appAttId,
user, oldQueue, oldQueue.getActiveUsersManager(), null); user, oldQueue, oldQueue.getActiveUsersManager(), rmContext);
oldMetrics.submitApp(user); oldMetrics.submitApp(user);
// confirm that containerId is calculated based on epoch.
assertEquals(app.getNewContainerId(), 0x00c00001);
// Resource request // Resource request
Resource requestedResource = Resource.newInstance(1536, 2); Resource requestedResource = Resource.newInstance(1536, 2);
Priority requestedPriority = Priority.newInstance(2); Priority requestedPriority = Priority.newInstance(2);

View File

@ -23,6 +23,7 @@
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType;
import org.apache.hadoop.yarn.util.Clock; import org.apache.hadoop.yarn.util.Clock;
import org.junit.Test; import org.junit.Test;
@ -59,8 +60,11 @@ public void testDelayScheduling() {
double rackLocalityThreshold = .6; double rackLocalityThreshold = .6;
ApplicationAttemptId applicationAttemptId = createAppAttemptId(1, 1); ApplicationAttemptId applicationAttemptId = createAppAttemptId(1, 1);
RMContext rmContext = Mockito.mock(RMContext.class);
Mockito.when(rmContext.getEpoch()).thenReturn(0);
FSSchedulerApp schedulerApp = FSSchedulerApp schedulerApp =
new FSSchedulerApp(applicationAttemptId, "user1", queue , null, null); new FSSchedulerApp(applicationAttemptId, "user1", queue , null,
rmContext);
// Default level should be node-local // Default level should be node-local
assertEquals(NodeType.NODE_LOCAL, schedulerApp.getAllowedLocalityLevel( assertEquals(NodeType.NODE_LOCAL, schedulerApp.getAllowedLocalityLevel(
@ -118,10 +122,12 @@ public void testDelaySchedulingForContinuousScheduling()
long nodeLocalityDelayMs = 5 * 1000L; // 5 seconds long nodeLocalityDelayMs = 5 * 1000L; // 5 seconds
long rackLocalityDelayMs = 6 * 1000L; // 6 seconds long rackLocalityDelayMs = 6 * 1000L; // 6 seconds
RMContext rmContext = Mockito.mock(RMContext.class);
Mockito.when(rmContext.getEpoch()).thenReturn(0);
ApplicationAttemptId applicationAttemptId = createAppAttemptId(1, 1); ApplicationAttemptId applicationAttemptId = createAppAttemptId(1, 1);
FSSchedulerApp schedulerApp = FSSchedulerApp schedulerApp =
new FSSchedulerApp(applicationAttemptId, "user1", queue, new FSSchedulerApp(applicationAttemptId, "user1", queue,
null, null); null, rmContext);
AppSchedulable appSchedulable = Mockito.mock(AppSchedulable.class); AppSchedulable appSchedulable = Mockito.mock(AppSchedulable.class);
long startTime = clock.getTime(); long startTime = clock.getTime();
Mockito.when(appSchedulable.getStartTime()).thenReturn(startTime); Mockito.when(appSchedulable.getStartTime()).thenReturn(startTime);
@ -173,9 +179,12 @@ public void testLocalityLevelWithoutDelays() {
Priority prio = Mockito.mock(Priority.class); Priority prio = Mockito.mock(Priority.class);
Mockito.when(prio.getPriority()).thenReturn(1); Mockito.when(prio.getPriority()).thenReturn(1);
RMContext rmContext = Mockito.mock(RMContext.class);
Mockito.when(rmContext.getEpoch()).thenReturn(0);
ApplicationAttemptId applicationAttemptId = createAppAttemptId(1, 1); ApplicationAttemptId applicationAttemptId = createAppAttemptId(1, 1);
FSSchedulerApp schedulerApp = FSSchedulerApp schedulerApp =
new FSSchedulerApp(applicationAttemptId, "user1", queue , null, null); new FSSchedulerApp(applicationAttemptId, "user1", queue , null,
rmContext);
assertEquals(NodeType.OFF_SWITCH, schedulerApp.getAllowedLocalityLevel( assertEquals(NodeType.OFF_SWITCH, schedulerApp.getAllowedLocalityLevel(
prio, 10, -1.0, -1.0)); prio, 10, -1.0, -1.0));
} }

View File

@ -28,6 +28,7 @@
import java.util.Map; import java.util.Map;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.junit.Before; import org.junit.Before;
@ -40,6 +41,7 @@ public class TestMaxRunningAppsEnforcer {
private MaxRunningAppsEnforcer maxAppsEnforcer; private MaxRunningAppsEnforcer maxAppsEnforcer;
private int appNum; private int appNum;
private TestFairScheduler.MockClock clock; private TestFairScheduler.MockClock clock;
private RMContext rmContext;
@Before @Before
public void setup() throws Exception { public void setup() throws Exception {
@ -59,13 +61,16 @@ public void setup() throws Exception {
userMaxApps = allocConf.userMaxApps; userMaxApps = allocConf.userMaxApps;
maxAppsEnforcer = new MaxRunningAppsEnforcer(scheduler); maxAppsEnforcer = new MaxRunningAppsEnforcer(scheduler);
appNum = 0; appNum = 0;
rmContext = mock(RMContext.class);
when(rmContext.getEpoch()).thenReturn(0);
} }
private FSSchedulerApp addApp(FSLeafQueue queue, String user) { private FSSchedulerApp addApp(FSLeafQueue queue, String user) {
ApplicationId appId = ApplicationId.newInstance(0l, appNum++); ApplicationId appId = ApplicationId.newInstance(0l, appNum++);
ApplicationAttemptId attId = ApplicationAttemptId.newInstance(appId, 0); ApplicationAttemptId attId = ApplicationAttemptId.newInstance(appId, 0);
boolean runnable = maxAppsEnforcer.canAppBeRunnable(queue, user); boolean runnable = maxAppsEnforcer.canAppBeRunnable(queue, user);
FSSchedulerApp app = new FSSchedulerApp(attId, user, queue, null, null); FSSchedulerApp app = new FSSchedulerApp(attId, user, queue, null,
rmContext);
queue.addApp(app, runnable); queue.addApp(app, runnable);
if (runnable) { if (runnable) {
maxAppsEnforcer.trackRunnableApp(app); maxAppsEnforcer.trackRunnableApp(app);