YARN-8242. YARN NM: OOM error while reading back the state store on recovery. Contributed by Pradeep Ambati and Kanwaljeet Sachdev

(cherry picked from commit 65e7469712)
This commit is contained in:
Jason Lowe 2018-08-20 10:14:40 -05:00
parent d7442c244f
commit 44c4928b64
11 changed files with 657 additions and 320 deletions

View File

@ -19,13 +19,14 @@
package org.apache.hadoop.yarn.server.nodemanager; package org.apache.hadoop.yarn.server.nodemanager;
import static java.util.concurrent.TimeUnit.SECONDS; import static java.util.concurrent.TimeUnit.SECONDS;
import org.apache.hadoop.yarn.server.nodemanager.recovery.RecoveryIterator;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.io.IOException; import java.io.IOException;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.ScheduledThreadPoolExecutor;
@ -96,17 +97,21 @@ public class DeletionService extends AbstractService {
private void recover(NMStateStoreService.RecoveredDeletionServiceState state) private void recover(NMStateStoreService.RecoveredDeletionServiceState state)
throws IOException { throws IOException {
List<DeletionServiceDeleteTaskProto> taskProtos = state.getTasks();
Map<Integer, DeletionTaskRecoveryInfo> idToInfoMap = Map<Integer, DeletionTaskRecoveryInfo> idToInfoMap =
new HashMap<>(taskProtos.size()); new HashMap<Integer, DeletionTaskRecoveryInfo>();
Set<Integer> successorTasks = new HashSet<>(); Set<Integer> successorTasks = new HashSet<Integer>();
for (DeletionServiceDeleteTaskProto proto : taskProtos) {
try (RecoveryIterator<DeletionServiceDeleteTaskProto> it =
state.getIterator()) {
while (it.hasNext()) {
DeletionServiceDeleteTaskProto proto = it.next();
DeletionTaskRecoveryInfo info = DeletionTaskRecoveryInfo info =
NMProtoUtils.convertProtoToDeletionTaskRecoveryInfo(proto, this); NMProtoUtils.convertProtoToDeletionTaskRecoveryInfo(proto, this);
idToInfoMap.put(info.getTask().getTaskId(), info); idToInfoMap.put(info.getTask().getTaskId(), info);
nextTaskId.set(Math.max(nextTaskId.get(), info.getTask().getTaskId())); nextTaskId.set(Math.max(nextTaskId.get(), info.getTask().getTaskId()));
successorTasks.addAll(info.getSuccessorTaskIds()); successorTasks.addAll(info.getSuccessorTaskIds());
} }
}
// restore the task dependencies and schedule the deletion tasks that // restore the task dependencies and schedule the deletion tasks that
// have no predecessors // have no predecessors

View File

@ -23,6 +23,7 @@ import com.google.protobuf.ByteString;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.UpdateContainerTokenEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.UpdateContainerTokenEvent;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerTokenUpdatedEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerTokenUpdatedEvent;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler.ContainerSchedulerEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler.ContainerSchedulerEvent;
import org.apache.hadoop.yarn.server.nodemanager.recovery.RecoveryIterator;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceAudience.Private;
@ -356,20 +357,27 @@ public class ContainerManagerImpl extends CompositeService implements
stateStore.loadLocalizationState()); stateStore.loadLocalizationState());
RecoveredApplicationsState appsState = stateStore.loadApplicationsState(); RecoveredApplicationsState appsState = stateStore.loadApplicationsState();
for (ContainerManagerApplicationProto proto : try (RecoveryIterator<ContainerManagerApplicationProto> rasIterator =
appsState.getApplications()) { appsState.getIterator()) {
while (rasIterator.hasNext()) {
ContainerManagerApplicationProto proto = rasIterator.next();
if (LOG.isDebugEnabled()) { if (LOG.isDebugEnabled()) {
LOG.debug("Recovering application with state: " + proto.toString()); LOG.debug("Recovering application with state: " + proto.toString());
} }
recoverApplication(proto); recoverApplication(proto);
} }
}
for (RecoveredContainerState rcs : stateStore.loadContainersState()) { try (RecoveryIterator<RecoveredContainerState> rcsIterator =
stateStore.getContainerStateIterator()) {
while (rcsIterator.hasNext()) {
RecoveredContainerState rcs = rcsIterator.next();
if (LOG.isDebugEnabled()) { if (LOG.isDebugEnabled()) {
LOG.debug("Recovering container with state: " + rcs); LOG.debug("Recovering container with state: " + rcs);
} }
recoverContainer(rcs); recoverContainer(rcs);
} }
}
// Recovery AMRMProxy state after apps and containers are recovered // Recovery AMRMProxy state after apps and containers are recovered
if (this.amrmProxyEnabled) { if (this.amrmProxyEnabled) {

View File

@ -19,6 +19,8 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer;
import static org.apache.hadoop.fs.CreateFlag.CREATE; import static org.apache.hadoop.fs.CreateFlag.CREATE;
import static org.apache.hadoop.fs.CreateFlag.OVERWRITE; import static org.apache.hadoop.fs.CreateFlag.OVERWRITE;
import org.apache.hadoop.yarn.server.nodemanager.recovery.RecoveryIterator;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -298,18 +300,21 @@ public class ResourceLocalizationService extends CompositeService
//Recover localized resources after an NM restart //Recover localized resources after an NM restart
public void recoverLocalizedResources(RecoveredLocalizationState state) public void recoverLocalizedResources(RecoveredLocalizationState state)
throws URISyntaxException { throws URISyntaxException, IOException {
LocalResourceTrackerState trackerState = state.getPublicTrackerState(); LocalResourceTrackerState trackerState = state.getPublicTrackerState();
recoverTrackerResources(publicRsrc, trackerState); recoverTrackerResources(publicRsrc, trackerState);
for (Map.Entry<String, RecoveredUserResources> userEntry : try (RecoveryIterator<Map.Entry<String, RecoveredUserResources>> it
state.getUserResources().entrySet()) { = state.getIterator()) {
while (it.hasNext()) {
Map.Entry<String, RecoveredUserResources> userEntry = it.next();
String user = userEntry.getKey(); String user = userEntry.getKey();
RecoveredUserResources userResources = userEntry.getValue(); RecoveredUserResources userResources = userEntry.getValue();
trackerState = userResources.getPrivateTrackerState(); trackerState = userResources.getPrivateTrackerState();
if (!trackerState.isEmpty()) { if (!trackerState.isEmpty()) {
LocalResourcesTracker tracker = new LocalResourcesTrackerImpl(user, LocalResourcesTracker tracker = new LocalResourcesTrackerImpl(user,
null, dispatcher, true, super.getConfig(), stateStore, dirsHandler); null, dispatcher, true, super.getConfig(), stateStore,
dirsHandler);
LocalResourcesTracker oldTracker = privateRsrc.putIfAbsent(user, LocalResourcesTracker oldTracker = privateRsrc.putIfAbsent(user,
tracker); tracker);
if (oldTracker != null) { if (oldTracker != null) {
@ -337,6 +342,7 @@ public class ResourceLocalizationService extends CompositeService
} }
} }
} }
}
private void recoverTrackerResources(LocalResourcesTracker tracker, private void recoverTrackerResources(LocalResourcesTracker tracker,
LocalResourceTrackerState state) throws URISyntaxException { LocalResourceTrackerState state) throws URISyntaxException {

View File

@ -66,6 +66,7 @@ import org.slf4j.LoggerFactory;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.io.Serializable; import java.io.Serializable;
import java.util.AbstractMap;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
@ -73,6 +74,7 @@ import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Map.Entry; import java.util.Map.Entry;
import java.util.NoSuchElementException;
import java.util.Set; import java.util.Set;
import java.util.Timer; import java.util.Timer;
import java.util.TimerTask; import java.util.TimerTask;
@ -225,68 +227,119 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
return isHealthy; return isHealthy;
} }
@Override // LeveldbIterator starting at startkey
public List<RecoveredContainerState> loadContainersState() private LeveldbIterator getLevelDBIterator(String startKey)
throws IOException { throws IOException {
ArrayList<RecoveredContainerState> containers =
new ArrayList<RecoveredContainerState>();
ArrayList<ContainerId> containersToRemove =
new ArrayList<ContainerId>();
LeveldbIterator iter = null;
try { try {
iter = new LeveldbIterator(db); LeveldbIterator it = new LeveldbIterator(db);
iter.seek(bytes(CONTAINERS_KEY_PREFIX)); it.seek(bytes(startKey));
return it;
} catch (DBException e) {
throw new IOException(e);
}
}
while (iter.hasNext()) { // Base Recovery Iterator
Entry<byte[], byte[]> entry = iter.peekNext(); private abstract class BaseRecoveryIterator<T> implements
RecoveryIterator<T> {
LeveldbIterator it;
T nextItem;
BaseRecoveryIterator(String dbKey) throws IOException {
this.it = getLevelDBIterator(dbKey);
this.nextItem = null;
}
protected abstract T getNextItem(LeveldbIterator it) throws IOException;
@Override
public boolean hasNext() throws IOException {
if (nextItem == null) {
nextItem = getNextItem(it);
}
return (nextItem != null);
}
@Override
public T next() throws IOException, NoSuchElementException {
T tmp = nextItem;
if (tmp != null) {
nextItem = null;
return tmp;
} else {
tmp = getNextItem(it);
if (tmp == null) {
throw new NoSuchElementException();
}
return tmp;
}
}
@Override
public void close() throws IOException {
if (it != null) {
it.close();
}
}
}
// Container Recovery Iterator
private class ContainerStateIterator extends
BaseRecoveryIterator<RecoveredContainerState> {
ContainerStateIterator() throws IOException {
super(CONTAINERS_KEY_PREFIX);
}
@Override
protected RecoveredContainerState getNextItem(LeveldbIterator it)
throws IOException {
return getNextRecoveredContainer(it);
}
}
private RecoveredContainerState getNextRecoveredContainer(LeveldbIterator it)
throws IOException {
RecoveredContainerState rcs = null;
try {
while (it.hasNext()) {
Entry<byte[], byte[]> entry = it.peekNext();
String key = asString(entry.getKey()); String key = asString(entry.getKey());
if (!key.startsWith(CONTAINERS_KEY_PREFIX)) { if (!key.startsWith(CONTAINERS_KEY_PREFIX)) {
break; return null;
} }
int idEndPos = key.indexOf('/', CONTAINERS_KEY_PREFIX.length()); int idEndPos = key.indexOf('/', CONTAINERS_KEY_PREFIX.length());
if (idEndPos < 0) { if (idEndPos < 0) {
throw new IOException("Unable to determine container in key: " + key); throw new IOException("Unable to determine container in key: " + key);
} }
ContainerId containerId = ContainerId.fromString( String keyPrefix = key.substring(0, idEndPos + 1);
key.substring(CONTAINERS_KEY_PREFIX.length(), idEndPos)); rcs = loadContainerState(it, keyPrefix);
String keyPrefix = key.substring(0, idEndPos+1);
RecoveredContainerState rcs = loadContainerState(containerId,
iter, keyPrefix);
// Don't load container without StartContainerRequest
if (rcs.startRequest != null) { if (rcs.startRequest != null) {
containers.add(rcs); break;
} else { } else {
containersToRemove.add(containerId); removeContainer(rcs.getContainerId());
rcs = null;
} }
} }
} catch (DBException e) { } catch (DBException e) {
throw new IOException(e); throw new IOException(e);
} finally {
if (iter != null) {
iter.close();
} }
return rcs;
} }
// remove container without StartContainerRequest
for (ContainerId containerId : containersToRemove) { @Override
LOG.warn("Remove container " + containerId + public RecoveryIterator<RecoveredContainerState> getContainerStateIterator()
" with incomplete records"); throws IOException {
try { return new ContainerStateIterator();
removeContainer(containerId);
// TODO: kill and cleanup the leaked container
} catch (IOException e) {
LOG.error("Unable to remove container " + containerId +
" in store", e);
}
} }
return containers; private RecoveredContainerState loadContainerState(LeveldbIterator iter,
} String keyPrefix) throws IOException {
ContainerId containerId = ContainerId.fromString(
private RecoveredContainerState loadContainerState(ContainerId containerId, keyPrefix.substring(CONTAINERS_KEY_PREFIX.length(),
LeveldbIterator iter, String keyPrefix) throws IOException { keyPrefix.length()-1));
RecoveredContainerState rcs = new RecoveredContainerState(); RecoveredContainerState rcs = new RecoveredContainerState(containerId);
rcs.status = RecoveredContainerStatus.REQUESTED; rcs.status = RecoveredContainerStatus.REQUESTED;
while (iter.hasNext()) { while (iter.hasNext()) {
Entry<byte[],byte[]> entry = iter.peekNext(); Entry<byte[],byte[]> entry = iter.peekNext();
@ -680,35 +733,45 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
} }
// Application Recovery Iterator
private class ApplicationStateIterator extends
BaseRecoveryIterator<ContainerManagerApplicationProto> {
ApplicationStateIterator() throws IOException {
super(APPLICATIONS_KEY_PREFIX);
}
@Override
protected ContainerManagerApplicationProto getNextItem(LeveldbIterator it)
throws IOException {
return getNextRecoveredApplication(it);
}
}
private ContainerManagerApplicationProto getNextRecoveredApplication(
LeveldbIterator it) throws IOException {
ContainerManagerApplicationProto applicationProto = null;
try {
if (it.hasNext()) {
Entry<byte[], byte[]> entry = it.next();
String key = asString(entry.getKey());
if (!key.startsWith(APPLICATIONS_KEY_PREFIX)) {
return null;
}
applicationProto = ContainerManagerApplicationProto.parseFrom(
entry.getValue());
}
} catch (DBException e) {
throw new IOException(e);
}
return applicationProto;
}
@Override @Override
public RecoveredApplicationsState loadApplicationsState() public RecoveredApplicationsState loadApplicationsState()
throws IOException { throws IOException {
RecoveredApplicationsState state = new RecoveredApplicationsState(); RecoveredApplicationsState state = new RecoveredApplicationsState();
state.applications = new ArrayList<ContainerManagerApplicationProto>(); state.it = new ApplicationStateIterator();
String keyPrefix = APPLICATIONS_KEY_PREFIX;
LeveldbIterator iter = null;
try {
iter = new LeveldbIterator(db);
iter.seek(bytes(keyPrefix));
while (iter.hasNext()) {
Entry<byte[], byte[]> entry = iter.next();
String key = asString(entry.getKey());
if (!key.startsWith(keyPrefix)) {
break;
}
state.applications.add(
ContainerManagerApplicationProto.parseFrom(entry.getValue()));
}
} catch (DBException e) {
throw new IOException(e);
} finally {
if (iter != null) {
iter.close();
}
}
cleanupDeprecatedFinishedApps(); cleanupDeprecatedFinishedApps();
return state; return state;
} }
@ -752,24 +815,29 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
} }
// User Resource Recovery Iterator.
private class UserResourcesIterator extends
BaseRecoveryIterator<Entry<String, RecoveredUserResources>> {
UserResourcesIterator() throws IOException {
super(LOCALIZATION_PRIVATE_KEY_PREFIX);
}
@Override @Override
public RecoveredLocalizationState loadLocalizationState() protected Entry<String, RecoveredUserResources> getNextItem(
throws IOException { LeveldbIterator it) throws IOException {
RecoveredLocalizationState state = new RecoveredLocalizationState(); return getNextRecoveredPrivateLocalizationEntry(it);
}
}
LeveldbIterator iter = null; private Entry<String, RecoveredUserResources> getNextRecoveredPrivateLocalizationEntry(
LeveldbIterator it) throws IOException {
Entry<String, RecoveredUserResources> localEntry = null;
try { try {
iter = new LeveldbIterator(db); if (it.hasNext()) {
iter.seek(bytes(LOCALIZATION_PUBLIC_KEY_PREFIX)); Entry<byte[], byte[]> entry = it.peekNext();
state.publicTrackerState = loadResourceTrackerState(iter,
LOCALIZATION_PUBLIC_KEY_PREFIX);
iter.seek(bytes(LOCALIZATION_PRIVATE_KEY_PREFIX));
while (iter.hasNext()) {
Entry<byte[],byte[]> entry = iter.peekNext();
String key = asString(entry.getKey()); String key = asString(entry.getKey());
if (!key.startsWith(LOCALIZATION_PRIVATE_KEY_PREFIX)) { if (!key.startsWith(LOCALIZATION_PRIVATE_KEY_PREFIX)) {
break; return null;
} }
int userEndPos = key.indexOf('/', int userEndPos = key.indexOf('/',
@ -780,17 +848,24 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
} }
String user = key.substring( String user = key.substring(
LOCALIZATION_PRIVATE_KEY_PREFIX.length(), userEndPos); LOCALIZATION_PRIVATE_KEY_PREFIX.length(), userEndPos);
state.userResources.put(user, loadUserLocalizedResources(iter, RecoveredUserResources val = loadUserLocalizedResources(it,
key.substring(0, userEndPos+1))); key.substring(0, userEndPos+1));
localEntry = new AbstractMap.SimpleEntry<>(user, val);
} }
} catch (DBException e) { } catch (DBException e) {
throw new IOException(e); throw new IOException(e);
} finally {
if (iter != null) {
iter.close();
} }
return localEntry;
} }
@Override
public RecoveredLocalizationState loadLocalizationState()
throws IOException {
RecoveredLocalizationState state = new RecoveredLocalizationState();
LeveldbIterator it = getLevelDBIterator(LOCALIZATION_PUBLIC_KEY_PREFIX);
state.publicTrackerState = loadResourceTrackerState(it,
LOCALIZATION_PUBLIC_KEY_PREFIX);
state.it = new UserResourcesIterator();
return state; return state;
} }
@ -800,7 +875,7 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
final String startedPrefix = keyPrefix + LOCALIZATION_STARTED_SUFFIX; final String startedPrefix = keyPrefix + LOCALIZATION_STARTED_SUFFIX;
LocalResourceTrackerState state = new LocalResourceTrackerState(); LocalResourceTrackerState state = new LocalResourceTrackerState();
while (iter.hasNext()) { while (iter.hasNext()) {
Entry<byte[],byte[]> entry = iter.peekNext(); Entry<byte[], byte[]> entry = iter.peekNext();
String key = asString(entry.getKey()); String key = asString(entry.getKey());
if (!key.startsWith(keyPrefix)) { if (!key.startsWith(keyPrefix)) {
break; break;
@ -981,32 +1056,44 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
+ LOCALIZATION_APPCACHE_SUFFIX + appId + "/"; + LOCALIZATION_APPCACHE_SUFFIX + appId + "/";
} }
// Deletion State Recovery Iterator.
private class DeletionStateIterator extends
BaseRecoveryIterator<DeletionServiceDeleteTaskProto> {
DeletionStateIterator() throws IOException {
super(DELETION_TASK_KEY_PREFIX);
}
@Override
protected DeletionServiceDeleteTaskProto getNextItem(LeveldbIterator it)
throws IOException {
return getNextRecoveredDeletionService(it);
}
}
private DeletionServiceDeleteTaskProto getNextRecoveredDeletionService(
LeveldbIterator it) throws IOException {
DeletionServiceDeleteTaskProto deleteProto = null;
try {
if (it.hasNext()) {
Entry<byte[], byte[]> entry = it.next();
String key = asString(entry.getKey());
if (!key.startsWith(DELETION_TASK_KEY_PREFIX)) {
return null;
}
deleteProto = DeletionServiceDeleteTaskProto.parseFrom(
entry.getValue());
}
} catch (DBException e) {
throw new IOException(e);
}
return deleteProto;
}
@Override @Override
public RecoveredDeletionServiceState loadDeletionServiceState() public RecoveredDeletionServiceState loadDeletionServiceState()
throws IOException { throws IOException {
RecoveredDeletionServiceState state = new RecoveredDeletionServiceState(); RecoveredDeletionServiceState state = new RecoveredDeletionServiceState();
state.tasks = new ArrayList<DeletionServiceDeleteTaskProto>(); state.it = new DeletionStateIterator();
LeveldbIterator iter = null;
try {
iter = new LeveldbIterator(db);
iter.seek(bytes(DELETION_TASK_KEY_PREFIX));
while (iter.hasNext()) {
Entry<byte[], byte[]> entry = iter.next();
String key = asString(entry.getKey());
if (!key.startsWith(DELETION_TASK_KEY_PREFIX)) {
break;
}
state.tasks.add(
DeletionServiceDeleteTaskProto.parseFrom(entry.getValue()));
}
} catch (DBException e) {
throw new IOException(e);
} finally {
if (iter != null) {
iter.close();
}
}
return state; return state;
} }
@ -1033,29 +1120,44 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
} }
} }
private MasterKey getMasterKey(String dbKey) throws IOException {
try{
byte[] data = db.get(bytes(dbKey));
if (data == null || data.length == 0) {
return null;
}
return parseMasterKey(data);
} catch (DBException e) {
throw new IOException(e);
}
}
// Recover NMTokens Iterator
private class NMTokensStateIterator extends
BaseRecoveryIterator<Entry<ApplicationAttemptId, MasterKey>> {
NMTokensStateIterator() throws IOException {
super(NM_TOKENS_KEY_PREFIX);
}
@Override @Override
public RecoveredNMTokensState loadNMTokensState() throws IOException { protected Entry<ApplicationAttemptId, MasterKey> getNextItem(
RecoveredNMTokensState state = new RecoveredNMTokensState(); LeveldbIterator it) throws IOException {
state.applicationMasterKeys = return getNextMasterKeyEntry(it);
new HashMap<ApplicationAttemptId, MasterKey>(); }
LeveldbIterator iter = null; }
private Entry<ApplicationAttemptId, MasterKey> getNextMasterKeyEntry(
LeveldbIterator it) throws IOException {
Entry<ApplicationAttemptId, MasterKey> masterKeyentry = null;
try { try {
iter = new LeveldbIterator(db); while (it.hasNext()) {
iter.seek(bytes(NM_TOKENS_KEY_PREFIX)); Entry<byte[], byte[]> entry = it.next();
while (iter.hasNext()) {
Entry<byte[], byte[]> entry = iter.next();
String fullKey = asString(entry.getKey()); String fullKey = asString(entry.getKey());
if (!fullKey.startsWith(NM_TOKENS_KEY_PREFIX)) { if (!fullKey.startsWith(NM_TOKENS_KEY_PREFIX)) {
break; break;
} }
String key = fullKey.substring(NM_TOKENS_KEY_PREFIX.length()); String key = fullKey.substring(NM_TOKENS_KEY_PREFIX.length());
if (key.equals(CURRENT_MASTER_KEY_SUFFIX)) { if (key.startsWith(ApplicationAttemptId.appAttemptIdStrPrefix)) {
state.currentMasterKey = parseMasterKey(entry.getValue());
} else if (key.equals(PREV_MASTER_KEY_SUFFIX)) {
state.previousMasterKey = parseMasterKey(entry.getValue());
} else if (key.startsWith(
ApplicationAttemptId.appAttemptIdStrPrefix)) {
ApplicationAttemptId attempt; ApplicationAttemptId attempt;
try { try {
attempt = ApplicationAttemptId.fromString(key); attempt = ApplicationAttemptId.fromString(key);
@ -1063,17 +1165,25 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
throw new IOException("Bad application master key state for " throw new IOException("Bad application master key state for "
+ fullKey, e); + fullKey, e);
} }
state.applicationMasterKeys.put(attempt, masterKeyentry = new AbstractMap.SimpleEntry<>(attempt,
parseMasterKey(entry.getValue())); parseMasterKey(entry.getValue()));
break;
} }
} }
} catch (DBException e) { } catch (DBException e) {
throw new IOException(e); throw new IOException(e);
} finally {
if (iter != null) {
iter.close();
} }
return masterKeyentry;
} }
@Override
public RecoveredNMTokensState loadNMTokensState() throws IOException {
RecoveredNMTokensState state = new RecoveredNMTokensState();
state.currentMasterKey = getMasterKey(NM_TOKENS_KEY_PREFIX
+ CURRENT_MASTER_KEY_SUFFIX);
state.previousMasterKey = getMasterKey(NM_TOKENS_KEY_PREFIX
+ PREV_MASTER_KEY_SUFFIX);
state.it = new NMTokensStateIterator();
return state; return state;
} }
@ -1122,45 +1232,45 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
} }
} }
// Recover ContainersToken Iterator.
private class ContainerTokensStateIterator extends
BaseRecoveryIterator<Entry<ContainerId, Long>> {
ContainerTokensStateIterator() throws IOException {
super(CONTAINER_TOKENS_KEY_PREFIX);
}
@Override @Override
public RecoveredContainerTokensState loadContainerTokensState() protected Entry<ContainerId, Long> getNextItem(LeveldbIterator it)
throws IOException { throws IOException {
RecoveredContainerTokensState state = new RecoveredContainerTokensState(); return getNextContainerToken(it);
state.activeTokens = new HashMap<ContainerId, Long>(); }
LeveldbIterator iter = null; }
private Entry<ContainerId, Long> getNextContainerToken(LeveldbIterator it)
throws IOException {
Entry<ContainerId, Long> containerTokenEntry = null;
try { try {
iter = new LeveldbIterator(db); while (it.hasNext()) {
iter.seek(bytes(CONTAINER_TOKENS_KEY_PREFIX)); Entry<byte[], byte[]> entry = it.next();
final int containerTokensKeyPrefixLength =
CONTAINER_TOKENS_KEY_PREFIX.length();
while (iter.hasNext()) {
Entry<byte[], byte[]> entry = iter.next();
String fullKey = asString(entry.getKey()); String fullKey = asString(entry.getKey());
if (!fullKey.startsWith(CONTAINER_TOKENS_KEY_PREFIX)) { if (!fullKey.startsWith(CONTAINER_TOKENS_KEY_PREFIX)) {
break; break;
} }
String key = fullKey.substring(containerTokensKeyPrefixLength); String key = fullKey.substring(CONTAINER_TOKENS_KEY_PREFIX.length());
if (key.equals(CURRENT_MASTER_KEY_SUFFIX)) { if (key.startsWith(ConverterUtils.CONTAINER_PREFIX)) {
state.currentMasterKey = parseMasterKey(entry.getValue()); containerTokenEntry = loadContainerToken(fullKey, key,
} else if (key.equals(PREV_MASTER_KEY_SUFFIX)) { entry.getValue());
state.previousMasterKey = parseMasterKey(entry.getValue()); break;
} else if (key.startsWith(ConverterUtils.CONTAINER_PREFIX)) {
loadContainerToken(state, fullKey, key, entry.getValue());
} }
} }
} catch (DBException e) { } catch (DBException e) {
throw new IOException(e); throw new IOException(e);
} finally {
if (iter != null) {
iter.close();
} }
} return containerTokenEntry;
return state;
} }
private static void loadContainerToken(RecoveredContainerTokensState state, private static Entry<ContainerId, Long> loadContainerToken(String key,
String key, String containerIdStr, byte[] value) throws IOException { String containerIdStr, byte[] value) throws IOException {
ContainerId containerId; ContainerId containerId;
Long expTime; Long expTime;
try { try {
@ -1169,7 +1279,19 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
} catch (IllegalArgumentException e) { } catch (IllegalArgumentException e) {
throw new IOException("Bad container token state for " + key, e); throw new IOException("Bad container token state for " + key, e);
} }
state.activeTokens.put(containerId, expTime); return new AbstractMap.SimpleEntry<>(containerId, expTime);
}
@Override
public RecoveredContainerTokensState loadContainerTokensState()
throws IOException {
RecoveredContainerTokensState state = new RecoveredContainerTokensState();
state.currentMasterKey = getMasterKey(CONTAINER_TOKENS_KEY_PREFIX
+ CURRENT_MASTER_KEY_SUFFIX);
state.previousMasterKey = getMasterKey(CONTAINER_TOKENS_KEY_PREFIX
+ PREV_MASTER_KEY_SUFFIX);
state.it = new ContainerTokensStateIterator();
return state;
} }
@Override @Override

View File

@ -65,7 +65,7 @@ public class NMNullStateStoreService extends NMStateStoreService {
} }
@Override @Override
public List<RecoveredContainerState> loadContainersState() public RecoveryIterator<RecoveredContainerState> getContainerStateIterator()
throws IOException { throws IOException {
throw new UnsupportedOperationException( throw new UnsupportedOperationException(
"Recovery not supported by this state store"); "Recovery not supported by this state store");

View File

@ -24,6 +24,7 @@ import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Map.Entry;
import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.classification.InterfaceStability.Unstable;
@ -67,12 +68,11 @@ public abstract class NMStateStoreService extends AbstractService {
} }
public static class RecoveredApplicationsState { public static class RecoveredApplicationsState {
List<ContainerManagerApplicationProto> applications; RecoveryIterator<ContainerManagerApplicationProto> it = null;
public List<ContainerManagerApplicationProto> getApplications() { public RecoveryIterator<ContainerManagerApplicationProto> getIterator() {
return applications; return it;
} }
} }
/** /**
@ -106,6 +106,15 @@ public abstract class NMStateStoreService extends AbstractService {
RecoveredContainerType.RECOVER; RecoveredContainerType.RECOVER;
private long startTime; private long startTime;
private ResourceMappings resMappings = new ResourceMappings(); private ResourceMappings resMappings = new ResourceMappings();
private final ContainerId containerId;
RecoveredContainerState(ContainerId containerId){
this.containerId = containerId;
}
public ContainerId getContainerId() {
return containerId;
}
public RecoveredContainerStatus getStatus() { public RecoveredContainerStatus getStatus() {
return status; return status;
@ -248,30 +257,33 @@ public abstract class NMStateStoreService extends AbstractService {
public static class RecoveredLocalizationState { public static class RecoveredLocalizationState {
LocalResourceTrackerState publicTrackerState = LocalResourceTrackerState publicTrackerState =
new LocalResourceTrackerState(); new LocalResourceTrackerState();
Map<String, RecoveredUserResources> userResources = RecoveryIterator<Entry<String, RecoveredUserResources>> it = null;
new HashMap<String, RecoveredUserResources>();
public LocalResourceTrackerState getPublicTrackerState() { public LocalResourceTrackerState getPublicTrackerState() {
return publicTrackerState; return publicTrackerState;
} }
public Map<String, RecoveredUserResources> getUserResources() { public RecoveryIterator<Entry<String, RecoveredUserResources>> getIterator() {
return userResources; return it;
} }
} }
public static class RecoveredDeletionServiceState { public static class RecoveredDeletionServiceState {
List<DeletionServiceDeleteTaskProto> tasks; RecoveryIterator<DeletionServiceDeleteTaskProto> it = null;
public List<DeletionServiceDeleteTaskProto> getTasks() { public RecoveryIterator<DeletionServiceDeleteTaskProto> getIterator(){
return tasks; return it;
} }
} }
public static class RecoveredNMTokensState { public static class RecoveredNMTokensState {
MasterKey currentMasterKey; MasterKey currentMasterKey;
MasterKey previousMasterKey; MasterKey previousMasterKey;
Map<ApplicationAttemptId, MasterKey> applicationMasterKeys; RecoveryIterator<Entry<ApplicationAttemptId, MasterKey>> it = null;
public RecoveryIterator<Entry<ApplicationAttemptId, MasterKey>> getIterator() {
return it;
}
public MasterKey getCurrentMasterKey() { public MasterKey getCurrentMasterKey() {
return currentMasterKey; return currentMasterKey;
@ -281,15 +293,16 @@ public abstract class NMStateStoreService extends AbstractService {
return previousMasterKey; return previousMasterKey;
} }
public Map<ApplicationAttemptId, MasterKey> getApplicationMasterKeys() {
return applicationMasterKeys;
}
} }
public static class RecoveredContainerTokensState { public static class RecoveredContainerTokensState {
MasterKey currentMasterKey; MasterKey currentMasterKey;
MasterKey previousMasterKey; MasterKey previousMasterKey;
Map<ContainerId, Long> activeTokens; RecoveryIterator<Entry<ContainerId, Long>> it = null;
public RecoveryIterator<Entry<ContainerId, Long>> getIterator() {
return it;
}
public MasterKey getCurrentMasterKey() { public MasterKey getCurrentMasterKey() {
return currentMasterKey; return currentMasterKey;
@ -299,9 +312,6 @@ public abstract class NMStateStoreService extends AbstractService {
return previousMasterKey; return previousMasterKey;
} }
public Map<ContainerId, Long> getActiveTokens() {
return activeTokens;
}
} }
public static class RecoveredLogDeleterState { public static class RecoveredLogDeleterState {
@ -400,11 +410,10 @@ public abstract class NMStateStoreService extends AbstractService {
/** /**
* Load the state of containers * get the Recovered Container State Iterator
* @return recovered state for containers * @return recovery iterator
* @throws IOException
*/ */
public abstract List<RecoveredContainerState> loadContainersState() public abstract RecoveryIterator<RecoveredContainerState> getContainerStateIterator()
throws IOException; throws IOException;
/** /**

View File

@ -0,0 +1,41 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.nodemanager.recovery;
import java.io.Closeable;
import java.io.IOException;
import java.util.NoSuchElementException;
/**
* A wrapper for an Iterator that translates the raw RuntimeExceptions that
* can be thrown during iteration into IOException.
*
* Because this interface extends Closeable, an instance can be managed with
* a try-with-resources statement so that it is closed once iteration is done.
*
* @param <T> the type of element returned by this iterator
*/
public interface RecoveryIterator<T> extends Closeable {
/**
* Returns true if the iteration has more elements.
*
* @return true if a subsequent call to next() would return an element
* @throws IOException if the implementation fails while checking for
*         further elements
*/
boolean hasNext() throws IOException;
/**
* Returns the next element in the iteration.
*
* @return the next element
* @throws IOException if the implementation fails while producing the
*         next element
* @throws NoSuchElementException presumably when no elements remain,
*         mirroring java.util.Iterator (TODO confirm per implementation)
*/
T next() throws IOException, NoSuchElementException;
}

View File

@ -24,6 +24,8 @@ import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map.Entry; import java.util.Map.Entry;
import java.util.TreeMap; import java.util.TreeMap;
import org.apache.hadoop.yarn.server.nodemanager.recovery.RecoveryIterator;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -90,7 +92,9 @@ public class NMContainerTokenSecretManager extends
super.serialNo = super.currentMasterKey.getMasterKey().getKeyId() + 1; super.serialNo = super.currentMasterKey.getMasterKey().getKeyId() + 1;
} }
for (Entry<ContainerId, Long> entry : state.getActiveTokens().entrySet()) { try (RecoveryIterator<Entry<ContainerId, Long>> it = state.getIterator()) {
while (it.hasNext()) {
Entry<ContainerId, Long> entry = it.next();
ContainerId containerId = entry.getKey(); ContainerId containerId = entry.getKey();
Long expTime = entry.getValue(); Long expTime = entry.getValue();
List<ContainerId> containerList = List<ContainerId> containerList =
@ -104,6 +108,7 @@ public class NMContainerTokenSecretManager extends
} }
} }
} }
}
private void updateCurrentMasterKey(MasterKeyData key) { private void updateCurrentMasterKey(MasterKeyData key) {
super.currentMasterKey = key; super.currentMasterKey = key;

View File

@ -23,6 +23,8 @@ import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.apache.hadoop.yarn.server.nodemanager.recovery.RecoveryIterator;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -87,12 +89,15 @@ public class NMTokenSecretManagerInNM extends BaseNMTokenSecretManager {
super.serialNo = super.currentMasterKey.getMasterKey().getKeyId() + 1; super.serialNo = super.currentMasterKey.getMasterKey().getKeyId() + 1;
} }
for (Map.Entry<ApplicationAttemptId, MasterKey> entry : try (RecoveryIterator<Map.Entry<ApplicationAttemptId, MasterKey>> it =
state.getApplicationMasterKeys().entrySet()) { state.getIterator()) {
while (it.hasNext()) {
Map.Entry<ApplicationAttemptId, MasterKey> entry = it.next();
key = entry.getValue(); key = entry.getValue();
oldMasterKeys.put(entry.getKey(), oldMasterKeys.put(entry.getKey(),
new MasterKeyData(key, createSecretKey(key.getBytes().array()))); new MasterKeyData(key, createSecretKey(key.getBytes().array())));
} }
}
// reconstruct app to app attempts map // reconstruct app to app attempts map
appToAppAttemptMap.clear(); appToAppAttemptMap.clear();

View File

@ -23,6 +23,7 @@ import java.io.Serializable;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@ -56,6 +57,8 @@ public class NMMemoryStateStoreService extends NMStateStoreService {
private Map<Integer, DeletionServiceDeleteTaskProto> deleteTasks; private Map<Integer, DeletionServiceDeleteTaskProto> deleteTasks;
private RecoveredNMTokensState nmTokenState; private RecoveredNMTokensState nmTokenState;
private RecoveredContainerTokensState containerTokenState; private RecoveredContainerTokensState containerTokenState;
private Map<ApplicationAttemptId, MasterKey> applicationMasterKeys;
private Map<ContainerId, Long> activeTokens;
private Map<ApplicationId, LogDeleterProto> logDeleterState; private Map<ApplicationId, LogDeleterProto> logDeleterState;
private RecoveredAMRMProxyState amrmProxyState; private RecoveredAMRMProxyState amrmProxyState;
@ -68,10 +71,9 @@ public class NMMemoryStateStoreService extends NMStateStoreService {
apps = new HashMap<ApplicationId, ContainerManagerApplicationProto>(); apps = new HashMap<ApplicationId, ContainerManagerApplicationProto>();
containerStates = new HashMap<ContainerId, RecoveredContainerState>(); containerStates = new HashMap<ContainerId, RecoveredContainerState>();
nmTokenState = new RecoveredNMTokensState(); nmTokenState = new RecoveredNMTokensState();
nmTokenState.applicationMasterKeys = applicationMasterKeys = new HashMap<ApplicationAttemptId, MasterKey>();
new HashMap<ApplicationAttemptId, MasterKey>();
containerTokenState = new RecoveredContainerTokensState(); containerTokenState = new RecoveredContainerTokensState();
containerTokenState.activeTokens = new HashMap<ContainerId, Long>(); activeTokens = new HashMap<ContainerId, Long>();
trackerStates = new HashMap<TrackerKey, TrackerState>(); trackerStates = new HashMap<TrackerKey, TrackerState>();
deleteTasks = new HashMap<Integer, DeletionServiceDeleteTaskProto>(); deleteTasks = new HashMap<Integer, DeletionServiceDeleteTaskProto>();
logDeleterState = new HashMap<ApplicationId, LogDeleterProto>(); logDeleterState = new HashMap<ApplicationId, LogDeleterProto>();
@ -86,13 +88,39 @@ public class NMMemoryStateStoreService extends NMStateStoreService {
protected void closeStorage() { protected void closeStorage() {
} }
// Recovery Iterator Implementation.
private class NMMemoryRecoveryIterator<T> implements RecoveryIterator<T> {
private Iterator<T> it;
NMMemoryRecoveryIterator(Iterator<T> it){
this.it = it;
}
@Override
public boolean hasNext() {
return it.hasNext();
}
@Override
public T next() throws IOException {
return it.next();
}
@Override
public void close() throws IOException {
}
}
@Override @Override
public synchronized RecoveredApplicationsState loadApplicationsState() public synchronized RecoveredApplicationsState loadApplicationsState()
throws IOException { throws IOException {
RecoveredApplicationsState state = new RecoveredApplicationsState(); RecoveredApplicationsState state = new RecoveredApplicationsState();
state.applications = new ArrayList<ContainerManagerApplicationProto>( List<ContainerManagerApplicationProto> containerList =
apps.values()); new ArrayList<ContainerManagerApplicationProto>(apps.values());
state.it = new NMMemoryRecoveryIterator<ContainerManagerApplicationProto>(
containerList.iterator());
return state; return state;
} }
@ -111,13 +139,13 @@ public class NMMemoryStateStoreService extends NMStateStoreService {
} }
@Override @Override
public synchronized List<RecoveredContainerState> loadContainersState() public RecoveryIterator<RecoveredContainerState> getContainerStateIterator()
throws IOException { throws IOException {
// return a copy so caller can't modify our state // return a copy so caller can't modify our state
List<RecoveredContainerState> result = List<RecoveredContainerState> result =
new ArrayList<RecoveredContainerState>(containerStates.size()); new ArrayList<RecoveredContainerState>(containerStates.size());
for (RecoveredContainerState rcs : containerStates.values()) { for (RecoveredContainerState rcs : containerStates.values()) {
RecoveredContainerState rcsCopy = new RecoveredContainerState(); RecoveredContainerState rcsCopy = new RecoveredContainerState(rcs.getContainerId());
rcsCopy.status = rcs.status; rcsCopy.status = rcs.status;
rcsCopy.exitCode = rcs.exitCode; rcsCopy.exitCode = rcs.exitCode;
rcsCopy.killed = rcs.killed; rcsCopy.killed = rcs.killed;
@ -131,13 +159,14 @@ public class NMMemoryStateStoreService extends NMStateStoreService {
rcsCopy.setResourceMappings(rcs.getResourceMappings()); rcsCopy.setResourceMappings(rcs.getResourceMappings());
result.add(rcsCopy); result.add(rcsCopy);
} }
return result; return new NMMemoryRecoveryIterator<RecoveredContainerState>(
result.iterator());
} }
@Override @Override
public synchronized void storeContainer(ContainerId containerId, public synchronized void storeContainer(ContainerId containerId,
int version, long startTime, StartContainerRequest startRequest) { int version, long startTime, StartContainerRequest startRequest) {
RecoveredContainerState rcs = new RecoveredContainerState(); RecoveredContainerState rcs = new RecoveredContainerState(containerId);
rcs.startRequest = startRequest; rcs.startRequest = startRequest;
rcs.version = version; rcs.version = version;
try { try {
@ -284,6 +313,8 @@ public class NMMemoryStateStoreService extends NMStateStoreService {
@Override @Override
public synchronized RecoveredLocalizationState loadLocalizationState() { public synchronized RecoveredLocalizationState loadLocalizationState() {
RecoveredLocalizationState result = new RecoveredLocalizationState(); RecoveredLocalizationState result = new RecoveredLocalizationState();
Map<String, RecoveredUserResources> userResources =
new HashMap<String, RecoveredUserResources>();
for (Map.Entry<TrackerKey, TrackerState> e : trackerStates.entrySet()) { for (Map.Entry<TrackerKey, TrackerState> e : trackerStates.entrySet()) {
TrackerKey tk = e.getKey(); TrackerKey tk = e.getKey();
TrackerState ts = e.getValue(); TrackerState ts = e.getValue();
@ -294,10 +325,10 @@ public class NMMemoryStateStoreService extends NMStateStoreService {
if (tk.user == null) { if (tk.user == null) {
result.publicTrackerState = loadTrackerState(ts); result.publicTrackerState = loadTrackerState(ts);
} else { } else {
RecoveredUserResources rur = result.userResources.get(tk.user); RecoveredUserResources rur = userResources.get(tk.user);
if (rur == null) { if (rur == null) {
rur = new RecoveredUserResources(); rur = new RecoveredUserResources();
result.userResources.put(tk.user, rur); userResources.put(tk.user, rur);
} }
if (tk.appId == null) { if (tk.appId == null) {
rur.privateTrackerState = loadTrackerState(ts); rur.privateTrackerState = loadTrackerState(ts);
@ -306,6 +337,8 @@ public class NMMemoryStateStoreService extends NMStateStoreService {
} }
} }
} }
result.it = new NMMemoryRecoveryIterator<Map.Entry<String, RecoveredUserResources>>(
userResources.entrySet().iterator());
return result; return result;
} }
@ -341,8 +374,10 @@ public class NMMemoryStateStoreService extends NMStateStoreService {
throws IOException { throws IOException {
RecoveredDeletionServiceState result = RecoveredDeletionServiceState result =
new RecoveredDeletionServiceState(); new RecoveredDeletionServiceState();
result.tasks = new ArrayList<DeletionServiceDeleteTaskProto>( List<DeletionServiceDeleteTaskProto> deleteTaskProtos =
deleteTasks.values()); new ArrayList<DeletionServiceDeleteTaskProto>(deleteTasks.values());
result.it = new NMMemoryRecoveryIterator<DeletionServiceDeleteTaskProto>(
deleteTaskProtos.iterator());
return result; return result;
} }
@ -365,9 +400,10 @@ public class NMMemoryStateStoreService extends NMStateStoreService {
RecoveredNMTokensState result = new RecoveredNMTokensState(); RecoveredNMTokensState result = new RecoveredNMTokensState();
result.currentMasterKey = nmTokenState.currentMasterKey; result.currentMasterKey = nmTokenState.currentMasterKey;
result.previousMasterKey = nmTokenState.previousMasterKey; result.previousMasterKey = nmTokenState.previousMasterKey;
result.applicationMasterKeys = Map<ApplicationAttemptId, MasterKey> masterKeysMap =
new HashMap<ApplicationAttemptId, MasterKey>( new HashMap<ApplicationAttemptId, MasterKey>(applicationMasterKeys);
nmTokenState.applicationMasterKeys); result.it = new NMMemoryRecoveryIterator<Map.Entry<ApplicationAttemptId, MasterKey>>(
masterKeysMap.entrySet().iterator());
return result; return result;
} }
@ -389,14 +425,14 @@ public class NMMemoryStateStoreService extends NMStateStoreService {
public synchronized void storeNMTokenApplicationMasterKey( public synchronized void storeNMTokenApplicationMasterKey(
ApplicationAttemptId attempt, MasterKey key) throws IOException { ApplicationAttemptId attempt, MasterKey key) throws IOException {
MasterKeyPBImpl keypb = (MasterKeyPBImpl) key; MasterKeyPBImpl keypb = (MasterKeyPBImpl) key;
nmTokenState.applicationMasterKeys.put(attempt, applicationMasterKeys.put(attempt,
new MasterKeyPBImpl(keypb.getProto())); new MasterKeyPBImpl(keypb.getProto()));
} }
@Override @Override
public synchronized void removeNMTokenApplicationMasterKey( public synchronized void removeNMTokenApplicationMasterKey(
ApplicationAttemptId attempt) throws IOException { ApplicationAttemptId attempt) throws IOException {
nmTokenState.applicationMasterKeys.remove(attempt); applicationMasterKeys.remove(attempt);
} }
@ -408,8 +444,10 @@ public class NMMemoryStateStoreService extends NMStateStoreService {
new RecoveredContainerTokensState(); new RecoveredContainerTokensState();
result.currentMasterKey = containerTokenState.currentMasterKey; result.currentMasterKey = containerTokenState.currentMasterKey;
result.previousMasterKey = containerTokenState.previousMasterKey; result.previousMasterKey = containerTokenState.previousMasterKey;
result.activeTokens = Map<ContainerId, Long> containersTokenMap =
new HashMap<ContainerId, Long>(containerTokenState.activeTokens); new HashMap<ContainerId, Long>(activeTokens);
result.it = new NMMemoryRecoveryIterator<Map.Entry<ContainerId, Long>>(
containersTokenMap.entrySet().iterator());
return result; return result;
} }
@ -432,13 +470,13 @@ public class NMMemoryStateStoreService extends NMStateStoreService {
@Override @Override
public synchronized void storeContainerToken(ContainerId containerId, public synchronized void storeContainerToken(ContainerId containerId,
Long expirationTime) throws IOException { Long expirationTime) throws IOException {
containerTokenState.activeTokens.put(containerId, expirationTime); activeTokens.put(containerId, expirationTime);
} }
@Override @Override
public synchronized void removeContainerToken(ContainerId containerId) public synchronized void removeContainerToken(ContainerId containerId)
throws IOException { throws IOException {
containerTokenState.activeTokens.remove(containerId); activeTokens.remove(containerId);
} }

View File

@ -125,6 +125,73 @@ public class TestNMLeveldbStateStoreService {
FileUtil.fullyDelete(TMP_DIR); FileUtil.fullyDelete(TMP_DIR);
} }
/**
 * Drains a container-state recovery iterator into a list, preserving the
 * iteration order, and closes the iterator afterwards.
 *
 * @param it the recovery iterator to consume; it is closed before returning
 * @return every recovered container state the iterator produced
 * @throws IOException if iterating or closing the iterator fails
 */
private List<RecoveredContainerState> loadContainersState(
    RecoveryIterator<RecoveredContainerState> it) throws IOException {
  List<RecoveredContainerState> containers =
      new ArrayList<RecoveredContainerState>();
  // RecoveryIterator is Closeable; close it even if iteration throws so a
  // store-backed iterator does not leak across state store restarts.
  try (RecoveryIterator<RecoveredContainerState> iter = it) {
    while (iter.hasNext()) {
      containers.add(iter.next());
    }
  }
  return containers;
}
/**
 * Drains an application recovery iterator into a list, preserving the
 * iteration order, and closes the iterator afterwards.
 *
 * @param it the recovery iterator to consume; it is closed before returning
 * @return every application proto the iterator produced
 * @throws IOException if iterating or closing the iterator fails
 */
private List<ContainerManagerApplicationProto> loadApplicationProtos(
    RecoveryIterator<ContainerManagerApplicationProto> it)
    throws IOException {
  List<ContainerManagerApplicationProto> applicationProtos =
      new ArrayList<ContainerManagerApplicationProto>();
  // RecoveryIterator is Closeable; close it even if iteration throws so a
  // store-backed iterator does not leak across state store restarts.
  try (RecoveryIterator<ContainerManagerApplicationProto> iter = it) {
    while (iter.hasNext()) {
      applicationProtos.add(iter.next());
    }
  }
  return applicationProtos;
}
/**
 * Drains a deletion-task recovery iterator into a list, preserving the
 * iteration order, and closes the iterator afterwards.
 *
 * @param it the recovery iterator to consume; it is closed before returning
 * @return every deletion task proto the iterator produced
 * @throws IOException if iterating or closing the iterator fails
 */
private List<DeletionServiceDeleteTaskProto> loadDeletionTaskProtos(
    RecoveryIterator<DeletionServiceDeleteTaskProto> it) throws IOException {
  List<DeletionServiceDeleteTaskProto> deleteTaskProtos =
      new ArrayList<DeletionServiceDeleteTaskProto>();
  // RecoveryIterator is Closeable; close it even if iteration throws so a
  // store-backed iterator does not leak across state store restarts.
  try (RecoveryIterator<DeletionServiceDeleteTaskProto> iter = it) {
    while (iter.hasNext()) {
      deleteTaskProtos.add(iter.next());
    }
  }
  return deleteTaskProtos;
}
/**
 * Drains a user-resources recovery iterator into a map keyed by the entry
 * key, and closes the iterator afterwards. If the iterator yields the same
 * key more than once, the last value wins.
 *
 * @param it the recovery iterator to consume; it is closed before returning
 * @return a map of every entry the iterator produced
 * @throws IOException if iterating or closing the iterator fails
 */
private Map<String, RecoveredUserResources> loadUserResources(
    RecoveryIterator<Map.Entry<String, RecoveredUserResources>> it)
    throws IOException {
  Map<String, RecoveredUserResources> userResources =
      new HashMap<String, RecoveredUserResources>();
  // RecoveryIterator is Closeable; close it even if iteration throws so a
  // store-backed iterator does not leak across state store restarts.
  try (RecoveryIterator<Map.Entry<String, RecoveredUserResources>> iter =
      it) {
    while (iter.hasNext()) {
      Map.Entry<String, RecoveredUserResources> entry = iter.next();
      userResources.put(entry.getKey(), entry.getValue());
    }
  }
  return userResources;
}
/**
 * Drains an NM-token recovery iterator into a map from application attempt
 * to master key, and closes the iterator afterwards. If the iterator yields
 * the same key more than once, the last value wins.
 *
 * @param it the recovery iterator to consume; it is closed before returning
 * @return a map of every entry the iterator produced
 * @throws IOException if iterating or closing the iterator fails
 */
private Map<ApplicationAttemptId, MasterKey> loadNMTokens(
    RecoveryIterator<Map.Entry<ApplicationAttemptId, MasterKey>> it)
    throws IOException {
  Map<ApplicationAttemptId, MasterKey> nmTokens =
      new HashMap<ApplicationAttemptId, MasterKey>();
  // RecoveryIterator is Closeable; close it even if iteration throws so a
  // store-backed iterator does not leak across state store restarts.
  try (RecoveryIterator<Map.Entry<ApplicationAttemptId, MasterKey>> iter =
      it) {
    while (iter.hasNext()) {
      Map.Entry<ApplicationAttemptId, MasterKey> entry = iter.next();
      nmTokens.put(entry.getKey(), entry.getValue());
    }
  }
  return nmTokens;
}
/**
 * Drains a container-token recovery iterator into a map from container id
 * to the recovered Long value, and closes the iterator afterwards. If the
 * iterator yields the same key more than once, the last value wins.
 *
 * @param it the recovery iterator to consume; it is closed before returning
 * @return a map of every entry the iterator produced
 * @throws IOException if iterating or closing the iterator fails
 */
private Map<ContainerId, Long> loadContainerTokens(
    RecoveryIterator<Map.Entry<ContainerId, Long>> it) throws IOException {
  Map<ContainerId, Long> containerTokens =
      new HashMap<ContainerId, Long>();
  // RecoveryIterator is Closeable; close it even if iteration throws so a
  // store-backed iterator does not leak across state store restarts.
  try (RecoveryIterator<Map.Entry<ContainerId, Long>> iter = it) {
    while (iter.hasNext()) {
      Map.Entry<ContainerId, Long> entry = iter.next();
      containerTokens.put(entry.getKey(), entry.getValue());
    }
  }
  return containerTokens;
}
private void restartStateStore() throws IOException { private void restartStateStore() throws IOException {
// need to close so leveldb releases database lock // need to close so leveldb releases database lock
if (stateStore != null) { if (stateStore != null) {
@ -142,7 +209,7 @@ public class TestNMLeveldbStateStoreService {
assertNotNull(pubts); assertNotNull(pubts);
assertTrue(pubts.getLocalizedResources().isEmpty()); assertTrue(pubts.getLocalizedResources().isEmpty());
assertTrue(pubts.getInProgressResources().isEmpty()); assertTrue(pubts.getInProgressResources().isEmpty());
assertTrue(state.getUserResources().isEmpty()); assertTrue(loadUserResources(state.getIterator()).isEmpty());
} }
@Test @Test
@ -192,7 +259,9 @@ public class TestNMLeveldbStateStoreService {
public void testApplicationStorage() throws IOException { public void testApplicationStorage() throws IOException {
// test empty when no state // test empty when no state
RecoveredApplicationsState state = stateStore.loadApplicationsState(); RecoveredApplicationsState state = stateStore.loadApplicationsState();
assertTrue(state.getApplications().isEmpty()); List<ContainerManagerApplicationProto> apps =
loadApplicationProtos(state.getIterator());
assertTrue(apps.isEmpty());
// store an application and verify recovered // store an application and verify recovered
final ApplicationId appId1 = ApplicationId.newInstance(1234, 1); final ApplicationId appId1 = ApplicationId.newInstance(1234, 1);
@ -204,8 +273,9 @@ public class TestNMLeveldbStateStoreService {
stateStore.storeApplication(appId1, appProto1); stateStore.storeApplication(appId1, appProto1);
restartStateStore(); restartStateStore();
state = stateStore.loadApplicationsState(); state = stateStore.loadApplicationsState();
assertEquals(1, state.getApplications().size()); apps = loadApplicationProtos(state.getIterator());
assertEquals(appProto1, state.getApplications().get(0)); assertEquals(1, apps.size());
assertEquals(appProto1, apps.get(0));
// add a new app // add a new app
final ApplicationId appId2 = ApplicationId.newInstance(1234, 2); final ApplicationId appId2 = ApplicationId.newInstance(1234, 2);
@ -216,23 +286,25 @@ public class TestNMLeveldbStateStoreService {
stateStore.storeApplication(appId2, appProto2); stateStore.storeApplication(appId2, appProto2);
restartStateStore(); restartStateStore();
state = stateStore.loadApplicationsState(); state = stateStore.loadApplicationsState();
assertEquals(2, state.getApplications().size()); apps = loadApplicationProtos(state.getIterator());
assertTrue(state.getApplications().contains(appProto1)); assertEquals(2, apps.size());
assertTrue(state.getApplications().contains(appProto2)); assertTrue(apps.contains(appProto1));
assertTrue(apps.contains(appProto2));
// test removing an application // test removing an application
stateStore.removeApplication(appId2); stateStore.removeApplication(appId2);
restartStateStore(); restartStateStore();
state = stateStore.loadApplicationsState(); state = stateStore.loadApplicationsState();
assertEquals(1, state.getApplications().size()); apps = loadApplicationProtos(state.getIterator());
assertEquals(appProto1, state.getApplications().get(0)); assertEquals(1, apps.size());
assertEquals(appProto1, apps.get(0));
} }
@Test @Test
public void testContainerStorage() throws IOException { public void testContainerStorage() throws IOException {
// test empty when no state // test empty when no state
List<RecoveredContainerState> recoveredContainers = List<RecoveredContainerState> recoveredContainers =
stateStore.loadContainersState(); loadContainersState(stateStore.getContainerStateIterator());
assertTrue(recoveredContainers.isEmpty()); assertTrue(recoveredContainers.isEmpty());
// create a container request // create a container request
@ -254,7 +326,8 @@ public class TestNMLeveldbStateStoreService {
stateStore.getContainerVersionKey(containerId.toString())))); stateStore.getContainerVersionKey(containerId.toString()))));
restartStateStore(); restartStateStore();
recoveredContainers = stateStore.loadContainersState(); recoveredContainers =
loadContainersState(stateStore.getContainerStateIterator());
assertEquals(1, recoveredContainers.size()); assertEquals(1, recoveredContainers.size());
RecoveredContainerState rcs = recoveredContainers.get(0); RecoveredContainerState rcs = recoveredContainers.get(0);
assertEquals(0, rcs.getVersion()); assertEquals(0, rcs.getVersion());
@ -269,14 +342,16 @@ public class TestNMLeveldbStateStoreService {
// store a new container record without StartContainerRequest // store a new container record without StartContainerRequest
ContainerId containerId1 = ContainerId.newContainerId(appAttemptId, 6); ContainerId containerId1 = ContainerId.newContainerId(appAttemptId, 6);
stateStore.storeContainerLaunched(containerId1); stateStore.storeContainerLaunched(containerId1);
recoveredContainers = stateStore.loadContainersState(); recoveredContainers =
loadContainersState(stateStore.getContainerStateIterator());
// check whether the new container record is discarded // check whether the new container record is discarded
assertEquals(1, recoveredContainers.size()); assertEquals(1, recoveredContainers.size());
// queue the container, and verify recovered // queue the container, and verify recovered
stateStore.storeContainerQueued(containerId); stateStore.storeContainerQueued(containerId);
restartStateStore(); restartStateStore();
recoveredContainers = stateStore.loadContainersState(); recoveredContainers =
loadContainersState(stateStore.getContainerStateIterator());
assertEquals(1, recoveredContainers.size()); assertEquals(1, recoveredContainers.size());
rcs = recoveredContainers.get(0); rcs = recoveredContainers.get(0);
assertEquals(RecoveredContainerStatus.QUEUED, rcs.getStatus()); assertEquals(RecoveredContainerStatus.QUEUED, rcs.getStatus());
@ -292,7 +367,8 @@ public class TestNMLeveldbStateStoreService {
diags.append("some diags for container"); diags.append("some diags for container");
stateStore.storeContainerDiagnostics(containerId, diags); stateStore.storeContainerDiagnostics(containerId, diags);
restartStateStore(); restartStateStore();
recoveredContainers = stateStore.loadContainersState(); recoveredContainers =
loadContainersState(stateStore.getContainerStateIterator());
assertEquals(1, recoveredContainers.size()); assertEquals(1, recoveredContainers.size());
rcs = recoveredContainers.get(0); rcs = recoveredContainers.get(0);
assertEquals(RecoveredContainerStatus.LAUNCHED, rcs.getStatus()); assertEquals(RecoveredContainerStatus.LAUNCHED, rcs.getStatus());
@ -305,7 +381,8 @@ public class TestNMLeveldbStateStoreService {
// pause the container, and verify recovered // pause the container, and verify recovered
stateStore.storeContainerPaused(containerId); stateStore.storeContainerPaused(containerId);
restartStateStore(); restartStateStore();
recoveredContainers = stateStore.loadContainersState(); recoveredContainers =
loadContainersState(stateStore.getContainerStateIterator());
assertEquals(1, recoveredContainers.size()); assertEquals(1, recoveredContainers.size());
rcs = recoveredContainers.get(0); rcs = recoveredContainers.get(0);
assertEquals(RecoveredContainerStatus.PAUSED, rcs.getStatus()); assertEquals(RecoveredContainerStatus.PAUSED, rcs.getStatus());
@ -316,7 +393,8 @@ public class TestNMLeveldbStateStoreService {
// Resume the container // Resume the container
stateStore.removeContainerPaused(containerId); stateStore.removeContainerPaused(containerId);
restartStateStore(); restartStateStore();
recoveredContainers = stateStore.loadContainersState(); recoveredContainers =
loadContainersState(stateStore.getContainerStateIterator());
assertEquals(1, recoveredContainers.size()); assertEquals(1, recoveredContainers.size());
// increase the container size, and verify recovered // increase the container size, and verify recovered
@ -328,7 +406,8 @@ public class TestNMLeveldbStateStoreService {
stateStore stateStore
.storeContainerUpdateToken(containerId, updateTokenIdentifier); .storeContainerUpdateToken(containerId, updateTokenIdentifier);
restartStateStore(); restartStateStore();
recoveredContainers = stateStore.loadContainersState(); recoveredContainers =
loadContainersState(stateStore.getContainerStateIterator());
assertEquals(1, recoveredContainers.size()); assertEquals(1, recoveredContainers.size());
rcs = recoveredContainers.get(0); rcs = recoveredContainers.get(0);
assertEquals(0, rcs.getVersion()); assertEquals(0, rcs.getVersion());
@ -342,7 +421,8 @@ public class TestNMLeveldbStateStoreService {
stateStore.storeContainerDiagnostics(containerId, diags); stateStore.storeContainerDiagnostics(containerId, diags);
stateStore.storeContainerKilled(containerId); stateStore.storeContainerKilled(containerId);
restartStateStore(); restartStateStore();
recoveredContainers = stateStore.loadContainersState(); recoveredContainers =
loadContainersState(stateStore.getContainerStateIterator());
assertEquals(1, recoveredContainers.size()); assertEquals(1, recoveredContainers.size());
rcs = recoveredContainers.get(0); rcs = recoveredContainers.get(0);
assertEquals(RecoveredContainerStatus.LAUNCHED, rcs.getStatus()); assertEquals(RecoveredContainerStatus.LAUNCHED, rcs.getStatus());
@ -358,7 +438,8 @@ public class TestNMLeveldbStateStoreService {
stateStore.storeContainerDiagnostics(containerId, diags); stateStore.storeContainerDiagnostics(containerId, diags);
stateStore.storeContainerCompleted(containerId, 21); stateStore.storeContainerCompleted(containerId, 21);
restartStateStore(); restartStateStore();
recoveredContainers = stateStore.loadContainersState(); recoveredContainers =
loadContainersState(stateStore.getContainerStateIterator());
assertEquals(1, recoveredContainers.size()); assertEquals(1, recoveredContainers.size());
rcs = recoveredContainers.get(0); rcs = recoveredContainers.get(0);
assertEquals(RecoveredContainerStatus.COMPLETED, rcs.getStatus()); assertEquals(RecoveredContainerStatus.COMPLETED, rcs.getStatus());
@ -371,7 +452,8 @@ public class TestNMLeveldbStateStoreService {
stateStore.storeContainerWorkDir(containerId, "/test/workdir"); stateStore.storeContainerWorkDir(containerId, "/test/workdir");
stateStore.storeContainerLogDir(containerId, "/test/logdir"); stateStore.storeContainerLogDir(containerId, "/test/logdir");
restartStateStore(); restartStateStore();
recoveredContainers = stateStore.loadContainersState(); recoveredContainers =
loadContainersState(stateStore.getContainerStateIterator());
assertEquals(1, recoveredContainers.size()); assertEquals(1, recoveredContainers.size());
rcs = recoveredContainers.get(0); rcs = recoveredContainers.get(0);
assertEquals(6, rcs.getRemainingRetryAttempts()); assertEquals(6, rcs.getRemainingRetryAttempts());
@ -382,12 +464,13 @@ public class TestNMLeveldbStateStoreService {
// remove the container and verify not recovered // remove the container and verify not recovered
stateStore.removeContainer(containerId); stateStore.removeContainer(containerId);
restartStateStore(); restartStateStore();
recoveredContainers = stateStore.loadContainersState(); recoveredContainers =
loadContainersState(stateStore.getContainerStateIterator());
assertTrue(recoveredContainers.isEmpty()); assertTrue(recoveredContainers.isEmpty());
// recover again to check remove clears all containers // recover again to check remove clears all containers
restartStateStore(); restartStateStore();
NMStateStoreService nmStoreSpy = spy(stateStore); NMStateStoreService nmStoreSpy = spy(stateStore);
nmStoreSpy.loadContainersState(); loadContainersState(nmStoreSpy.getContainerStateIterator());
verify(nmStoreSpy,times(0)).removeContainer(any(ContainerId.class)); verify(nmStoreSpy,times(0)).removeContainer(any(ContainerId.class));
} }
@ -399,7 +482,8 @@ public class TestNMLeveldbStateStoreService {
stateStore.storeContainerRestartTimes(containerId, stateStore.storeContainerRestartTimes(containerId,
finishTimeForRetryAttempts); finishTimeForRetryAttempts);
restartStateStore(); restartStateStore();
RecoveredContainerState rcs = stateStore.loadContainersState().get(0); RecoveredContainerState rcs =
loadContainersState(stateStore.getContainerStateIterator()).get(0);
List<Long> recoveredRestartTimes = rcs.getRestartTimes(); List<Long> recoveredRestartTimes = rcs.getRestartTimes();
assertEquals(1462700529039L, (long)recoveredRestartTimes.get(0)); assertEquals(1462700529039L, (long)recoveredRestartTimes.get(0));
assertEquals(1462700529050L, (long)recoveredRestartTimes.get(1)); assertEquals(1462700529050L, (long)recoveredRestartTimes.get(1));
@ -481,7 +565,7 @@ public class TestNMLeveldbStateStoreService {
assertTrue(pubts.getLocalizedResources().isEmpty()); assertTrue(pubts.getLocalizedResources().isEmpty());
assertTrue(pubts.getInProgressResources().isEmpty()); assertTrue(pubts.getInProgressResources().isEmpty());
Map<String, RecoveredUserResources> userResources = Map<String, RecoveredUserResources> userResources =
state.getUserResources(); loadUserResources(state.getIterator());
assertEquals(1, userResources.size()); assertEquals(1, userResources.size());
RecoveredUserResources rur = userResources.get(user); RecoveredUserResources rur = userResources.get(user);
LocalResourceTrackerState privts = rur.getPrivateTrackerState(); LocalResourceTrackerState privts = rur.getPrivateTrackerState();
@ -535,7 +619,7 @@ public class TestNMLeveldbStateStoreService {
pubts.getInProgressResources().get(pubRsrcProto1)); pubts.getInProgressResources().get(pubRsrcProto1));
assertEquals(pubRsrcLocalPath2, assertEquals(pubRsrcLocalPath2,
pubts.getInProgressResources().get(pubRsrcProto2)); pubts.getInProgressResources().get(pubRsrcProto2));
userResources = state.getUserResources(); userResources = loadUserResources(state.getIterator());
assertEquals(1, userResources.size()); assertEquals(1, userResources.size());
rur = userResources.get(user); rur = userResources.get(user);
privts = rur.getPrivateTrackerState(); privts = rur.getPrivateTrackerState();
@ -584,7 +668,7 @@ public class TestNMLeveldbStateStoreService {
assertTrue(pubts.getLocalizedResources().isEmpty()); assertTrue(pubts.getLocalizedResources().isEmpty());
assertTrue(pubts.getInProgressResources().isEmpty()); assertTrue(pubts.getInProgressResources().isEmpty());
Map<String, RecoveredUserResources> userResources = Map<String, RecoveredUserResources> userResources =
state.getUserResources(); loadUserResources(state.getIterator());
assertEquals(1, userResources.size()); assertEquals(1, userResources.size());
RecoveredUserResources rur = userResources.get(user); RecoveredUserResources rur = userResources.get(user);
LocalResourceTrackerState privts = rur.getPrivateTrackerState(); LocalResourceTrackerState privts = rur.getPrivateTrackerState();
@ -654,7 +738,7 @@ public class TestNMLeveldbStateStoreService {
assertEquals(1, pubts.getInProgressResources().size()); assertEquals(1, pubts.getInProgressResources().size());
assertEquals(pubRsrcLocalPath2, assertEquals(pubRsrcLocalPath2,
pubts.getInProgressResources().get(pubRsrcProto2)); pubts.getInProgressResources().get(pubRsrcProto2));
userResources = state.getUserResources(); userResources = loadUserResources(state.getIterator());
assertEquals(1, userResources.size()); assertEquals(1, userResources.size());
rur = userResources.get(user); rur = userResources.get(user);
privts = rur.getPrivateTrackerState(); privts = rur.getPrivateTrackerState();
@ -762,7 +846,7 @@ public class TestNMLeveldbStateStoreService {
assertEquals(pubLocalizedProto1, assertEquals(pubLocalizedProto1,
pubts.getLocalizedResources().iterator().next()); pubts.getLocalizedResources().iterator().next());
Map<String, RecoveredUserResources> userResources = Map<String, RecoveredUserResources> userResources =
state.getUserResources(); loadUserResources(state.getIterator());
assertTrue(userResources.isEmpty()); assertTrue(userResources.isEmpty());
} }
@ -771,7 +855,9 @@ public class TestNMLeveldbStateStoreService {
// test empty when no state // test empty when no state
RecoveredDeletionServiceState state = RecoveredDeletionServiceState state =
stateStore.loadDeletionServiceState(); stateStore.loadDeletionServiceState();
assertTrue(state.getTasks().isEmpty()); List<DeletionServiceDeleteTaskProto> deleteTaskProtos =
loadDeletionTaskProtos(state.getIterator());
assertTrue(deleteTaskProtos.isEmpty());
// store a deletion task and verify recovered // store a deletion task and verify recovered
DeletionServiceDeleteTaskProto proto = DeletionServiceDeleteTaskProto proto =
@ -788,8 +874,9 @@ public class TestNMLeveldbStateStoreService {
stateStore.storeDeletionTask(proto.getId(), proto); stateStore.storeDeletionTask(proto.getId(), proto);
restartStateStore(); restartStateStore();
state = stateStore.loadDeletionServiceState(); state = stateStore.loadDeletionServiceState();
assertEquals(1, state.getTasks().size()); deleteTaskProtos = loadDeletionTaskProtos(state.getIterator());
assertEquals(proto, state.getTasks().get(0)); assertEquals(1, deleteTaskProtos.size());
assertEquals(proto, deleteTaskProtos.get(0));
// store another deletion task // store another deletion task
DeletionServiceDeleteTaskProto proto2 = DeletionServiceDeleteTaskProto proto2 =
@ -802,31 +889,36 @@ public class TestNMLeveldbStateStoreService {
stateStore.storeDeletionTask(proto2.getId(), proto2); stateStore.storeDeletionTask(proto2.getId(), proto2);
restartStateStore(); restartStateStore();
state = stateStore.loadDeletionServiceState(); state = stateStore.loadDeletionServiceState();
assertEquals(2, state.getTasks().size()); deleteTaskProtos = loadDeletionTaskProtos(state.getIterator());
assertTrue(state.getTasks().contains(proto)); assertEquals(2, deleteTaskProtos.size());
assertTrue(state.getTasks().contains(proto2)); assertTrue(deleteTaskProtos.contains(proto));
assertTrue(deleteTaskProtos.contains(proto2));
// delete a task and verify gone after recovery // delete a task and verify gone after recovery
stateStore.removeDeletionTask(proto2.getId()); stateStore.removeDeletionTask(proto2.getId());
restartStateStore(); restartStateStore();
state = stateStore.loadDeletionServiceState(); state = stateStore.loadDeletionServiceState();
assertEquals(1, state.getTasks().size()); deleteTaskProtos = loadDeletionTaskProtos(state.getIterator());
assertEquals(proto, state.getTasks().get(0)); assertEquals(1, deleteTaskProtos.size());
assertEquals(proto, deleteTaskProtos.get(0));
// delete the last task and verify none left // delete the last task and verify none left
stateStore.removeDeletionTask(proto.getId()); stateStore.removeDeletionTask(proto.getId());
restartStateStore(); restartStateStore();
state = stateStore.loadDeletionServiceState(); state = stateStore.loadDeletionServiceState();
assertTrue(state.getTasks().isEmpty()); deleteTaskProtos = loadDeletionTaskProtos(state.getIterator());
} assertTrue(deleteTaskProtos.isEmpty()); }
@Test @Test
public void testNMTokenStorage() throws IOException { public void testNMTokenStorage() throws IOException {
// test empty when no state // test empty when no state
RecoveredNMTokensState state = stateStore.loadNMTokensState(); RecoveredNMTokensState state = stateStore.loadNMTokensState();
Map<ApplicationAttemptId, MasterKey> loadedAppKeys =
loadNMTokens(state.getIterator());
assertNull(state.getCurrentMasterKey()); assertNull(state.getCurrentMasterKey());
assertNull(state.getPreviousMasterKey()); assertNull(state.getPreviousMasterKey());
assertTrue(state.getApplicationMasterKeys().isEmpty()); assertTrue(loadedAppKeys.isEmpty());
// store a master key and verify recovered // store a master key and verify recovered
NMTokenSecretManagerForTest secretMgr = new NMTokenSecretManagerForTest(); NMTokenSecretManagerForTest secretMgr = new NMTokenSecretManagerForTest();
@ -834,18 +926,20 @@ public class TestNMLeveldbStateStoreService {
stateStore.storeNMTokenCurrentMasterKey(currentKey); stateStore.storeNMTokenCurrentMasterKey(currentKey);
restartStateStore(); restartStateStore();
state = stateStore.loadNMTokensState(); state = stateStore.loadNMTokensState();
loadedAppKeys = loadNMTokens(state.getIterator());
assertEquals(currentKey, state.getCurrentMasterKey()); assertEquals(currentKey, state.getCurrentMasterKey());
assertNull(state.getPreviousMasterKey()); assertNull(state.getPreviousMasterKey());
assertTrue(state.getApplicationMasterKeys().isEmpty()); assertTrue(loadedAppKeys.isEmpty());
// store a previous key and verify recovered // store a previous key and verify recovered
MasterKey prevKey = secretMgr.generateKey(); MasterKey prevKey = secretMgr.generateKey();
stateStore.storeNMTokenPreviousMasterKey(prevKey); stateStore.storeNMTokenPreviousMasterKey(prevKey);
restartStateStore(); restartStateStore();
state = stateStore.loadNMTokensState(); state = stateStore.loadNMTokensState();
loadedAppKeys = loadNMTokens(state.getIterator());
assertEquals(currentKey, state.getCurrentMasterKey()); assertEquals(currentKey, state.getCurrentMasterKey());
assertEquals(prevKey, state.getPreviousMasterKey()); assertEquals(prevKey, state.getPreviousMasterKey());
assertTrue(state.getApplicationMasterKeys().isEmpty()); assertTrue(loadedAppKeys.isEmpty());
// store a few application keys and verify recovered // store a few application keys and verify recovered
ApplicationAttemptId attempt1 = ApplicationAttemptId.newInstance( ApplicationAttemptId attempt1 = ApplicationAttemptId.newInstance(
@ -858,10 +952,9 @@ public class TestNMLeveldbStateStoreService {
stateStore.storeNMTokenApplicationMasterKey(attempt2, attemptKey2); stateStore.storeNMTokenApplicationMasterKey(attempt2, attemptKey2);
restartStateStore(); restartStateStore();
state = stateStore.loadNMTokensState(); state = stateStore.loadNMTokensState();
loadedAppKeys = loadNMTokens(state.getIterator());
assertEquals(currentKey, state.getCurrentMasterKey()); assertEquals(currentKey, state.getCurrentMasterKey());
assertEquals(prevKey, state.getPreviousMasterKey()); assertEquals(prevKey, state.getPreviousMasterKey());
Map<ApplicationAttemptId, MasterKey> loadedAppKeys =
state.getApplicationMasterKeys();
assertEquals(2, loadedAppKeys.size()); assertEquals(2, loadedAppKeys.size());
assertEquals(attemptKey1, loadedAppKeys.get(attempt1)); assertEquals(attemptKey1, loadedAppKeys.get(attempt1));
assertEquals(attemptKey2, loadedAppKeys.get(attempt2)); assertEquals(attemptKey2, loadedAppKeys.get(attempt2));
@ -880,9 +973,9 @@ public class TestNMLeveldbStateStoreService {
stateStore.storeNMTokenCurrentMasterKey(currentKey); stateStore.storeNMTokenCurrentMasterKey(currentKey);
restartStateStore(); restartStateStore();
state = stateStore.loadNMTokensState(); state = stateStore.loadNMTokensState();
loadedAppKeys = loadNMTokens(state.getIterator());
assertEquals(currentKey, state.getCurrentMasterKey()); assertEquals(currentKey, state.getCurrentMasterKey());
assertEquals(prevKey, state.getPreviousMasterKey()); assertEquals(prevKey, state.getPreviousMasterKey());
loadedAppKeys = state.getApplicationMasterKeys();
assertEquals(2, loadedAppKeys.size()); assertEquals(2, loadedAppKeys.size());
assertNull(loadedAppKeys.get(attempt1)); assertNull(loadedAppKeys.get(attempt1));
assertEquals(attemptKey2, loadedAppKeys.get(attempt2)); assertEquals(attemptKey2, loadedAppKeys.get(attempt2));
@ -894,9 +987,10 @@ public class TestNMLeveldbStateStoreService {
// test empty when no state // test empty when no state
RecoveredContainerTokensState state = RecoveredContainerTokensState state =
stateStore.loadContainerTokensState(); stateStore.loadContainerTokensState();
Map<ContainerId, Long> loadedActiveTokens = loadContainerTokens(state.it);
assertNull(state.getCurrentMasterKey()); assertNull(state.getCurrentMasterKey());
assertNull(state.getPreviousMasterKey()); assertNull(state.getPreviousMasterKey());
assertTrue(state.getActiveTokens().isEmpty()); assertTrue(loadedActiveTokens.isEmpty());
// store a master key and verify recovered // store a master key and verify recovered
ContainerTokenKeyGeneratorForTest keygen = ContainerTokenKeyGeneratorForTest keygen =
@ -905,18 +999,20 @@ public class TestNMLeveldbStateStoreService {
stateStore.storeContainerTokenCurrentMasterKey(currentKey); stateStore.storeContainerTokenCurrentMasterKey(currentKey);
restartStateStore(); restartStateStore();
state = stateStore.loadContainerTokensState(); state = stateStore.loadContainerTokensState();
loadedActiveTokens = loadContainerTokens(state.it);
assertEquals(currentKey, state.getCurrentMasterKey()); assertEquals(currentKey, state.getCurrentMasterKey());
assertNull(state.getPreviousMasterKey()); assertNull(state.getPreviousMasterKey());
assertTrue(state.getActiveTokens().isEmpty()); assertTrue(loadedActiveTokens.isEmpty());
// store a previous key and verify recovered // store a previous key and verify recovered
MasterKey prevKey = keygen.generateKey(); MasterKey prevKey = keygen.generateKey();
stateStore.storeContainerTokenPreviousMasterKey(prevKey); stateStore.storeContainerTokenPreviousMasterKey(prevKey);
restartStateStore(); restartStateStore();
state = stateStore.loadContainerTokensState(); state = stateStore.loadContainerTokensState();
loadedActiveTokens = loadContainerTokens(state.it);
assertEquals(currentKey, state.getCurrentMasterKey()); assertEquals(currentKey, state.getCurrentMasterKey());
assertEquals(prevKey, state.getPreviousMasterKey()); assertEquals(prevKey, state.getPreviousMasterKey());
assertTrue(state.getActiveTokens().isEmpty()); assertTrue(loadedActiveTokens.isEmpty());
// store a few container tokens and verify recovered // store a few container tokens and verify recovered
ContainerId cid1 = BuilderUtils.newContainerId(1, 1, 1, 1); ContainerId cid1 = BuilderUtils.newContainerId(1, 1, 1, 1);
@ -927,10 +1023,9 @@ public class TestNMLeveldbStateStoreService {
stateStore.storeContainerToken(cid2, expTime2); stateStore.storeContainerToken(cid2, expTime2);
restartStateStore(); restartStateStore();
state = stateStore.loadContainerTokensState(); state = stateStore.loadContainerTokensState();
loadedActiveTokens = loadContainerTokens(state.it);
assertEquals(currentKey, state.getCurrentMasterKey()); assertEquals(currentKey, state.getCurrentMasterKey());
assertEquals(prevKey, state.getPreviousMasterKey()); assertEquals(prevKey, state.getPreviousMasterKey());
Map<ContainerId, Long> loadedActiveTokens =
state.getActiveTokens();
assertEquals(2, loadedActiveTokens.size()); assertEquals(2, loadedActiveTokens.size());
assertEquals(expTime1, loadedActiveTokens.get(cid1)); assertEquals(expTime1, loadedActiveTokens.get(cid1));
assertEquals(expTime2, loadedActiveTokens.get(cid2)); assertEquals(expTime2, loadedActiveTokens.get(cid2));
@ -948,9 +1043,9 @@ public class TestNMLeveldbStateStoreService {
stateStore.storeContainerTokenCurrentMasterKey(currentKey); stateStore.storeContainerTokenCurrentMasterKey(currentKey);
restartStateStore(); restartStateStore();
state = stateStore.loadContainerTokensState(); state = stateStore.loadContainerTokensState();
loadedActiveTokens = loadContainerTokens(state.it);
assertEquals(currentKey, state.getCurrentMasterKey()); assertEquals(currentKey, state.getCurrentMasterKey());
assertEquals(prevKey, state.getPreviousMasterKey()); assertEquals(prevKey, state.getPreviousMasterKey());
loadedActiveTokens = state.getActiveTokens();
assertEquals(2, loadedActiveTokens.size()); assertEquals(2, loadedActiveTokens.size());
assertNull(loadedActiveTokens.get(cid1)); assertNull(loadedActiveTokens.get(cid1));
assertEquals(expTime2, loadedActiveTokens.get(cid2)); assertEquals(expTime2, loadedActiveTokens.get(cid2));
@ -1029,8 +1124,8 @@ public class TestNMLeveldbStateStoreService {
@Test @Test
public void testUnexpectedKeyDoesntThrowException() throws IOException { public void testUnexpectedKeyDoesntThrowException() throws IOException {
// test empty when no state // test empty when no state
List<RecoveredContainerState> recoveredContainers = stateStore List<RecoveredContainerState> recoveredContainers =
.loadContainersState(); loadContainersState(stateStore.getContainerStateIterator());
assertTrue(recoveredContainers.isEmpty()); assertTrue(recoveredContainers.isEmpty());
ApplicationId appId = ApplicationId.newInstance(1234, 3); ApplicationId appId = ApplicationId.newInstance(1234, 3);
@ -1045,7 +1140,8 @@ public class TestNMLeveldbStateStoreService {
+ containerId.toString() + "/invalidKey1234").getBytes(); + containerId.toString() + "/invalidKey1234").getBytes();
stateStore.getDB().put(invalidKey, new byte[1]); stateStore.getDB().put(invalidKey, new byte[1]);
restartStateStore(); restartStateStore();
recoveredContainers = stateStore.loadContainersState(); recoveredContainers =
loadContainersState(stateStore.getContainerStateIterator());
assertEquals(1, recoveredContainers.size()); assertEquals(1, recoveredContainers.size());
RecoveredContainerState rcs = recoveredContainers.get(0); RecoveredContainerState rcs = recoveredContainers.get(0);
assertEquals(RecoveredContainerStatus.REQUESTED, rcs.getStatus()); assertEquals(RecoveredContainerStatus.REQUESTED, rcs.getStatus());
@ -1162,8 +1258,8 @@ public class TestNMLeveldbStateStoreService {
@Test @Test
public void testStateStoreForResourceMapping() throws IOException { public void testStateStoreForResourceMapping() throws IOException {
// test empty when no state // test empty when no state
List<RecoveredContainerState> recoveredContainers = stateStore List<RecoveredContainerState> recoveredContainers =
.loadContainersState(); loadContainersState(stateStore.getContainerStateIterator());
assertTrue(recoveredContainers.isEmpty()); assertTrue(recoveredContainers.isEmpty());
ApplicationId appId = ApplicationId.newInstance(1234, 3); ApplicationId appId = ApplicationId.newInstance(1234, 3);
@ -1190,7 +1286,8 @@ public class TestNMLeveldbStateStoreService {
// add a invalid key // add a invalid key
restartStateStore(); restartStateStore();
recoveredContainers = stateStore.loadContainersState(); recoveredContainers =
loadContainersState(stateStore.getContainerStateIterator());
assertEquals(1, recoveredContainers.size()); assertEquals(1, recoveredContainers.size());
RecoveredContainerState rcs = recoveredContainers.get(0); RecoveredContainerState rcs = recoveredContainers.get(0);
List<Serializable> res = rcs.getResourceMappings() List<Serializable> res = rcs.getResourceMappings()
@ -1253,7 +1350,8 @@ public class TestNMLeveldbStateStoreService {
stateStore.storeContainerRestartTimes(containerId, stateStore.storeContainerRestartTimes(containerId,
restartTimes); restartTimes);
restartStateStore(); restartStateStore();
RecoveredContainerState rcs = stateStore.loadContainersState().get(0); RecoveredContainerState rcs =
loadContainersState(stateStore.getContainerStateIterator()).get(0);
List<Long> recoveredRestartTimes = rcs.getRestartTimes(); List<Long> recoveredRestartTimes = rcs.getRestartTimes();
assertTrue(recoveredRestartTimes.isEmpty()); assertTrue(recoveredRestartTimes.isEmpty());
} }