Merge r1504261 from trunk to branch-2 for YARN-922. Change FileSystemRMStateStore to use directories (Jian He via bikas)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1504264 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2dba05af50
commit
859b53435f
|
@ -483,6 +483,9 @@ Release 2.1.0-beta - 2013-07-02
|
||||||
YARN-927. Change ContainerRequest to not have more than 1 container count
|
YARN-927. Change ContainerRequest to not have more than 1 container count
|
||||||
and remove StoreContainerRequest (bikas)
|
and remove StoreContainerRequest (bikas)
|
||||||
|
|
||||||
|
YARN-922. Change FileSystemRMStateStore to use directories (Jian He via
|
||||||
|
bikas)
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
YARN-512. Log aggregation root directory check is more expensive than it
|
YARN-512. Log aggregation root directory check is more expensive than it
|
||||||
|
|
|
@ -111,70 +111,66 @@ public class FileSystemRMStateStore extends RMStateStore {
|
||||||
|
|
||||||
private void loadRMAppState(RMState rmState) throws Exception {
|
private void loadRMAppState(RMState rmState) throws Exception {
|
||||||
try {
|
try {
|
||||||
FileStatus[] childNodes = fs.listStatus(rmAppRoot);
|
|
||||||
List<ApplicationAttemptState> attempts =
|
List<ApplicationAttemptState> attempts =
|
||||||
new ArrayList<ApplicationAttemptState>();
|
new ArrayList<ApplicationAttemptState>();
|
||||||
for(FileStatus childNodeStatus : childNodes) {
|
|
||||||
assert childNodeStatus.isFile();
|
|
||||||
String childNodeName = childNodeStatus.getPath().getName();
|
|
||||||
Path childNodePath = getNodePath(rmAppRoot, childNodeName);
|
|
||||||
byte[] childData = readFile(childNodePath, childNodeStatus.getLen());
|
|
||||||
if(childNodeName.startsWith(ApplicationId.appIdStrPrefix)){
|
|
||||||
// application
|
|
||||||
LOG.info("Loading application from node: " + childNodeName);
|
|
||||||
ApplicationId appId = ConverterUtils.toApplicationId(childNodeName);
|
|
||||||
ApplicationStateDataPBImpl appStateData =
|
|
||||||
new ApplicationStateDataPBImpl(
|
|
||||||
ApplicationStateDataProto.parseFrom(childData));
|
|
||||||
ApplicationState appState = new ApplicationState(
|
|
||||||
appStateData.getSubmitTime(),
|
|
||||||
appStateData.getApplicationSubmissionContext(),
|
|
||||||
appStateData.getUser());
|
|
||||||
// assert child node name is same as actual applicationId
|
|
||||||
assert appId.equals(appState.context.getApplicationId());
|
|
||||||
rmState.appState.put(appId, appState);
|
|
||||||
} else if(childNodeName.startsWith(
|
|
||||||
ApplicationAttemptId.appAttemptIdStrPrefix)) {
|
|
||||||
// attempt
|
|
||||||
LOG.info("Loading application attempt from node: " + childNodeName);
|
|
||||||
ApplicationAttemptId attemptId =
|
|
||||||
ConverterUtils.toApplicationAttemptId(childNodeName);
|
|
||||||
ApplicationAttemptStateDataPBImpl attemptStateData =
|
|
||||||
new ApplicationAttemptStateDataPBImpl(
|
|
||||||
ApplicationAttemptStateDataProto.parseFrom(childData));
|
|
||||||
Credentials credentials = null;
|
|
||||||
if(attemptStateData.getAppAttemptTokens() != null){
|
|
||||||
credentials = new Credentials();
|
|
||||||
DataInputByteBuffer dibb = new DataInputByteBuffer();
|
|
||||||
dibb.reset(attemptStateData.getAppAttemptTokens());
|
|
||||||
credentials.readTokenStorageStream(dibb);
|
|
||||||
}
|
|
||||||
ApplicationAttemptState attemptState =
|
|
||||||
new ApplicationAttemptState(attemptId,
|
|
||||||
attemptStateData.getMasterContainer(), credentials);
|
|
||||||
|
|
||||||
// assert child node name is same as application attempt id
|
for (FileStatus appDir : fs.listStatus(rmAppRoot)) {
|
||||||
assert attemptId.equals(attemptState.getAttemptId());
|
for (FileStatus childNodeStatus : fs.listStatus(appDir.getPath())) {
|
||||||
attempts.add(attemptState);
|
assert childNodeStatus.isFile();
|
||||||
} else {
|
String childNodeName = childNodeStatus.getPath().getName();
|
||||||
LOG.info("Unknown child node with name: " + childNodeName);
|
byte[] childData =
|
||||||
|
readFile(childNodeStatus.getPath(), childNodeStatus.getLen());
|
||||||
|
if (childNodeName.startsWith(ApplicationId.appIdStrPrefix)) {
|
||||||
|
// application
|
||||||
|
LOG.info("Loading application from node: " + childNodeName);
|
||||||
|
ApplicationId appId = ConverterUtils.toApplicationId(childNodeName);
|
||||||
|
ApplicationStateDataPBImpl appStateData =
|
||||||
|
new ApplicationStateDataPBImpl(
|
||||||
|
ApplicationStateDataProto.parseFrom(childData));
|
||||||
|
ApplicationState appState =
|
||||||
|
new ApplicationState(appStateData.getSubmitTime(),
|
||||||
|
appStateData.getApplicationSubmissionContext(),
|
||||||
|
appStateData.getUser());
|
||||||
|
// assert child node name is same as actual applicationId
|
||||||
|
assert appId.equals(appState.context.getApplicationId());
|
||||||
|
rmState.appState.put(appId, appState);
|
||||||
|
} else if (childNodeName
|
||||||
|
.startsWith(ApplicationAttemptId.appAttemptIdStrPrefix)) {
|
||||||
|
// attempt
|
||||||
|
LOG.info("Loading application attempt from node: " + childNodeName);
|
||||||
|
ApplicationAttemptId attemptId =
|
||||||
|
ConverterUtils.toApplicationAttemptId(childNodeName);
|
||||||
|
ApplicationAttemptStateDataPBImpl attemptStateData =
|
||||||
|
new ApplicationAttemptStateDataPBImpl(
|
||||||
|
ApplicationAttemptStateDataProto.parseFrom(childData));
|
||||||
|
Credentials credentials = null;
|
||||||
|
if (attemptStateData.getAppAttemptTokens() != null) {
|
||||||
|
credentials = new Credentials();
|
||||||
|
DataInputByteBuffer dibb = new DataInputByteBuffer();
|
||||||
|
dibb.reset(attemptStateData.getAppAttemptTokens());
|
||||||
|
credentials.readTokenStorageStream(dibb);
|
||||||
|
}
|
||||||
|
ApplicationAttemptState attemptState =
|
||||||
|
new ApplicationAttemptState(attemptId,
|
||||||
|
attemptStateData.getMasterContainer(), credentials);
|
||||||
|
|
||||||
|
// assert child node name is same as application attempt id
|
||||||
|
assert attemptId.equals(attemptState.getAttemptId());
|
||||||
|
attempts.add(attemptState);
|
||||||
|
} else {
|
||||||
|
LOG.info("Unknown child node with name: " + childNodeName);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// go through all attempts and add them to their apps
|
// go through all attempts and add them to their apps, Ideally, each
|
||||||
for(ApplicationAttemptState attemptState : attempts) {
|
// attempt node must have a corresponding app node, because remove
|
||||||
|
// directory operation remove both at the same time
|
||||||
|
for (ApplicationAttemptState attemptState : attempts) {
|
||||||
ApplicationId appId = attemptState.getAttemptId().getApplicationId();
|
ApplicationId appId = attemptState.getAttemptId().getApplicationId();
|
||||||
ApplicationState appState = rmState.appState.get(appId);
|
ApplicationState appState = rmState.appState.get(appId);
|
||||||
if(appState != null) {
|
assert appState != null;
|
||||||
appState.attempts.put(attemptState.getAttemptId(), attemptState);
|
appState.attempts.put(attemptState.getAttemptId(), attemptState);
|
||||||
} else {
|
|
||||||
// the application node may have been removed when the application
|
|
||||||
// completed but the RM might have stopped before it could remove the
|
|
||||||
// application attempt nodes
|
|
||||||
LOG.info("Application node not found for attempt: "
|
|
||||||
+ attemptState.getAttemptId());
|
|
||||||
deleteFile(getNodePath(rmAppRoot, attemptState.getAttemptId().toString()));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
LOG.error("Failed to load state.", e);
|
LOG.error("Failed to load state.", e);
|
||||||
|
@ -188,6 +184,12 @@ public class FileSystemRMStateStore extends RMStateStore {
|
||||||
for(FileStatus childNodeStatus : childNodes) {
|
for(FileStatus childNodeStatus : childNodes) {
|
||||||
assert childNodeStatus.isFile();
|
assert childNodeStatus.isFile();
|
||||||
String childNodeName = childNodeStatus.getPath().getName();
|
String childNodeName = childNodeStatus.getPath().getName();
|
||||||
|
if(childNodeName.startsWith(DELEGATION_TOKEN_SEQUENCE_NUMBER_PREFIX)) {
|
||||||
|
rmState.rmSecretManagerState.dtSequenceNumber =
|
||||||
|
Integer.parseInt(childNodeName.split("_")[1]);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
Path childNodePath = getNodePath(rmDTSecretManagerRoot, childNodeName);
|
Path childNodePath = getNodePath(rmDTSecretManagerRoot, childNodeName);
|
||||||
byte[] childData = readFile(childNodePath, childNodeStatus.getLen());
|
byte[] childData = readFile(childNodePath, childNodeStatus.getLen());
|
||||||
ByteArrayInputStream is = new ByteArrayInputStream(childData);
|
ByteArrayInputStream is = new ByteArrayInputStream(childData);
|
||||||
|
@ -202,10 +204,7 @@ public class FileSystemRMStateStore extends RMStateStore {
|
||||||
long renewDate = fsIn.readLong();
|
long renewDate = fsIn.readLong();
|
||||||
rmState.rmSecretManagerState.delegationTokenState.put(identifier,
|
rmState.rmSecretManagerState.delegationTokenState.put(identifier,
|
||||||
renewDate);
|
renewDate);
|
||||||
} else if(childNodeName.startsWith(DELEGATION_TOKEN_SEQUENCE_NUMBER_PREFIX)) {
|
} else {
|
||||||
rmState.rmSecretManagerState.dtSequenceNumber =
|
|
||||||
Integer.parseInt(childNodeName.split("_")[1]);
|
|
||||||
}else {
|
|
||||||
LOG.warn("Unknown file for recovering RMDelegationTokenSecretManager");
|
LOG.warn("Unknown file for recovering RMDelegationTokenSecretManager");
|
||||||
}
|
}
|
||||||
fsIn.close();
|
fsIn.close();
|
||||||
|
@ -215,7 +214,9 @@ public class FileSystemRMStateStore extends RMStateStore {
|
||||||
@Override
|
@Override
|
||||||
public synchronized void storeApplicationState(String appId,
|
public synchronized void storeApplicationState(String appId,
|
||||||
ApplicationStateDataPBImpl appStateDataPB) throws Exception {
|
ApplicationStateDataPBImpl appStateDataPB) throws Exception {
|
||||||
Path nodeCreatePath = getNodePath(rmAppRoot, appId);
|
Path appDirPath = getAppDir(rmAppRoot, appId);
|
||||||
|
fs.mkdirs(appDirPath);
|
||||||
|
Path nodeCreatePath = getNodePath(appDirPath, appId);
|
||||||
|
|
||||||
LOG.info("Storing info for app: " + appId + " at: " + nodeCreatePath);
|
LOG.info("Storing info for app: " + appId + " at: " + nodeCreatePath);
|
||||||
byte[] appStateData = appStateDataPB.getProto().toByteArray();
|
byte[] appStateData = appStateDataPB.getProto().toByteArray();
|
||||||
|
@ -232,7 +233,11 @@ public class FileSystemRMStateStore extends RMStateStore {
|
||||||
@Override
|
@Override
|
||||||
public synchronized void storeApplicationAttemptState(String attemptId,
|
public synchronized void storeApplicationAttemptState(String attemptId,
|
||||||
ApplicationAttemptStateDataPBImpl attemptStateDataPB) throws Exception {
|
ApplicationAttemptStateDataPBImpl attemptStateDataPB) throws Exception {
|
||||||
Path nodeCreatePath = getNodePath(rmAppRoot, attemptId);
|
ApplicationAttemptId appAttemptId =
|
||||||
|
ConverterUtils.toApplicationAttemptId(attemptId);
|
||||||
|
Path appDirPath =
|
||||||
|
getAppDir(rmAppRoot, appAttemptId.getApplicationId().toString());
|
||||||
|
Path nodeCreatePath = getNodePath(appDirPath, attemptId);
|
||||||
LOG.info("Storing info for attempt: " + attemptId
|
LOG.info("Storing info for attempt: " + attemptId
|
||||||
+ " at: " + nodeCreatePath);
|
+ " at: " + nodeCreatePath);
|
||||||
byte[] attemptStateData = attemptStateDataPB.getProto().toByteArray();
|
byte[] attemptStateData = attemptStateDataPB.getProto().toByteArray();
|
||||||
|
@ -250,20 +255,9 @@ public class FileSystemRMStateStore extends RMStateStore {
|
||||||
public synchronized void removeApplicationState(ApplicationState appState)
|
public synchronized void removeApplicationState(ApplicationState appState)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
String appId = appState.getAppId().toString();
|
String appId = appState.getAppId().toString();
|
||||||
Path nodeRemovePath = getNodePath(rmAppRoot, appId);
|
Path nodeRemovePath = getAppDir(rmAppRoot, appId);
|
||||||
LOG.info("Removing info for app: " + appId + " at: " + nodeRemovePath);
|
LOG.info("Removing info for app: " + appId + " at: " + nodeRemovePath);
|
||||||
deleteFile(nodeRemovePath);
|
deleteFile(nodeRemovePath);
|
||||||
for(ApplicationAttemptId attemptId : appState.attempts.keySet()) {
|
|
||||||
removeApplicationAttemptState(attemptId.toString());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public synchronized void removeApplicationAttemptState(String attemptId)
|
|
||||||
throws Exception {
|
|
||||||
Path nodeRemovePath = getNodePath(rmAppRoot, attemptId);
|
|
||||||
LOG.info("Removing info for attempt: " + attemptId
|
|
||||||
+ " at: " + nodeRemovePath);
|
|
||||||
deleteFile(nodeRemovePath);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -329,6 +323,10 @@ public class FileSystemRMStateStore extends RMStateStore {
|
||||||
deleteFile(nodeCreatePath);
|
deleteFile(nodeCreatePath);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private Path getAppDir(Path root, String appId) {
|
||||||
|
return getNodePath(root, appId);
|
||||||
|
}
|
||||||
|
|
||||||
// FileSystem related code
|
// FileSystem related code
|
||||||
|
|
||||||
private void deleteFile(Path deletePath) throws Exception {
|
private void deleteFile(Path deletePath) throws Exception {
|
||||||
|
|
|
@ -105,8 +105,6 @@ public class TestRMStateStore {
|
||||||
|
|
||||||
interface RMStateStoreHelper {
|
interface RMStateStoreHelper {
|
||||||
RMStateStore getRMStateStore() throws Exception;
|
RMStateStore getRMStateStore() throws Exception;
|
||||||
void addOrphanAttemptIfNeeded(RMStateStore testStore,
|
|
||||||
TestDispatcher dispatcher) throws Exception;
|
|
||||||
boolean isFinalStateValid() throws Exception;
|
boolean isFinalStateValid() throws Exception;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -153,15 +151,6 @@ public class TestRMStateStore {
|
||||||
return store;
|
return store;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public void addOrphanAttemptIfNeeded(RMStateStore testStore,
|
|
||||||
TestDispatcher dispatcher) throws Exception {
|
|
||||||
ApplicationAttemptId attemptId = ConverterUtils.toApplicationAttemptId(
|
|
||||||
"appattempt_1352994193343_0003_000001");
|
|
||||||
storeAttempt(testStore, attemptId,
|
|
||||||
"container_1352994193343_0003_01_000001", null, null, dispatcher);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isFinalStateValid() throws Exception {
|
public boolean isFinalStateValid() throws Exception {
|
||||||
FileSystem fs = cluster.getFileSystem();
|
FileSystem fs = cluster.getFileSystem();
|
||||||
|
@ -289,9 +278,6 @@ public class TestRMStateStore {
|
||||||
attempts.put(attemptIdRemoved, mockRemovedAttempt);
|
attempts.put(attemptIdRemoved, mockRemovedAttempt);
|
||||||
store.removeApplication(mockRemovedApp);
|
store.removeApplication(mockRemovedApp);
|
||||||
|
|
||||||
// add orphan attempt file to simulate incomplete removal of app state
|
|
||||||
stateStoreHelper.addOrphanAttemptIfNeeded(store, dispatcher);
|
|
||||||
|
|
||||||
// let things settle down
|
// let things settle down
|
||||||
Thread.sleep(1000);
|
Thread.sleep(1000);
|
||||||
store.close();
|
store.close();
|
||||||
|
@ -301,9 +287,6 @@ public class TestRMStateStore {
|
||||||
RMState state = store.loadState();
|
RMState state = store.loadState();
|
||||||
Map<ApplicationId, ApplicationState> rmAppState = state.getApplicationState();
|
Map<ApplicationId, ApplicationState> rmAppState = state.getApplicationState();
|
||||||
|
|
||||||
// removed app or orphan attempt is not loaded
|
|
||||||
assertEquals(1, rmAppState.size());
|
|
||||||
|
|
||||||
ApplicationState appState = rmAppState.get(appId1);
|
ApplicationState appState = rmAppState.get(appId1);
|
||||||
// app is loaded
|
// app is loaded
|
||||||
assertNotNull(appState);
|
assertNotNull(appState);
|
||||||
|
|
Loading…
Reference in New Issue