Merge r1504261 from trunk to branch-2 for YARN-922. Change FileSystemRMStateStore to use directories (Jian He via bikas)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1504264 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Bikas Saha 2013-07-17 20:23:55 +00:00
parent 2dba05af50
commit 859b53435f
3 changed files with 76 additions and 92 deletions

View File

@ -483,6 +483,9 @@ Release 2.1.0-beta - 2013-07-02
YARN-927. Change ContainerRequest to not have more than 1 container count YARN-927. Change ContainerRequest to not have more than 1 container count
and remove StoreContainerRequest (bikas) and remove StoreContainerRequest (bikas)
YARN-922. Change FileSystemRMStateStore to use directories (Jian He via
bikas)
OPTIMIZATIONS OPTIMIZATIONS
YARN-512. Log aggregation root directory check is more expensive than it YARN-512. Log aggregation root directory check is more expensive than it

View File

@ -111,70 +111,66 @@ public class FileSystemRMStateStore extends RMStateStore {
private void loadRMAppState(RMState rmState) throws Exception { private void loadRMAppState(RMState rmState) throws Exception {
try { try {
FileStatus[] childNodes = fs.listStatus(rmAppRoot);
List<ApplicationAttemptState> attempts = List<ApplicationAttemptState> attempts =
new ArrayList<ApplicationAttemptState>(); new ArrayList<ApplicationAttemptState>();
for(FileStatus childNodeStatus : childNodes) {
assert childNodeStatus.isFile();
String childNodeName = childNodeStatus.getPath().getName();
Path childNodePath = getNodePath(rmAppRoot, childNodeName);
byte[] childData = readFile(childNodePath, childNodeStatus.getLen());
if(childNodeName.startsWith(ApplicationId.appIdStrPrefix)){
// application
LOG.info("Loading application from node: " + childNodeName);
ApplicationId appId = ConverterUtils.toApplicationId(childNodeName);
ApplicationStateDataPBImpl appStateData =
new ApplicationStateDataPBImpl(
ApplicationStateDataProto.parseFrom(childData));
ApplicationState appState = new ApplicationState(
appStateData.getSubmitTime(),
appStateData.getApplicationSubmissionContext(),
appStateData.getUser());
// assert child node name is same as actual applicationId
assert appId.equals(appState.context.getApplicationId());
rmState.appState.put(appId, appState);
} else if(childNodeName.startsWith(
ApplicationAttemptId.appAttemptIdStrPrefix)) {
// attempt
LOG.info("Loading application attempt from node: " + childNodeName);
ApplicationAttemptId attemptId =
ConverterUtils.toApplicationAttemptId(childNodeName);
ApplicationAttemptStateDataPBImpl attemptStateData =
new ApplicationAttemptStateDataPBImpl(
ApplicationAttemptStateDataProto.parseFrom(childData));
Credentials credentials = null;
if(attemptStateData.getAppAttemptTokens() != null){
credentials = new Credentials();
DataInputByteBuffer dibb = new DataInputByteBuffer();
dibb.reset(attemptStateData.getAppAttemptTokens());
credentials.readTokenStorageStream(dibb);
}
ApplicationAttemptState attemptState =
new ApplicationAttemptState(attemptId,
attemptStateData.getMasterContainer(), credentials);
// assert child node name is same as application attempt id for (FileStatus appDir : fs.listStatus(rmAppRoot)) {
assert attemptId.equals(attemptState.getAttemptId()); for (FileStatus childNodeStatus : fs.listStatus(appDir.getPath())) {
attempts.add(attemptState); assert childNodeStatus.isFile();
} else { String childNodeName = childNodeStatus.getPath().getName();
LOG.info("Unknown child node with name: " + childNodeName); byte[] childData =
readFile(childNodeStatus.getPath(), childNodeStatus.getLen());
if (childNodeName.startsWith(ApplicationId.appIdStrPrefix)) {
// application
LOG.info("Loading application from node: " + childNodeName);
ApplicationId appId = ConverterUtils.toApplicationId(childNodeName);
ApplicationStateDataPBImpl appStateData =
new ApplicationStateDataPBImpl(
ApplicationStateDataProto.parseFrom(childData));
ApplicationState appState =
new ApplicationState(appStateData.getSubmitTime(),
appStateData.getApplicationSubmissionContext(),
appStateData.getUser());
// assert child node name is same as actual applicationId
assert appId.equals(appState.context.getApplicationId());
rmState.appState.put(appId, appState);
} else if (childNodeName
.startsWith(ApplicationAttemptId.appAttemptIdStrPrefix)) {
// attempt
LOG.info("Loading application attempt from node: " + childNodeName);
ApplicationAttemptId attemptId =
ConverterUtils.toApplicationAttemptId(childNodeName);
ApplicationAttemptStateDataPBImpl attemptStateData =
new ApplicationAttemptStateDataPBImpl(
ApplicationAttemptStateDataProto.parseFrom(childData));
Credentials credentials = null;
if (attemptStateData.getAppAttemptTokens() != null) {
credentials = new Credentials();
DataInputByteBuffer dibb = new DataInputByteBuffer();
dibb.reset(attemptStateData.getAppAttemptTokens());
credentials.readTokenStorageStream(dibb);
}
ApplicationAttemptState attemptState =
new ApplicationAttemptState(attemptId,
attemptStateData.getMasterContainer(), credentials);
// assert child node name is same as application attempt id
assert attemptId.equals(attemptState.getAttemptId());
attempts.add(attemptState);
} else {
LOG.info("Unknown child node with name: " + childNodeName);
}
} }
} }
// go through all attempts and add them to their apps // go through all attempts and add them to their apps, Ideally, each
for(ApplicationAttemptState attemptState : attempts) { // attempt node must have a corresponding app node, because remove
// directory operation remove both at the same time
for (ApplicationAttemptState attemptState : attempts) {
ApplicationId appId = attemptState.getAttemptId().getApplicationId(); ApplicationId appId = attemptState.getAttemptId().getApplicationId();
ApplicationState appState = rmState.appState.get(appId); ApplicationState appState = rmState.appState.get(appId);
if(appState != null) { assert appState != null;
appState.attempts.put(attemptState.getAttemptId(), attemptState); appState.attempts.put(attemptState.getAttemptId(), attemptState);
} else {
// the application node may have been removed when the application
// completed but the RM might have stopped before it could remove the
// application attempt nodes
LOG.info("Application node not found for attempt: "
+ attemptState.getAttemptId());
deleteFile(getNodePath(rmAppRoot, attemptState.getAttemptId().toString()));
}
} }
} catch (Exception e) { } catch (Exception e) {
LOG.error("Failed to load state.", e); LOG.error("Failed to load state.", e);
@ -188,6 +184,12 @@ public class FileSystemRMStateStore extends RMStateStore {
for(FileStatus childNodeStatus : childNodes) { for(FileStatus childNodeStatus : childNodes) {
assert childNodeStatus.isFile(); assert childNodeStatus.isFile();
String childNodeName = childNodeStatus.getPath().getName(); String childNodeName = childNodeStatus.getPath().getName();
if(childNodeName.startsWith(DELEGATION_TOKEN_SEQUENCE_NUMBER_PREFIX)) {
rmState.rmSecretManagerState.dtSequenceNumber =
Integer.parseInt(childNodeName.split("_")[1]);
continue;
}
Path childNodePath = getNodePath(rmDTSecretManagerRoot, childNodeName); Path childNodePath = getNodePath(rmDTSecretManagerRoot, childNodeName);
byte[] childData = readFile(childNodePath, childNodeStatus.getLen()); byte[] childData = readFile(childNodePath, childNodeStatus.getLen());
ByteArrayInputStream is = new ByteArrayInputStream(childData); ByteArrayInputStream is = new ByteArrayInputStream(childData);
@ -202,10 +204,7 @@ public class FileSystemRMStateStore extends RMStateStore {
long renewDate = fsIn.readLong(); long renewDate = fsIn.readLong();
rmState.rmSecretManagerState.delegationTokenState.put(identifier, rmState.rmSecretManagerState.delegationTokenState.put(identifier,
renewDate); renewDate);
} else if(childNodeName.startsWith(DELEGATION_TOKEN_SEQUENCE_NUMBER_PREFIX)) { } else {
rmState.rmSecretManagerState.dtSequenceNumber =
Integer.parseInt(childNodeName.split("_")[1]);
}else {
LOG.warn("Unknown file for recovering RMDelegationTokenSecretManager"); LOG.warn("Unknown file for recovering RMDelegationTokenSecretManager");
} }
fsIn.close(); fsIn.close();
@ -215,7 +214,9 @@ public class FileSystemRMStateStore extends RMStateStore {
@Override @Override
public synchronized void storeApplicationState(String appId, public synchronized void storeApplicationState(String appId,
ApplicationStateDataPBImpl appStateDataPB) throws Exception { ApplicationStateDataPBImpl appStateDataPB) throws Exception {
Path nodeCreatePath = getNodePath(rmAppRoot, appId); Path appDirPath = getAppDir(rmAppRoot, appId);
fs.mkdirs(appDirPath);
Path nodeCreatePath = getNodePath(appDirPath, appId);
LOG.info("Storing info for app: " + appId + " at: " + nodeCreatePath); LOG.info("Storing info for app: " + appId + " at: " + nodeCreatePath);
byte[] appStateData = appStateDataPB.getProto().toByteArray(); byte[] appStateData = appStateDataPB.getProto().toByteArray();
@ -232,7 +233,11 @@ public class FileSystemRMStateStore extends RMStateStore {
@Override @Override
public synchronized void storeApplicationAttemptState(String attemptId, public synchronized void storeApplicationAttemptState(String attemptId,
ApplicationAttemptStateDataPBImpl attemptStateDataPB) throws Exception { ApplicationAttemptStateDataPBImpl attemptStateDataPB) throws Exception {
Path nodeCreatePath = getNodePath(rmAppRoot, attemptId); ApplicationAttemptId appAttemptId =
ConverterUtils.toApplicationAttemptId(attemptId);
Path appDirPath =
getAppDir(rmAppRoot, appAttemptId.getApplicationId().toString());
Path nodeCreatePath = getNodePath(appDirPath, attemptId);
LOG.info("Storing info for attempt: " + attemptId LOG.info("Storing info for attempt: " + attemptId
+ " at: " + nodeCreatePath); + " at: " + nodeCreatePath);
byte[] attemptStateData = attemptStateDataPB.getProto().toByteArray(); byte[] attemptStateData = attemptStateDataPB.getProto().toByteArray();
@ -250,20 +255,9 @@ public class FileSystemRMStateStore extends RMStateStore {
public synchronized void removeApplicationState(ApplicationState appState) public synchronized void removeApplicationState(ApplicationState appState)
throws Exception { throws Exception {
String appId = appState.getAppId().toString(); String appId = appState.getAppId().toString();
Path nodeRemovePath = getNodePath(rmAppRoot, appId); Path nodeRemovePath = getAppDir(rmAppRoot, appId);
LOG.info("Removing info for app: " + appId + " at: " + nodeRemovePath); LOG.info("Removing info for app: " + appId + " at: " + nodeRemovePath);
deleteFile(nodeRemovePath); deleteFile(nodeRemovePath);
for(ApplicationAttemptId attemptId : appState.attempts.keySet()) {
removeApplicationAttemptState(attemptId.toString());
}
}
public synchronized void removeApplicationAttemptState(String attemptId)
throws Exception {
Path nodeRemovePath = getNodePath(rmAppRoot, attemptId);
LOG.info("Removing info for attempt: " + attemptId
+ " at: " + nodeRemovePath);
deleteFile(nodeRemovePath);
} }
@Override @Override
@ -329,6 +323,10 @@ public class FileSystemRMStateStore extends RMStateStore {
deleteFile(nodeCreatePath); deleteFile(nodeCreatePath);
} }
private Path getAppDir(Path root, String appId) {
return getNodePath(root, appId);
}
// FileSystem related code // FileSystem related code
private void deleteFile(Path deletePath) throws Exception { private void deleteFile(Path deletePath) throws Exception {

View File

@ -105,8 +105,6 @@ public class TestRMStateStore {
interface RMStateStoreHelper { interface RMStateStoreHelper {
RMStateStore getRMStateStore() throws Exception; RMStateStore getRMStateStore() throws Exception;
void addOrphanAttemptIfNeeded(RMStateStore testStore,
TestDispatcher dispatcher) throws Exception;
boolean isFinalStateValid() throws Exception; boolean isFinalStateValid() throws Exception;
} }
@ -153,15 +151,6 @@ public class TestRMStateStore {
return store; return store;
} }
@Override
public void addOrphanAttemptIfNeeded(RMStateStore testStore,
TestDispatcher dispatcher) throws Exception {
ApplicationAttemptId attemptId = ConverterUtils.toApplicationAttemptId(
"appattempt_1352994193343_0003_000001");
storeAttempt(testStore, attemptId,
"container_1352994193343_0003_01_000001", null, null, dispatcher);
}
@Override @Override
public boolean isFinalStateValid() throws Exception { public boolean isFinalStateValid() throws Exception {
FileSystem fs = cluster.getFileSystem(); FileSystem fs = cluster.getFileSystem();
@ -289,9 +278,6 @@ public class TestRMStateStore {
attempts.put(attemptIdRemoved, mockRemovedAttempt); attempts.put(attemptIdRemoved, mockRemovedAttempt);
store.removeApplication(mockRemovedApp); store.removeApplication(mockRemovedApp);
// add orphan attempt file to simulate incomplete removal of app state
stateStoreHelper.addOrphanAttemptIfNeeded(store, dispatcher);
// let things settle down // let things settle down
Thread.sleep(1000); Thread.sleep(1000);
store.close(); store.close();
@ -301,9 +287,6 @@ public class TestRMStateStore {
RMState state = store.loadState(); RMState state = store.loadState();
Map<ApplicationId, ApplicationState> rmAppState = state.getApplicationState(); Map<ApplicationId, ApplicationState> rmAppState = state.getApplicationState();
// removed app or orphan attempt is not loaded
assertEquals(1, rmAppState.size());
ApplicationState appState = rmAppState.get(appId1); ApplicationState appState = rmAppState.get(appId1);
// app is loaded // app is loaded
assertNotNull(appState); assertNotNull(appState);