YARN-8558. NM recovery level db not cleaned up properly on container finish. Contributed by Bibin A Chundatt.

(cherry picked from commit 3d586841ab)
This commit is contained in:
bibinchundatt 2018-07-28 20:52:39 +05:30
parent c2c3eee69c
commit 2e7876a725
2 changed files with 17 additions and 4 deletions

View File

@ -143,9 +143,9 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
NM_TOKENS_KEY_PREFIX + PREV_MASTER_KEY_SUFFIX; NM_TOKENS_KEY_PREFIX + PREV_MASTER_KEY_SUFFIX;
private static final String CONTAINER_TOKENS_KEY_PREFIX = private static final String CONTAINER_TOKENS_KEY_PREFIX =
"ContainerTokens/"; "ContainerTokens/";
private static final String CONTAINER_TOKENS_CURRENT_MASTER_KEY = private static final String CONTAINER_TOKEN_SECRETMANAGER_CURRENT_MASTER_KEY =
CONTAINER_TOKENS_KEY_PREFIX + CURRENT_MASTER_KEY_SUFFIX; CONTAINER_TOKENS_KEY_PREFIX + CURRENT_MASTER_KEY_SUFFIX;
private static final String CONTAINER_TOKENS_PREV_MASTER_KEY = private static final String CONTAINER_TOKEN_SECRETMANAGER_PREV_MASTER_KEY =
CONTAINER_TOKENS_KEY_PREFIX + PREV_MASTER_KEY_SUFFIX; CONTAINER_TOKENS_KEY_PREFIX + PREV_MASTER_KEY_SUFFIX;
private static final String LOG_DELETER_KEY_PREFIX = "LogDeleters/"; private static final String LOG_DELETER_KEY_PREFIX = "LogDeleters/";
@ -658,6 +658,12 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
batch.delete(bytes(keyPrefix + CONTAINER_KILLED_KEY_SUFFIX)); batch.delete(bytes(keyPrefix + CONTAINER_KILLED_KEY_SUFFIX));
batch.delete(bytes(keyPrefix + CONTAINER_EXIT_CODE_KEY_SUFFIX)); batch.delete(bytes(keyPrefix + CONTAINER_EXIT_CODE_KEY_SUFFIX));
batch.delete(bytes(keyPrefix + CONTAINER_UPDATE_TOKEN_SUFFIX)); batch.delete(bytes(keyPrefix + CONTAINER_UPDATE_TOKEN_SUFFIX));
batch.delete(bytes(keyPrefix + CONTAINER_START_TIME_KEY_SUFFIX));
batch.delete(bytes(keyPrefix + CONTAINER_LOG_DIR_KEY_SUFFIX));
batch.delete(bytes(keyPrefix + CONTAINER_VERSION_KEY_SUFFIX));
batch.delete(bytes(keyPrefix + CONTAINER_REMAIN_RETRIES_KEY_SUFFIX));
batch.delete(bytes(keyPrefix + CONTAINER_RESTART_TIMES_SUFFIX));
batch.delete(bytes(keyPrefix + CONTAINER_WORK_DIR_KEY_SUFFIX));
List<String> unknownKeysForContainer = containerUnknownKeySuffixes List<String> unknownKeysForContainer = containerUnknownKeySuffixes
.removeAll(containerId); .removeAll(containerId);
for (String unknownKeySuffix : unknownKeysForContainer) { for (String unknownKeySuffix : unknownKeysForContainer) {
@ -1169,13 +1175,13 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
@Override @Override
public void storeContainerTokenCurrentMasterKey(MasterKey key) public void storeContainerTokenCurrentMasterKey(MasterKey key)
throws IOException { throws IOException {
storeMasterKey(CONTAINER_TOKENS_CURRENT_MASTER_KEY, key); storeMasterKey(CONTAINER_TOKEN_SECRETMANAGER_CURRENT_MASTER_KEY, key);
} }
@Override @Override
public void storeContainerTokenPreviousMasterKey(MasterKey key) public void storeContainerTokenPreviousMasterKey(MasterKey key)
throws IOException { throws IOException {
storeMasterKey(CONTAINER_TOKENS_PREV_MASTER_KEY, key); storeMasterKey(CONTAINER_TOKEN_SECRETMANAGER_PREV_MASTER_KEY, key);
} }
@Override @Override

View File

@ -28,7 +28,9 @@ import static org.junit.Assert.assertTrue;
import static org.mockito.Matchers.any; import static org.mockito.Matchers.any;
import static org.mockito.Mockito.isNull; import static org.mockito.Mockito.isNull;
import static org.mockito.Mockito.mock; import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.timeout; import static org.mockito.Mockito.timeout;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify; import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when; import static org.mockito.Mockito.when;
@ -382,6 +384,11 @@ public class TestNMLeveldbStateStoreService {
restartStateStore(); restartStateStore();
recoveredContainers = stateStore.loadContainersState(); recoveredContainers = stateStore.loadContainersState();
assertTrue(recoveredContainers.isEmpty()); assertTrue(recoveredContainers.isEmpty());
// restart and recover once more: the earlier remove must have cleared every
// container key, so this load should never need to call removeContainer
restartStateStore();
NMStateStoreService nmStoreSpy = spy(stateStore);
nmStoreSpy.loadContainersState();
verify(nmStoreSpy,times(0)).removeContainer(any(ContainerId.class));
} }
private void validateRetryAttempts(ContainerId containerId) private void validateRetryAttempts(ContainerId containerId)