From 4f2f0eb0bb3ebb57173a521adca75ec08ddb9fba Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Tue, 13 Sep 2016 14:41:27 +0000 Subject: [PATCH] YARN-5630. NM fails to start after downgrade from 2.8 to 2.7. Contributed by Jason Lowe (cherry picked from commit e7933097354a246b080b46f1a4ca2ef0f39f3b38) --- .../recovery/NMLeveldbStateStoreService.java | 23 +++-- .../TestNMLeveldbStateStoreService.java | 84 +++++++++++-------- 2 files changed, 65 insertions(+), 42 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java index f1ebf40f1e3..eba2b92ca78 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java @@ -271,21 +271,24 @@ public class NMLeveldbStateStoreService extends NMStateStoreService { @Override public void storeContainer(ContainerId containerId, int containerVersion, StartContainerRequest startRequest) throws IOException { + String idStr = containerId.toString(); if (LOG.isDebugEnabled()) { - LOG.debug("storeContainer: containerId= " + containerId + LOG.debug("storeContainer: containerId= " + idStr + ", startRequest= " + startRequest); } - String keyRequest = CONTAINERS_KEY_PREFIX + containerId.toString() + String keyRequest = CONTAINERS_KEY_PREFIX + idStr + CONTAINER_REQUEST_KEY_SUFFIX; - String keyVersion = CONTAINERS_KEY_PREFIX + containerId.toString() - + CONTAINER_VERSION_KEY_SUFFIX; + String keyVersion = getContainerVersionKey(idStr); try { WriteBatch batch = db.createWriteBatch(); try { batch.put(bytes(keyRequest), ((StartContainerRequestPBImpl) startRequest) .getProto().toByteArray()); - batch.put(bytes(keyVersion), bytes(Integer.toString(containerVersion))); + if (containerVersion != 0) { + batch.put(bytes(keyVersion), + bytes(Integer.toString(containerVersion))); + } db.write(batch); } finally { batch.close(); @@ -295,6 +298,11 @@ public class NMLeveldbStateStoreService extends NMStateStoreService { } } + @VisibleForTesting + String getContainerVersionKey(String containerId) { + return CONTAINERS_KEY_PREFIX + containerId + CONTAINER_VERSION_KEY_SUFFIX; + } + @Override public void storeContainerDiagnostics(ContainerId containerId, StringBuilder diagnostics) throws IOException { @@ -1200,6 +1208,11 @@ public class NMLeveldbStateStoreService extends NMStateStoreService { return CURRENT_VERSION_INFO; } + @VisibleForTesting + DB getDB() { + return db; + } + /** * 1) Versioning scheme: major.minor. For e.g. 1.0, 1.1, 1.2...1.25, 2.0 etc. * 2) Any incompatible change of state-store is a major upgrade, and any diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java index 4952db3b74f..1ffda453b8c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java @@ -18,6 +18,7 @@ package org.apache.hadoop.yarn.server.nodemanager.recovery; +import static org.fusesource.leveldbjni.JniDBFactory.bytes; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; @@ -78,7 +79,6 @@ import org.apache.hadoop.yarn.server.records.Version; import org.apache.hadoop.yarn.server.security.BaseContainerTokenSecretManager; import org.apache.hadoop.yarn.server.security.BaseNMTokenSecretManager; import org.apache.hadoop.yarn.server.utils.BuilderUtils; -import org.apache.hadoop.yarn.util.ConverterUtils; import org.iq80.leveldb.DB; import org.junit.After; import org.junit.Assert; @@ -226,48 +226,21 @@ public class TestNMLeveldbStateStoreService { ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 4); ContainerId containerId = ContainerId.newContainerId(appAttemptId, 5); - LocalResource lrsrc = LocalResource.newInstance( - URL.newInstance("hdfs", "somehost", 12345, "/some/path/to/rsrc"), - LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, 123L, - 1234567890L); - Map localResources = - new HashMap(); - localResources.put("rsrc", lrsrc); - Map env = new HashMap(); - env.put("somevar", "someval"); - List containerCmds = new ArrayList(); - containerCmds.add("somecmd"); - containerCmds.add("somearg"); - Map serviceData = new HashMap(); - serviceData.put("someservice", - ByteBuffer.wrap(new byte[] { 0x1, 0x2, 0x3 })); - ByteBuffer containerTokens = - ByteBuffer.wrap(new byte[] { 0x7, 0x8, 0x9, 0xa }); - Map acls = - new HashMap(); - acls.put(ApplicationAccessType.VIEW_APP, "viewuser"); - acls.put(ApplicationAccessType.MODIFY_APP, "moduser"); - ContainerLaunchContext clc = ContainerLaunchContext.newInstance( - localResources, env, containerCmds, serviceData, containerTokens, - acls); - Resource containerRsrc = Resource.newInstance(1357, 3); - ContainerTokenIdentifier containerTokenId = - new ContainerTokenIdentifier(containerId, "host", "user", - containerRsrc, 9876543210L, 42, 2468, Priority.newInstance(7), - 13579); - Token containerToken = Token.newInstance(containerTokenId.getBytes(), - ContainerTokenIdentifier.KIND.toString(), "password".getBytes(), - "tokenservice"); - StartContainerRequest containerReq = - StartContainerRequest.newInstance(clc, containerToken); + StartContainerRequest containerReq = createContainerRequest(containerId); // store a container and verify recovered - stateStore.storeContainer(containerId, 1, containerReq); + stateStore.storeContainer(containerId, 0, containerReq); + + // verify the container version key is not stored for new containers + DB db = stateStore.getDB(); + assertNull("version key present for new container", db.get(bytes( + stateStore.getContainerVersionKey(containerId.toString())))); + restartStateStore(); recoveredContainers = stateStore.loadContainersState(); assertEquals(1, recoveredContainers.size()); RecoveredContainerState rcs = recoveredContainers.get(0); - assertEquals(1, rcs.getVersion()); + assertEquals(0, rcs.getVersion()); assertEquals(RecoveredContainerStatus.REQUESTED, rcs.getStatus()); assertEquals(ContainerExitStatus.INVALID, rcs.getExitCode()); assertEquals(false, rcs.getKilled()); @@ -356,6 +329,43 @@ public class TestNMLeveldbStateStoreService { assertTrue(recoveredContainers.isEmpty()); } + private StartContainerRequest createContainerRequest( + ContainerId containerId) { + LocalResource lrsrc = LocalResource.newInstance( + URL.newInstance("hdfs", "somehost", 12345, "/some/path/to/rsrc"), + LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, 123L, + 1234567890L); + Map localResources = + new HashMap(); + localResources.put("rsrc", lrsrc); + Map env = new HashMap(); + env.put("somevar", "someval"); + List containerCmds = new ArrayList(); + containerCmds.add("somecmd"); + containerCmds.add("somearg"); + Map serviceData = new HashMap(); + serviceData.put("someservice", + ByteBuffer.wrap(new byte[] { 0x1, 0x2, 0x3 })); + ByteBuffer containerTokens = + ByteBuffer.wrap(new byte[] { 0x7, 0x8, 0x9, 0xa }); + Map acls = + new HashMap(); + acls.put(ApplicationAccessType.VIEW_APP, "viewuser"); + acls.put(ApplicationAccessType.MODIFY_APP, "moduser"); + ContainerLaunchContext clc = ContainerLaunchContext.newInstance( + localResources, env, containerCmds, serviceData, containerTokens, + acls); + Resource containerRsrc = Resource.newInstance(1357, 3); + ContainerTokenIdentifier containerTokenId = + new ContainerTokenIdentifier(containerId, "host", "user", + containerRsrc, 9876543210L, 42, 2468, Priority.newInstance(7), + 13579); + Token containerToken = Token.newInstance(containerTokenId.getBytes(), + ContainerTokenIdentifier.KIND.toString(), "password".getBytes(), + "tokenservice"); + return StartContainerRequest.newInstance(clc, containerToken); + } + @Test public void testStartResourceLocalization() throws IOException { String user = "somebody";