HDDS-1527. HDDS Datanode start fails due to datanode.id file read error. Contributed by Siddharth Wagle.

This closes #822.
Authored by Siddharth on 2019-05-16 15:13:10 -07:00; committed by Xiaoyu Yao.
parent fab5b80a36
commit c183bd8e20
2 changed files with 57 additions and 32 deletions

ContainerUtils.java (org.apache.hadoop.ozone.container.common.helpers)

@@ -24,6 +24,7 @@ import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Res
 import static org.apache.hadoop.ozone.container.common.impl.ContainerData.CHARSET_ENCODING;
 
 import java.io.File;
+import java.io.FileInputStream;
 import java.io.IOException;
 import java.nio.file.Paths;
 import java.security.MessageDigest;
@@ -35,6 +36,7 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandRequestProto;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
 import org.apache.hadoop.ozone.OzoneConsts;
 import org.apache.hadoop.ozone.container.common.impl.ContainerData;
@@ -51,6 +53,9 @@ import com.google.common.base.Preconditions;
  */
 public final class ContainerUtils {
 
+  private static final Logger LOG =
+      LoggerFactory.getLogger(ContainerUtils.class);
+
   private ContainerUtils() {
     //never constructed.
   }
@@ -198,7 +203,7 @@
         throw new IOException("Unable to overwrite the datanode ID file.");
       }
     } else {
-      if(!path.getParentFile().exists() &&
+      if (!path.getParentFile().exists() &&
           !path.getParentFile().mkdirs()) {
         throw new IOException("Unable to create datanode ID directories.");
       }
@@ -221,8 +226,16 @@
     try {
       return DatanodeIdYaml.readDatanodeIdFile(path);
     } catch (IOException e) {
-      throw new IOException("Failed to parse DatanodeDetails from "
-          + path.getAbsolutePath(), e);
+      LOG.warn("Error loading DatanodeDetails yaml from " +
+          path.getAbsolutePath(), e);
+      // Try to load as protobuf before giving up
+      try (FileInputStream in = new FileInputStream(path)) {
+        return DatanodeDetails.getFromProtoBuf(
+            HddsProtos.DatanodeDetailsProto.parseFrom(in));
+      } catch (IOException io) {
+        throw new IOException("Failed to parse DatanodeDetails from "
+            + path.getAbsolutePath(), io);
+      }
     }
   }
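The fallback above makes readDatanodeDetailsFrom tolerant of ID files written by older releases, which persisted raw protobuf rather than YAML. A minimal round-trip sketch of the new behavior follows; the /tmp path is illustrative only, and the builder calls (setUuid, setIpAddress, setHostName) are assumed from the era's DatanodeDetails API rather than shown in this diff:

import java.io.File;
import java.io.FileOutputStream;
import java.util.UUID;

import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils;

public class DatanodeIdFallbackSketch {
  public static void main(String[] args) throws Exception {
    // Illustrative path; a real datanode derives this from its configuration.
    File idFile = new File("/tmp/datanode.id");

    // Simulate an ID file left behind by a pre-upgrade datanode: raw
    // protobuf bytes rather than YAML. Builder methods are assumptions.
    DatanodeDetails dn = DatanodeDetails.newBuilder()
        .setUuid(UUID.randomUUID().toString())
        .setIpAddress("127.0.0.1")
        .setHostName("localhost")
        .build();
    try (FileOutputStream out = new FileOutputStream(idFile)) {
      dn.getProtoBufMessage().writeTo(out);
    }

    // Pre-patch, the YAML parse failure here was fatal at startup; with the
    // fallback, the protobuf file is read successfully.
    DatanodeDetails reloaded = ContainerUtils.readDatanodeDetailsFrom(idFile);
    System.out.println(dn.equals(reloaded));   // expected: true
  }
}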

TestMiniOzoneCluster.java (org.apache.hadoop.ozone)

@@ -18,31 +18,13 @@
 package org.apache.hadoop.ozone;
 
-import org.apache.commons.lang3.RandomUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hdds.HddsConfigKeys;
-import org.apache.hadoop.hdds.conf.OzoneConfiguration;
-import org.apache.hadoop.hdds.protocol.DatanodeDetails;
-import org.apache.hadoop.hdds.scm.pipeline.PipelineID;
-import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
-import org.apache.hadoop.ozone.container.common.SCMTestUtils;
-import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils;
-import org.apache.hadoop.ozone.container.common.statemachine
-    .DatanodeStateMachine;
-import org.apache.hadoop.ozone.container.common.statemachine
-    .EndpointStateMachine;
-import org.apache.hadoop.ozone.container.ozoneimpl.TestOzoneContainer;
-import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
-import org.apache.hadoop.hdds.scm.TestUtils;
-import org.apache.hadoop.hdds.scm.XceiverClientGrpc;
-import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
-import org.apache.hadoop.test.PathUtils;
-import org.apache.hadoop.test.TestGenericTestUtils;
-import org.junit.AfterClass;
-import org.junit.Assert;
-import org.junit.BeforeClass;
-import org.junit.Test;
-import org.yaml.snakeyaml.Yaml;
+import static org.apache.hadoop.hdds.protocol.DatanodeDetails.Port;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY;
+import static org.apache.hadoop.ozone.OzoneConfigKeys.DFS_CONTAINER_RATIS_IPC_RANDOM_PORT;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 
 import java.io.File;
 import java.io.FileOutputStream;
@@ -52,10 +34,29 @@ import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
 
-import static org.apache.hadoop.hdds.protocol.DatanodeDetails.Port;
-import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY;
-import static org.apache.hadoop.ozone.OzoneConfigKeys.DFS_CONTAINER_RATIS_IPC_RANDOM_PORT;
-import static org.junit.Assert.*;
+import org.apache.commons.lang3.RandomUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdds.HddsConfigKeys;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+import org.apache.hadoop.hdds.scm.TestUtils;
+import org.apache.hadoop.hdds.scm.XceiverClientGrpc;
+import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
+import org.apache.hadoop.hdds.scm.pipeline.PipelineID;
+import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
+import org.apache.hadoop.ozone.container.common.SCMTestUtils;
+import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils;
+import org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine;
+import org.apache.hadoop.ozone.container.common.statemachine.EndpointStateMachine;
+import org.apache.hadoop.ozone.container.ozoneimpl.TestOzoneContainer;
+import org.apache.hadoop.test.PathUtils;
+import org.apache.hadoop.test.TestGenericTestUtils;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.yaml.snakeyaml.Yaml;
 
 /**
  * Test cases for mini ozone cluster.
@@ -167,6 +168,17 @@ public class TestMiniOzoneCluster {
     } catch (Exception e) {
       assertTrue(e instanceof IOException);
     }
+
+    // Test upgrade scenario - protobuf file instead of yaml
+    File protoFile = new File(WRITE_TMP, "valid-proto.id");
+    try (FileOutputStream out = new FileOutputStream(protoFile)) {
+      HddsProtos.DatanodeDetailsProto proto = id1.getProtoBufMessage();
+      proto.writeTo(out);
+    }
+    validId = ContainerUtils.readDatanodeDetailsFrom(protoFile);
+    assertEquals(validId.getCertSerialId(), certSerialId);
+    assertEquals(id1, validId);
+    assertEquals(id1.getProtoBufMessage(), validId.getProtoBufMessage());
   }
 
   @Test
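The added block exercises the upgrade path: it writes id1 as raw protobuf and verifies the reader recovers identical details, including the certificate serial ID. For contrast, the same round trip could be checked for the current YAML format. A hedged sketch, assuming ContainerUtils.writeDatanodeDetailsTo(DatanodeDetails, File) is the YAML-writing counterpart of the reader patched above (that method is not shown in this diff); WRITE_TMP and id1 are the fixtures the test already uses:

    // Hypothetical extra assertions for the same test method.
    File yamlFile = new File(WRITE_TMP, "valid-yaml.id");
    ContainerUtils.writeDatanodeDetailsTo(id1, yamlFile);     // assumed signature
    DatanodeDetails fromYaml = ContainerUtils.readDatanodeDetailsFrom(yamlFile);
    assertEquals(id1, fromYaml);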