HDDS-1365. Fix error handling in KeyValueContainerCheck. Contributed by Supratim Deka.
This commit is contained in:
parent
aaaf856f4b
commit
f96fb05a2b
|
@ -68,9 +68,26 @@ public class KeyValueContainerCheck {
|
|||
}
|
||||
|
||||
/**
|
||||
* fast checks are basic and do not look inside the metadata files.
|
||||
* Or into the structures on disk. These checks can be done on Open
|
||||
* containers as well without concurrency implications
|
||||
* Run basic integrity checks on container metadata.
|
||||
* These checks do not look inside the metadata files.
|
||||
* Applicable for OPEN containers.
|
||||
*
|
||||
* @return true : corruption detected, false : no corruption.
|
||||
*/
|
||||
public boolean fastCheck() {
|
||||
boolean corruption = false;
|
||||
try {
|
||||
basicChecks();
|
||||
|
||||
} catch (IOException e) {
|
||||
handleCorruption(e);
|
||||
corruption = true;
|
||||
}
|
||||
|
||||
return corruption;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks :
|
||||
* 1. check directory layout
|
||||
* 2. check container file
|
||||
|
@ -78,24 +95,14 @@ public class KeyValueContainerCheck {
|
|||
* @return void
|
||||
*/
|
||||
|
||||
public KvCheckError fastCheck() {
|
||||
private void basicChecks() throws IOException {
|
||||
|
||||
KvCheckError error;
|
||||
LOG.trace("Running fast check for container {};", containerID);
|
||||
LOG.trace("Running basic checks for container {};", containerID);
|
||||
|
||||
error = loadContainerData();
|
||||
if (error != KvCheckError.ERROR_NONE) {
|
||||
return error;
|
||||
}
|
||||
loadContainerData();
|
||||
|
||||
error = checkLayout();
|
||||
if (error != KvCheckError.ERROR_NONE) {
|
||||
return error;
|
||||
}
|
||||
|
||||
error = checkContainerFile();
|
||||
|
||||
return error;
|
||||
checkLayout();
|
||||
checkContainerFile();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -107,129 +114,80 @@ public class KeyValueContainerCheck {
|
|||
* <p>
|
||||
* fullCheck is a superset of fastCheck
|
||||
*
|
||||
* @return void
|
||||
* @return true : corruption detected, false : no corruption.
|
||||
*/
|
||||
public KvCheckError fullCheck() {
|
||||
/**
|
||||
public boolean fullCheck() {
|
||||
boolean corruption = false;
|
||||
|
||||
*/
|
||||
KvCheckError error;
|
||||
try {
|
||||
basicChecks();
|
||||
checkBlockDB();
|
||||
|
||||
error = fastCheck();
|
||||
if (error != KvCheckError.ERROR_NONE) {
|
||||
|
||||
LOG.trace("fastCheck failed, aborting full check for Container {}",
|
||||
containerID);
|
||||
return error;
|
||||
} catch (IOException e) {
|
||||
handleCorruption(e);
|
||||
corruption = true;
|
||||
}
|
||||
|
||||
error = checkBlockDB();
|
||||
|
||||
return error;
|
||||
return corruption;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check the integrity of the directory structure of the container.
|
||||
*
|
||||
* @return error code or ERROR_NONE
|
||||
*/
|
||||
private KvCheckError checkLayout() {
|
||||
boolean success;
|
||||
KvCheckError error = KvCheckError.ERROR_NONE;
|
||||
private void checkLayout() throws IOException {
|
||||
|
||||
// is metadataPath accessible as a directory?
|
||||
try {
|
||||
checkDirPath(metadataPath);
|
||||
} catch (IOException ie) {
|
||||
error = KvCheckError.METADATA_PATH_ACCESS;
|
||||
handleCorruption(ie.getMessage(), error, ie);
|
||||
return error;
|
||||
}
|
||||
checkDirPath(metadataPath);
|
||||
|
||||
String chunksPath = onDiskContainerData.getChunksPath();
|
||||
// is chunksPath accessible as a directory?
|
||||
try {
|
||||
checkDirPath(chunksPath);
|
||||
} catch (IOException ie) {
|
||||
error = KvCheckError.CHUNKS_PATH_ACCESS;
|
||||
handleCorruption(ie.getMessage(), error, ie);
|
||||
return error;
|
||||
}
|
||||
|
||||
return error;
|
||||
String chunksPath = onDiskContainerData.getChunksPath();
|
||||
checkDirPath(chunksPath);
|
||||
}
|
||||
|
||||
private void checkDirPath(String path) throws IOException {
|
||||
|
||||
File dirPath = new File(path);
|
||||
String errStr = null;
|
||||
boolean success = true;
|
||||
|
||||
try {
|
||||
if (!dirPath.isDirectory()) {
|
||||
success = false;
|
||||
errStr = "Not a directory [" + path + "]";
|
||||
throw new IOException(errStr);
|
||||
}
|
||||
} catch (SecurityException se) {
|
||||
throw new IOException("Security exception checking dir ["
|
||||
+ path + "]", se);
|
||||
} catch (Exception e) {
|
||||
throw new IOException("Generic exception checking dir ["
|
||||
+ path + "]", e);
|
||||
}
|
||||
|
||||
try {
|
||||
String[] ls = dirPath.list();
|
||||
if (ls == null) {
|
||||
// null result implies operation failed
|
||||
success = false;
|
||||
errStr = "null listing for directory [" + path + "]";
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw new IOException("Exception listing dir [" + path + "]", e);
|
||||
}
|
||||
|
||||
if (!success) {
|
||||
Preconditions.checkState(errStr != null);
|
||||
String[] ls = dirPath.list();
|
||||
if (ls == null) {
|
||||
// null result implies operation failed
|
||||
errStr = "null listing for directory [" + path + "]";
|
||||
throw new IOException(errStr);
|
||||
}
|
||||
}
|
||||
|
||||
private KvCheckError checkContainerFile() {
|
||||
private void checkContainerFile() throws IOException {
|
||||
/**
|
||||
* compare the values in the container file loaded from disk,
|
||||
* with the values we are expecting
|
||||
*/
|
||||
KvCheckError error = KvCheckError.ERROR_NONE;
|
||||
String dbType;
|
||||
Preconditions
|
||||
.checkState(onDiskContainerData != null, "Container File not loaded");
|
||||
KvCheckAction next;
|
||||
|
||||
try {
|
||||
ContainerUtils.verifyChecksum(onDiskContainerData);
|
||||
} catch (Exception e) {
|
||||
error = KvCheckError.CONTAINERDATA_CKSUM;
|
||||
handleCorruption("Container File Checksum mismatch", error, e);
|
||||
return error;
|
||||
}
|
||||
ContainerUtils.verifyChecksum(onDiskContainerData);
|
||||
|
||||
if (onDiskContainerData.getContainerType()
|
||||
!= ContainerProtos.ContainerType.KeyValueContainer) {
|
||||
String errStr = "Bad Container type in Containerdata for " + containerID;
|
||||
error = KvCheckError.CONTAINERDATA_TYPE;
|
||||
handleCorruption(errStr, error, null);
|
||||
return error; // Abort if we do not know the type of Container
|
||||
throw new IOException(errStr);
|
||||
}
|
||||
|
||||
if (onDiskContainerData.getContainerID() != containerID) {
|
||||
String errStr =
|
||||
"Bad ContainerID field in Containerdata for " + containerID;
|
||||
error = KvCheckError.CONTAINERDATA_ID;
|
||||
next = handleCorruption(errStr, error, null);
|
||||
if (next == KvCheckAction.ABORT) {
|
||||
return error;
|
||||
} // else continue checking other data elements
|
||||
throw new IOException(errStr);
|
||||
}
|
||||
|
||||
dbType = onDiskContainerData.getContainerDBType();
|
||||
|
@ -237,9 +195,7 @@ public class KeyValueContainerCheck {
|
|||
!dbType.equals(OZONE_METADATA_STORE_IMPL_LEVELDB)) {
|
||||
String errStr = "Unknown DBType [" + dbType
|
||||
+ "] in Container File for [" + containerID + "]";
|
||||
error = KvCheckError.CONTAINERDATA_DBTYPE;
|
||||
handleCorruption(errStr, error, null);
|
||||
return error;
|
||||
throw new IOException(errStr);
|
||||
}
|
||||
|
||||
KeyValueContainerData kvData = onDiskContainerData;
|
||||
|
@ -248,17 +204,11 @@ public class KeyValueContainerCheck {
|
|||
"Bad metadata path in Containerdata for " + containerID + "Expected ["
|
||||
+ metadataPath.toString() + "] Got [" + kvData.getMetadataPath()
|
||||
+ "]";
|
||||
error = KvCheckError.CONTAINERDATA_METADATA_PATH;
|
||||
next = handleCorruption(errStr, error, null);
|
||||
if (next == KvCheckAction.ABORT) {
|
||||
return error;
|
||||
}
|
||||
throw new IOException(errStr);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
private KvCheckError checkBlockDB() {
|
||||
private void checkBlockDB() throws IOException {
|
||||
/**
|
||||
* Check the integrity of the DB inside each container.
|
||||
* In Scope:
|
||||
|
@ -269,52 +219,31 @@ public class KeyValueContainerCheck {
|
|||
* 1. chunk checksum verification. this is left to a separate
|
||||
* slow chunk scanner
|
||||
*/
|
||||
KvCheckError error;
|
||||
Preconditions.checkState(onDiskContainerData != null,
|
||||
"invoke loadContainerData prior to calling this function");
|
||||
File dbFile;
|
||||
File metaDir = new File(metadataPath);
|
||||
|
||||
try {
|
||||
dbFile = KeyValueContainerLocationUtil
|
||||
.getContainerDBFile(metaDir, containerID);
|
||||
dbFile = KeyValueContainerLocationUtil
|
||||
.getContainerDBFile(metaDir, containerID);
|
||||
|
||||
if (!dbFile.exists() || !dbFile.canRead()) {
|
||||
|
||||
String dbFileErrorMsg = "Unable to access DB File [" + dbFile.toString()
|
||||
+ "] for Container [" + containerID + "] metadata path ["
|
||||
+ metadataPath + "]";
|
||||
error = KvCheckError.DB_ACCESS;
|
||||
handleCorruption(dbFileErrorMsg, error, null);
|
||||
return error;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
String dbFileErrorMessage =
|
||||
"Exception when initializing DBFile" + "with metadatapath ["
|
||||
+ metadataPath + "] for Container [" + containerID
|
||||
+ "]";
|
||||
error = KvCheckError.DB_ACCESS;
|
||||
handleCorruption(dbFileErrorMessage, error, e);
|
||||
return error;
|
||||
if (!dbFile.exists() || !dbFile.canRead()) {
|
||||
String dbFileErrorMsg = "Unable to access DB File [" + dbFile.toString()
|
||||
+ "] for Container [" + containerID + "] metadata path ["
|
||||
+ metadataPath + "]";
|
||||
throw new IOException(dbFileErrorMsg);
|
||||
}
|
||||
|
||||
|
||||
onDiskContainerData.setDbFile(dbFile);
|
||||
MetadataStore db = BlockUtils
|
||||
.getDB(onDiskContainerData, checkConfig);
|
||||
|
||||
try {
|
||||
MetadataStore db = BlockUtils
|
||||
.getDB(onDiskContainerData, checkConfig);
|
||||
error = iterateBlockDB(db);
|
||||
} catch (Exception e) {
|
||||
error = KvCheckError.DB_ITERATOR;
|
||||
handleCorruption("Block DB Iterator aborted", error, e);
|
||||
return error;
|
||||
}
|
||||
|
||||
return error;
|
||||
iterateBlockDB(db);
|
||||
}
|
||||
|
||||
private KvCheckError iterateBlockDB(MetadataStore db)
|
||||
private void iterateBlockDB(MetadataStore db)
|
||||
throws IOException {
|
||||
KvCheckError error = KvCheckError.ERROR_NONE;
|
||||
Preconditions.checkState(db != null);
|
||||
|
||||
// get "normal" keys from the Block DB
|
||||
|
@ -328,103 +257,39 @@ public class KeyValueContainerCheck {
|
|||
List<ContainerProtos.ChunkInfo> chunkInfoList = block.getChunks();
|
||||
for (ContainerProtos.ChunkInfo chunk : chunkInfoList) {
|
||||
File chunkFile;
|
||||
try {
|
||||
chunkFile = ChunkUtils
|
||||
.getChunkFile(onDiskContainerData,
|
||||
ChunkInfo.getFromProtoBuf(chunk));
|
||||
} catch (Exception e) {
|
||||
error = KvCheckError.MISSING_CHUNK_FILE;
|
||||
handleCorruption("Unable to access chunk path", error, e);
|
||||
return error;
|
||||
}
|
||||
chunkFile = ChunkUtils.getChunkFile(onDiskContainerData,
|
||||
ChunkInfo.getFromProtoBuf(chunk));
|
||||
|
||||
if (!chunkFile.exists()) {
|
||||
error = KvCheckError.MISSING_CHUNK_FILE;
|
||||
|
||||
// concurrent mutation in Block DB? lookup the block again.
|
||||
byte[] bdata = db.get(
|
||||
Longs.toByteArray(block.getBlockID().getLocalID()));
|
||||
if (bdata == null) {
|
||||
LOG.trace("concurrency with delete, ignoring deleted block");
|
||||
error = KvCheckError.ERROR_NONE;
|
||||
break; // skip to next block from kvIter
|
||||
} else {
|
||||
handleCorruption("Missing chunk file", error, null);
|
||||
return error;
|
||||
String errorStr = "Missing chunk file "
|
||||
+ chunkFile.getAbsolutePath();
|
||||
throw new IOException(errorStr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
private KvCheckError loadContainerData() {
|
||||
KvCheckError error = KvCheckError.ERROR_NONE;
|
||||
private void loadContainerData() throws IOException {
|
||||
|
||||
File containerFile = KeyValueContainer
|
||||
.getContainerFile(metadataPath.toString(), containerID);
|
||||
|
||||
try {
|
||||
onDiskContainerData = (KeyValueContainerData) ContainerDataYaml
|
||||
.readContainerFile(containerFile);
|
||||
} catch (IOException e) {
|
||||
error = KvCheckError.FILE_LOAD;
|
||||
handleCorruption("Unable to load Container File", error, e);
|
||||
}
|
||||
|
||||
return error;
|
||||
onDiskContainerData = (KeyValueContainerData) ContainerDataYaml
|
||||
.readContainerFile(containerFile);
|
||||
}
|
||||
|
||||
private KvCheckAction handleCorruption(String reason,
|
||||
KvCheckError error, Exception e) {
|
||||
|
||||
// XXX HDDS-1201 need to implement corruption handling/reporting
|
||||
|
||||
private void handleCorruption(IOException e) {
|
||||
String errStr =
|
||||
"Corruption detected in container: [" + containerID + "] reason: ["
|
||||
+ reason + "] error code: [" + error + "]";
|
||||
String logMessage = null;
|
||||
|
||||
StackTraceElement[] stackeElems = Thread.currentThread().getStackTrace();
|
||||
String caller =
|
||||
"Corruption reported from Source File: [" + stackeElems[2].getFileName()
|
||||
+ "] Line: [" + stackeElems[2].getLineNumber() + "]";
|
||||
|
||||
if (e != null) {
|
||||
logMessage = errStr + " exception: [" + e.getMessage() + "]";
|
||||
e.printStackTrace();
|
||||
} else {
|
||||
logMessage = errStr;
|
||||
}
|
||||
|
||||
LOG.error(caller);
|
||||
"Corruption detected in container: [" + containerID + "] ";
|
||||
String logMessage = errStr + "Exception: [" + e.getMessage() + "]";
|
||||
LOG.error(logMessage);
|
||||
|
||||
return KvCheckAction.ABORT;
|
||||
}
|
||||
|
||||
/**
|
||||
* Pre-defined error codes for Container Metadata check.
|
||||
*/
|
||||
public enum KvCheckError {
|
||||
ERROR_NONE,
|
||||
FILE_LOAD, // unable to load container metafile
|
||||
METADATA_PATH_ACCESS, // metadata path is not accessible
|
||||
CHUNKS_PATH_ACCESS, // chunks path is not accessible
|
||||
CONTAINERDATA_ID, // bad Container-ID stored in Container file
|
||||
CONTAINERDATA_METADATA_PATH, // bad metadata path in Container file
|
||||
CONTAINERDATA_CHUNKS_PATH, // bad chunks path in Container file
|
||||
CONTAINERDATA_CKSUM, // container file checksum mismatch
|
||||
CONTAINERDATA_TYPE, // container file incorrect type of Container
|
||||
CONTAINERDATA_DBTYPE, // unknown DB Type specified in Container File
|
||||
DB_ACCESS, // unable to load Metastore DB
|
||||
DB_ITERATOR, // unable to create block iterator for Metastore DB
|
||||
MISSING_CHUNK_FILE // chunk file not found
|
||||
}
|
||||
|
||||
private enum KvCheckAction {
|
||||
CONTINUE, // Continue with remaining checks on the corrupt Container
|
||||
ABORT // Abort checks for the container
|
||||
}
|
||||
}
|
|
@ -55,6 +55,7 @@ import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_METADATA_STORE_IMPL;
|
|||
|
||||
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_METADATA_STORE_IMPL_LEVELDB;
|
||||
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_METADATA_STORE_IMPL_ROCKSDB;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
/**
|
||||
|
@ -100,7 +101,7 @@ import static org.junit.Assert.assertTrue;
|
|||
int deletedBlocks = 1;
|
||||
int normalBlocks = 3;
|
||||
int chunksPerBlock = 4;
|
||||
KeyValueContainerCheck.KvCheckError error;
|
||||
boolean corruption = false;
|
||||
|
||||
// test Closed Container
|
||||
createContainerWithBlocks(containerID, normalBlocks, deletedBlocks, 65536,
|
||||
|
@ -114,14 +115,14 @@ import static org.junit.Assert.assertTrue;
|
|||
containerID);
|
||||
|
||||
// first run checks on a Open Container
|
||||
error = kvCheck.fastCheck();
|
||||
assertTrue(error == KeyValueContainerCheck.KvCheckError.ERROR_NONE);
|
||||
corruption = kvCheck.fastCheck();
|
||||
assertFalse(corruption);
|
||||
|
||||
container.close();
|
||||
|
||||
// next run checks on a Closed Container
|
||||
error = kvCheck.fullCheck();
|
||||
assertTrue(error == KeyValueContainerCheck.KvCheckError.ERROR_NONE);
|
||||
corruption = kvCheck.fullCheck();
|
||||
assertFalse(corruption);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
Loading…
Reference in New Issue