HDDS-1558. IllegalArgumentException while processing container Reports.
Signed-off-by: Nanda kumar <nanda@apache.org>
This commit is contained in:
parent
bd2590d71b
commit
f3271126fc
|
@ -67,6 +67,7 @@ import io.opentracing.Scope;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
@ -299,8 +300,18 @@ public class HddsDispatcher implements ContainerDispatcher, Auditor {
|
||||||
State containerState = container.getContainerData().getState();
|
State containerState = container.getContainerData().getState();
|
||||||
Preconditions.checkState(
|
Preconditions.checkState(
|
||||||
containerState == State.OPEN || containerState == State.CLOSING);
|
containerState == State.OPEN || containerState == State.CLOSING);
|
||||||
container.getContainerData()
|
// mark and persist the container state to be unhealthy
|
||||||
.setState(ContainerDataProto.State.UNHEALTHY);
|
try {
|
||||||
|
handler.markContainerUhealthy(container);
|
||||||
|
} catch (IOException ioe) {
|
||||||
|
// just log the error here in case marking the container fails,
|
||||||
|
// Return the actual failure response to the client
|
||||||
|
LOG.error("Failed to mark container " + containerID + " UNHEALTHY. ",
|
||||||
|
ioe);
|
||||||
|
}
|
||||||
|
// in any case, the in memory state of the container should be unhealthy
|
||||||
|
Preconditions.checkArgument(
|
||||||
|
container.getContainerData().getState() == State.UNHEALTHY);
|
||||||
sendCloseContainerActionIfNeeded(container);
|
sendCloseContainerActionIfNeeded(container);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -129,6 +129,15 @@ public abstract class Handler {
|
||||||
public abstract void markContainerForClose(Container container)
|
public abstract void markContainerForClose(Container container)
|
||||||
throws IOException;
|
throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Marks the container Unhealthy. Moves the container to UNHEALTHY state.
|
||||||
|
*
|
||||||
|
* @param container container to update
|
||||||
|
* @throws IOException in case of exception
|
||||||
|
*/
|
||||||
|
public abstract void markContainerUhealthy(Container container)
|
||||||
|
throws IOException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Moves the Container to QUASI_CLOSED state.
|
* Moves the Container to QUASI_CLOSED state.
|
||||||
*
|
*
|
||||||
|
|
|
@ -339,8 +339,10 @@ public class KeyValueContainer implements Container<KeyValueContainerData> {
|
||||||
updateContainerFile(containerFile);
|
updateContainerFile(containerFile);
|
||||||
|
|
||||||
} catch (StorageContainerException ex) {
|
} catch (StorageContainerException ex) {
|
||||||
if (oldState != null) {
|
if (oldState != null
|
||||||
// Failed to update .container file. Reset the state to CLOSING
|
&& containerData.getState() != ContainerDataProto.State.UNHEALTHY) {
|
||||||
|
// Failed to update .container file. Reset the state to old state only
|
||||||
|
// if the current state is not unhealthy.
|
||||||
containerData.setState(oldState);
|
containerData.setState(oldState);
|
||||||
}
|
}
|
||||||
throw ex;
|
throw ex;
|
||||||
|
|
|
@ -892,6 +892,14 @@ public class KeyValueHandler extends Handler {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void markContainerUhealthy(Container container)
|
||||||
|
throws IOException {
|
||||||
|
// this will mark the container unhealthy and a close container action will
|
||||||
|
// be sent from the dispatcher to SCM to close down this container.
|
||||||
|
container.markContainerUnhealthy();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void quasiCloseContainer(Container container)
|
public void quasiCloseContainer(Container container)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
@ -920,6 +928,12 @@ public class KeyValueHandler extends Handler {
|
||||||
if (state == State.CLOSED) {
|
if (state == State.CLOSED) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (state == State.UNHEALTHY) {
|
||||||
|
throw new StorageContainerException(
|
||||||
|
"Cannot close container #" + container.getContainerData()
|
||||||
|
.getContainerID() + " while in " + state + " state.",
|
||||||
|
ContainerProtos.Result.CONTAINER_UNHEALTHY);
|
||||||
|
}
|
||||||
// The container has to be either in CLOSING or in QUASI_CLOSED state.
|
// The container has to be either in CLOSING or in QUASI_CLOSED state.
|
||||||
if (state != State.CLOSING && state != State.QUASI_CLOSED) {
|
if (state != State.CLOSING && state != State.QUASI_CLOSED) {
|
||||||
ContainerProtos.Result error = state == State.INVALID ?
|
ContainerProtos.Result error = state == State.INVALID ?
|
||||||
|
|
|
@ -24,12 +24,16 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration;
|
||||||
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
|
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
|
||||||
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
|
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
|
||||||
import org.apache.hadoop.ozone.MiniOzoneCluster;
|
import org.apache.hadoop.ozone.MiniOzoneCluster;
|
||||||
|
import org.apache.hadoop.ozone.OzoneConsts;
|
||||||
import org.apache.hadoop.ozone.client.ObjectStore;
|
import org.apache.hadoop.ozone.client.ObjectStore;
|
||||||
import org.apache.hadoop.ozone.client.OzoneClient;
|
import org.apache.hadoop.ozone.client.OzoneClient;
|
||||||
import org.apache.hadoop.ozone.client.OzoneClientFactory;
|
import org.apache.hadoop.ozone.client.OzoneClientFactory;
|
||||||
import org.apache.hadoop.ozone.client.io.KeyOutputStream;
|
import org.apache.hadoop.ozone.client.io.KeyOutputStream;
|
||||||
import org.apache.hadoop.ozone.client.io.OzoneOutputStream;
|
import org.apache.hadoop.ozone.client.io.OzoneOutputStream;
|
||||||
|
import org.apache.hadoop.ozone.container.common.impl.ContainerData;
|
||||||
|
import org.apache.hadoop.ozone.container.common.impl.ContainerDataYaml;
|
||||||
import org.apache.hadoop.ozone.container.common.impl.HddsDispatcher;
|
import org.apache.hadoop.ozone.container.common.impl.HddsDispatcher;
|
||||||
|
import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData;
|
||||||
import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer;
|
import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer;
|
||||||
import org.apache.hadoop.ozone.om.helpers.OmKeyArgs;
|
import org.apache.hadoop.ozone.om.helpers.OmKeyArgs;
|
||||||
import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo;
|
import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo;
|
||||||
|
@ -49,10 +53,13 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.
|
||||||
HDDS_COMMAND_STATUS_REPORT_INTERVAL;
|
HDDS_COMMAND_STATUS_REPORT_INTERVAL;
|
||||||
import static org.apache.hadoop.hdds.HddsConfigKeys.
|
import static org.apache.hadoop.hdds.HddsConfigKeys.
|
||||||
HDDS_CONTAINER_REPORT_INTERVAL;
|
HDDS_CONTAINER_REPORT_INTERVAL;
|
||||||
|
import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.UNHEALTHY;
|
||||||
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.
|
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.
|
||||||
HDDS_SCM_WATCHER_TIMEOUT;
|
HDDS_SCM_WATCHER_TIMEOUT;
|
||||||
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.
|
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.
|
||||||
OZONE_SCM_STALENODE_INTERVAL;
|
OZONE_SCM_STALENODE_INTERVAL;
|
||||||
|
import static org.hamcrest.core.Is.is;
|
||||||
|
import static org.junit.Assert.assertThat;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests the containerStateMachine failure handling.
|
* Tests the containerStateMachine failure handling.
|
||||||
|
@ -185,4 +192,82 @@ public class TestContainerStateMachineFailures {
|
||||||
Assert.assertEquals(ContainerProtos.Result.CONTAINER_MISSING,
|
Assert.assertEquals(ContainerProtos.Result.CONTAINER_MISSING,
|
||||||
dispatcher.dispatch(request.build(), null).getResult());
|
dispatcher.dispatch(request.build(), null).getResult());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUnhealthyContainer() throws Exception {
|
||||||
|
OzoneOutputStream key =
|
||||||
|
objectStore.getVolume(volumeName).getBucket(bucketName)
|
||||||
|
.createKey("ratis", 1024, ReplicationType.RATIS,
|
||||||
|
ReplicationFactor.ONE, new HashMap<>());
|
||||||
|
// First write and flush creates a container in the datanode
|
||||||
|
key.write("ratis".getBytes());
|
||||||
|
key.flush();
|
||||||
|
key.write("ratis".getBytes());
|
||||||
|
|
||||||
|
//get the name of a valid container
|
||||||
|
OmKeyArgs keyArgs = new OmKeyArgs.Builder().setVolumeName(volumeName).
|
||||||
|
setBucketName(bucketName).setType(HddsProtos.ReplicationType.RATIS)
|
||||||
|
.setFactor(HddsProtos.ReplicationFactor.ONE).setKeyName("ratis")
|
||||||
|
.build();
|
||||||
|
KeyOutputStream groupOutputStream = (KeyOutputStream) key.getOutputStream();
|
||||||
|
List<OmKeyLocationInfo> locationInfoList =
|
||||||
|
groupOutputStream.getLocationInfoList();
|
||||||
|
Assert.assertEquals(1, locationInfoList.size());
|
||||||
|
OmKeyLocationInfo omKeyLocationInfo = locationInfoList.get(0);
|
||||||
|
ContainerData containerData =
|
||||||
|
cluster.getHddsDatanodes().get(0).getDatanodeStateMachine()
|
||||||
|
.getContainer().getContainerSet()
|
||||||
|
.getContainer(omKeyLocationInfo.getContainerID())
|
||||||
|
.getContainerData();
|
||||||
|
Assert.assertTrue(containerData instanceof KeyValueContainerData);
|
||||||
|
KeyValueContainerData keyValueContainerData =
|
||||||
|
(KeyValueContainerData) containerData;
|
||||||
|
// delete the container's chunks directory
|
||||||
|
FileUtil.fullyDelete(new File(keyValueContainerData.getChunksPath()));
|
||||||
|
try {
|
||||||
|
key.close();
|
||||||
|
Assert.fail();
|
||||||
|
} catch (IOException ioe) {
|
||||||
|
Assert.assertTrue(ioe.getMessage().contains(
|
||||||
|
"Requested operation not allowed as ContainerState is UNHEALTHY"));
|
||||||
|
}
|
||||||
|
long containerID = omKeyLocationInfo.getContainerID();
|
||||||
|
|
||||||
|
// Make sure the container is marked unhealthy
|
||||||
|
Assert.assertTrue(
|
||||||
|
cluster.getHddsDatanodes().get(0).getDatanodeStateMachine()
|
||||||
|
.getContainer().getContainerSet().getContainer(containerID)
|
||||||
|
.getContainerState()
|
||||||
|
== ContainerProtos.ContainerDataProto.State.UNHEALTHY);
|
||||||
|
// Check metadata in the .container file
|
||||||
|
File containerFile = new File(keyValueContainerData.getMetadataPath(),
|
||||||
|
containerID + OzoneConsts.CONTAINER_EXTENSION);
|
||||||
|
|
||||||
|
keyValueContainerData = (KeyValueContainerData) ContainerDataYaml
|
||||||
|
.readContainerFile(containerFile);
|
||||||
|
assertThat(keyValueContainerData.getState(), is(UNHEALTHY));
|
||||||
|
|
||||||
|
// restart the hdds datanode and see if the container is listed in the
|
||||||
|
// missing container set and not in the regular set
|
||||||
|
cluster.restartHddsDatanode(0, true);
|
||||||
|
// make sure the container state is still marked unhealthy after restart
|
||||||
|
keyValueContainerData = (KeyValueContainerData) ContainerDataYaml
|
||||||
|
.readContainerFile(containerFile);
|
||||||
|
assertThat(keyValueContainerData.getState(), is(UNHEALTHY));
|
||||||
|
|
||||||
|
OzoneContainer ozoneContainer;
|
||||||
|
ozoneContainer = cluster.getHddsDatanodes().get(0).getDatanodeStateMachine()
|
||||||
|
.getContainer();
|
||||||
|
HddsDispatcher dispatcher = (HddsDispatcher) ozoneContainer.getDispatcher();
|
||||||
|
ContainerProtos.ContainerCommandRequestProto.Builder request =
|
||||||
|
ContainerProtos.ContainerCommandRequestProto.newBuilder();
|
||||||
|
request.setCmdType(ContainerProtos.Type.CloseContainer);
|
||||||
|
request.setContainerID(containerID);
|
||||||
|
request.setCloseContainer(
|
||||||
|
ContainerProtos.CloseContainerRequestProto.getDefaultInstance());
|
||||||
|
request.setDatanodeUuid(
|
||||||
|
cluster.getHddsDatanodes().get(0).getDatanodeDetails().getUuidString());
|
||||||
|
Assert.assertEquals(ContainerProtos.Result.CONTAINER_UNHEALTHY,
|
||||||
|
dispatcher.dispatch(request.build(), null).getResult());
|
||||||
|
}
|
||||||
}
|
}
|
Loading…
Reference in New Issue