HDDS-1026. Reads should fail over to alternate replica. Contributed by Shashikant Banerjee.
This commit is contained in:
parent
e50dc7ee59
commit
965d26c9c7
|
@ -57,6 +57,7 @@ import java.util.concurrent.ExecutionException;
|
|||
import java.util.concurrent.Semaphore;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.TimeoutException;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* A Client for the storageContainer protocol.
|
||||
|
@ -198,11 +199,27 @@ public class XceiverClientGrpc extends XceiverClientSpi {
|
|||
@Override
|
||||
public ContainerCommandResponseProto sendCommand(
|
||||
ContainerCommandRequestProto request) throws IOException {
|
||||
return sendCommandWithRetry(request);
|
||||
try {
|
||||
XceiverClientReply reply;
|
||||
reply = sendCommandWithRetry(request, null);
|
||||
ContainerCommandResponseProto responseProto = reply.getResponse().get();
|
||||
return responseProto;
|
||||
} catch (ExecutionException | InterruptedException e) {
|
||||
throw new IOException("Failed to execute command " + request, e);
|
||||
}
|
||||
}
|
||||
|
||||
public ContainerCommandResponseProto sendCommandWithRetry(
|
||||
ContainerCommandRequestProto request) throws IOException {
|
||||
@Override
|
||||
public XceiverClientReply sendCommand(
|
||||
ContainerCommandRequestProto request, List<UUID> excludeDns)
|
||||
throws IOException {
|
||||
Preconditions.checkState(HddsUtils.isReadOnly(request));
|
||||
return sendCommandWithRetry(request, excludeDns);
|
||||
}
|
||||
|
||||
private XceiverClientReply sendCommandWithRetry(
|
||||
ContainerCommandRequestProto request, List<UUID> excludeDns)
|
||||
throws IOException {
|
||||
ContainerCommandResponseProto responseProto = null;
|
||||
|
||||
// In case of an exception or an error, we will try to read from the
|
||||
|
@ -211,13 +228,24 @@ public class XceiverClientGrpc extends XceiverClientSpi {
|
|||
// TODO: cache the correct leader info in here, so that any subsequent calls
|
||||
// should first go to leader
|
||||
List<DatanodeDetails> dns = pipeline.getNodes();
|
||||
for (DatanodeDetails dn : dns) {
|
||||
DatanodeDetails datanode = null;
|
||||
List<DatanodeDetails> healthyDns =
|
||||
excludeDns != null ? dns.stream().filter(dnId -> {
|
||||
for (UUID excludeId : excludeDns) {
|
||||
if (dnId.getUuid().equals(excludeId)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}).collect(Collectors.toList()) : dns;
|
||||
for (DatanodeDetails dn : healthyDns) {
|
||||
try {
|
||||
LOG.debug("Executing command " + request + " on datanode " + dn);
|
||||
// In case the command gets retried on a 2nd datanode,
|
||||
// sendCommandAsyncCall will create a new channel and async stub
|
||||
// in case these don't exist for the specific datanode.
|
||||
responseProto = sendCommandAsync(request, dn).getResponse().get();
|
||||
datanode = dn;
|
||||
if (responseProto.getResult() == ContainerProtos.Result.SUCCESS) {
|
||||
break;
|
||||
}
|
||||
|
@ -226,14 +254,15 @@ public class XceiverClientGrpc extends XceiverClientSpi {
|
|||
.getUuidString(), e);
|
||||
if (Status.fromThrowable(e.getCause()).getCode()
|
||||
== Status.UNAUTHENTICATED.getCode()) {
|
||||
throw new SCMSecurityException("Failed to authenticate with " +
|
||||
"GRPC XceiverServer with Ozone block token.");
|
||||
throw new SCMSecurityException("Failed to authenticate with "
|
||||
+ "GRPC XceiverServer with Ozone block token.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (responseProto != null) {
|
||||
return responseProto;
|
||||
return new XceiverClientReply(
|
||||
CompletableFuture.completedFuture(responseProto), datanode.getUuid());
|
||||
} else {
|
||||
throw new IOException(
|
||||
"Failed to execute command " + request + " on the pipeline "
|
||||
|
@ -256,10 +285,10 @@ public class XceiverClientGrpc extends XceiverClientSpi {
|
|||
* @throws IOException
|
||||
*/
|
||||
@Override
|
||||
public XceiverClientAsyncReply sendCommandAsync(
|
||||
public XceiverClientReply sendCommandAsync(
|
||||
ContainerCommandRequestProto request)
|
||||
throws IOException, ExecutionException, InterruptedException {
|
||||
XceiverClientAsyncReply asyncReply =
|
||||
XceiverClientReply asyncReply =
|
||||
sendCommandAsync(request, pipeline.getFirstNode());
|
||||
|
||||
// TODO : for now make this API sync in nature as async requests are
|
||||
|
@ -272,7 +301,7 @@ public class XceiverClientGrpc extends XceiverClientSpi {
|
|||
return asyncReply;
|
||||
}
|
||||
|
||||
private XceiverClientAsyncReply sendCommandAsync(
|
||||
private XceiverClientReply sendCommandAsync(
|
||||
ContainerCommandRequestProto request, DatanodeDetails dn)
|
||||
throws IOException, ExecutionException, InterruptedException {
|
||||
if (closed) {
|
||||
|
@ -327,7 +356,7 @@ public class XceiverClientGrpc extends XceiverClientSpi {
|
|||
});
|
||||
requestObserver.onNext(request);
|
||||
requestObserver.onCompleted();
|
||||
return new XceiverClientAsyncReply(replyFuture);
|
||||
return new XceiverClientReply(replyFuture);
|
||||
}
|
||||
|
||||
private void reconnect(DatanodeDetails dn, String encodedToken)
|
||||
|
|
|
@ -204,7 +204,6 @@ public final class XceiverClientRatis extends XceiverClientSpi {
|
|||
return minIndex.isPresent() ? minIndex.getAsLong() : 0;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public long watchForCommit(long index, long timeout)
|
||||
throws InterruptedException, ExecutionException, TimeoutException,
|
||||
|
@ -254,7 +253,7 @@ public final class XceiverClientRatis extends XceiverClientSpi {
|
|||
commitInfoMap.remove(address);
|
||||
LOG.info(
|
||||
"Could not commit " + index + " to all the nodes. Server " + address
|
||||
+ " has failed" + "Committed by majority.");
|
||||
+ " has failed." + " Committed by majority.");
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
@ -266,9 +265,9 @@ public final class XceiverClientRatis extends XceiverClientSpi {
|
|||
* @return Response to the command
|
||||
*/
|
||||
@Override
|
||||
public XceiverClientAsyncReply sendCommandAsync(
|
||||
public XceiverClientReply sendCommandAsync(
|
||||
ContainerCommandRequestProto request) {
|
||||
XceiverClientAsyncReply asyncReply = new XceiverClientAsyncReply(null);
|
||||
XceiverClientReply asyncReply = new XceiverClientReply(null);
|
||||
CompletableFuture<RaftClientReply> raftClientReply =
|
||||
sendRequestAsync(request);
|
||||
CompletableFuture<ContainerCommandResponseProto> containerCommandResponse =
|
||||
|
@ -291,6 +290,8 @@ public final class XceiverClientRatis extends XceiverClientSpi {
|
|||
if (response.getResult() == ContainerProtos.Result.SUCCESS) {
|
||||
updateCommitInfosMap(reply.getCommitInfos());
|
||||
asyncReply.setLogIndex(reply.getLogIndex());
|
||||
asyncReply.setDatanode(
|
||||
RatisHelper.toDatanodeId(reply.getReplierId()));
|
||||
}
|
||||
return response;
|
||||
} catch (InvalidProtocolBufferException e) {
|
||||
|
|
|
@ -18,6 +18,9 @@
|
|||
|
||||
package org.apache.hadoop.hdds.scm.storage;
|
||||
|
||||
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
|
||||
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
|
||||
import org.apache.hadoop.hdds.scm.XceiverClientReply;
|
||||
import org.apache.hadoop.hdds.scm.container.common.helpers
|
||||
.StorageContainerException;
|
||||
import org.apache.hadoop.ozone.common.Checksum;
|
||||
|
@ -35,8 +38,11 @@ import java.io.EOFException;
|
|||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
|
||||
/**
|
||||
* An {@link InputStream} used by the REST service in combination with the
|
||||
|
@ -204,27 +210,57 @@ public class BlockInputStream extends InputStream implements Seekable {
|
|||
// On every chunk read chunkIndex should be increased so as to read the
|
||||
// next chunk
|
||||
chunkIndex += 1;
|
||||
final ReadChunkResponseProto readChunkResponse;
|
||||
XceiverClientReply reply;
|
||||
ReadChunkResponseProto readChunkResponse = null;
|
||||
final ChunkInfo chunkInfo = chunks.get(chunkIndex);
|
||||
List<UUID> excludeDns = null;
|
||||
ByteString byteString;
|
||||
List<DatanodeDetails> dnList = xceiverClient.getPipeline().getNodes();
|
||||
while (true) {
|
||||
try {
|
||||
readChunkResponse = ContainerProtocolCalls
|
||||
.readChunk(xceiverClient, chunkInfo, blockID, traceID);
|
||||
reply = ContainerProtocolCalls
|
||||
.readChunk(xceiverClient, chunkInfo, blockID, traceID, excludeDns);
|
||||
ContainerProtos.ContainerCommandResponseProto response;
|
||||
response = reply.getResponse().get();
|
||||
ContainerProtocolCalls.validateContainerResponse(response);
|
||||
readChunkResponse = response.getReadChunk();
|
||||
} catch (IOException e) {
|
||||
if (e instanceof StorageContainerException) {
|
||||
throw e;
|
||||
}
|
||||
throw new IOException("Unexpected OzoneException: " + e.toString(), e);
|
||||
} catch (ExecutionException | InterruptedException e) {
|
||||
throw new IOException(
|
||||
"Failed to execute ReadChunk command for chunk " + chunkInfo
|
||||
.getChunkName(), e);
|
||||
}
|
||||
ByteString byteString = readChunkResponse.getData();
|
||||
byteString = readChunkResponse.getData();
|
||||
try {
|
||||
if (byteString.size() != chunkInfo.getLen()) {
|
||||
// Bytes read from chunk should be equal to chunk size.
|
||||
throw new IOException(String
|
||||
.format("Inconsistent read for chunk=%s len=%d bytesRead=%d",
|
||||
chunkInfo.getChunkName(), chunkInfo.getLen(), byteString.size()));
|
||||
chunkInfo.getChunkName(), chunkInfo.getLen(),
|
||||
byteString.size()));
|
||||
}
|
||||
ChecksumData checksumData = ChecksumData.getFromProtoBuf(
|
||||
chunkInfo.getChecksumData());
|
||||
ChecksumData checksumData =
|
||||
ChecksumData.getFromProtoBuf(chunkInfo.getChecksumData());
|
||||
Checksum.verifyChecksum(byteString, checksumData);
|
||||
break;
|
||||
} catch (IOException ioe) {
|
||||
// we will end up in this situation only if the checksum mismatch
|
||||
// happens or the length of the chunk mismatches.
|
||||
// In this case, read should be retried on a different replica.
|
||||
// TODO: Inform SCM of a possible corrupt container replica here
|
||||
if (excludeDns == null) {
|
||||
excludeDns = new ArrayList<>();
|
||||
}
|
||||
excludeDns.add(reply.getDatanode());
|
||||
if (excludeDns.size() == dnList.size()) {
|
||||
throw ioe;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
buffers = byteString.asReadOnlyByteBufferList();
|
||||
bufferIndex = 0;
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
package org.apache.hadoop.hdds.scm.storage;
|
||||
import com.google.common.base.Preconditions;
|
||||
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
|
||||
import org.apache.hadoop.hdds.scm.XceiverClientAsyncReply;
|
||||
import org.apache.hadoop.hdds.scm.XceiverClientReply;
|
||||
import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
|
||||
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
|
||||
import org.apache.hadoop.ozone.common.Checksum;
|
||||
|
@ -379,7 +379,7 @@ public class BlockOutputStream extends OutputStream {
|
|||
CompletableFuture<ContainerProtos.
|
||||
ContainerCommandResponseProto> flushFuture;
|
||||
try {
|
||||
XceiverClientAsyncReply asyncReply =
|
||||
XceiverClientReply asyncReply =
|
||||
putBlockAsync(xceiverClient, containerBlockData.build(), requestId);
|
||||
CompletableFuture<ContainerProtos.ContainerCommandResponseProto> future =
|
||||
asyncReply.getResponse();
|
||||
|
@ -598,7 +598,7 @@ public class BlockOutputStream extends OutputStream {
|
|||
traceID + ContainerProtos.Type.WriteChunk + chunkIndex + chunkInfo
|
||||
.getChunkName();
|
||||
try {
|
||||
XceiverClientAsyncReply asyncReply =
|
||||
XceiverClientReply asyncReply =
|
||||
writeChunkAsync(xceiverClient, chunkInfo, blockID, data, requestId);
|
||||
CompletableFuture<ContainerProtos.ContainerCommandResponseProto> future =
|
||||
asyncReply.getResponse();
|
||||
|
|
|
@ -21,20 +21,29 @@ package org.apache.hadoop.hdds.scm;
|
|||
|
||||
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos
|
||||
.ContainerCommandResponseProto;
|
||||
|
||||
import java.util.UUID;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
|
||||
/**
|
||||
* This class represents the Async reply from XceiverClient.
|
||||
*/
|
||||
public class XceiverClientAsyncReply {
|
||||
public class XceiverClientReply {
|
||||
|
||||
private CompletableFuture<ContainerCommandResponseProto> response;
|
||||
private Long logIndex;
|
||||
private UUID dnId;
|
||||
|
||||
public XceiverClientAsyncReply(
|
||||
public XceiverClientReply(
|
||||
CompletableFuture<ContainerCommandResponseProto> response) {
|
||||
this.logIndex = (long)0;
|
||||
this(response, null);
|
||||
}
|
||||
|
||||
/**
 * Creates a reply that wraps the given response future and records the id
 * of the datanode associated with it. The log index starts at zero.
 *
 * @param response future holding the container command response
 * @param dnId id of the datanode associated with the reply, may be
 *             {@code null}
 */
public XceiverClientReply(
    CompletableFuture<ContainerCommandResponseProto> response, UUID dnId) {
  this.response = response;
  this.dnId = dnId;
  this.logIndex = 0L;
}
|
||||
|
||||
public CompletableFuture<ContainerCommandResponseProto> getResponse() {
|
||||
|
@ -49,6 +58,14 @@ public class XceiverClientAsyncReply {
|
|||
this.logIndex = logIndex;
|
||||
}
|
||||
|
||||
public UUID getDatanode() {
|
||||
return dnId;
|
||||
}
|
||||
|
||||
public void setDatanode(UUID datanodeId) {
|
||||
this.dnId = datanodeId;
|
||||
}
|
||||
|
||||
public void setResponse(
|
||||
CompletableFuture<ContainerCommandResponseProto> response) {
|
||||
this.response = response;
|
|
@ -28,6 +28,8 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
|
|||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.TimeoutException;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
@ -104,7 +106,7 @@ public abstract class XceiverClientSpi implements Closeable {
|
|||
public ContainerCommandResponseProto sendCommand(
|
||||
ContainerCommandRequestProto request) throws IOException {
|
||||
try {
|
||||
XceiverClientAsyncReply reply;
|
||||
XceiverClientReply reply;
|
||||
reply = sendCommandAsync(request);
|
||||
ContainerCommandResponseProto responseProto = reply.getResponse().get();
|
||||
return responseProto;
|
||||
|
@ -113,6 +115,27 @@ public abstract class XceiverClientSpi implements Closeable {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Sends a given command to server and gets the reply back along with
|
||||
* the server associated info.
|
||||
* @param request Request
|
||||
* @param excludeDns list of servers on which the command won't be sent to.
|
||||
* @return Response to the command
|
||||
* @throws IOException
|
||||
*/
|
||||
public XceiverClientReply sendCommand(
|
||||
ContainerCommandRequestProto request, List<UUID> excludeDns)
|
||||
throws IOException {
|
||||
try {
|
||||
XceiverClientReply reply;
|
||||
reply = sendCommandAsync(request);
|
||||
reply.getResponse().get();
|
||||
return reply;
|
||||
} catch (ExecutionException | InterruptedException e) {
|
||||
throw new IOException("Failed to command " + request, e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Sends a given command to server gets a waitable future back.
|
||||
*
|
||||
|
@ -120,7 +143,7 @@ public abstract class XceiverClientSpi implements Closeable {
|
|||
* @return Response to the command
|
||||
* @throws IOException
|
||||
*/
|
||||
public abstract XceiverClientAsyncReply
|
||||
public abstract XceiverClientReply
|
||||
sendCommandAsync(ContainerCommandRequestProto request)
|
||||
throws IOException, ExecutionException, InterruptedException;
|
||||
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
|
||||
package org.apache.hadoop.hdds.scm.storage;
|
||||
|
||||
import org.apache.hadoop.hdds.scm.XceiverClientAsyncReply;
|
||||
import org.apache.hadoop.hdds.scm.XceiverClientReply;
|
||||
import org.apache.hadoop.hdds.scm.container.common.helpers
|
||||
.BlockNotCommittedException;
|
||||
import org.apache.hadoop.hdds.security.token.OzoneBlockTokenIdentifier;
|
||||
|
@ -57,8 +57,6 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos
|
|||
.PutSmallFileRequestProto;
|
||||
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos
|
||||
.ReadChunkRequestProto;
|
||||
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos
|
||||
.ReadChunkResponseProto;
|
||||
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos
|
||||
.ReadContainerRequestProto;
|
||||
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos
|
||||
|
@ -72,6 +70,8 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.
|
|||
import org.apache.hadoop.hdds.client.BlockID;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
|
||||
/**
|
||||
|
@ -199,7 +199,7 @@ public final class ContainerProtocolCalls {
|
|||
* @throws InterruptedException
|
||||
* @throws ExecutionException
|
||||
*/
|
||||
public static XceiverClientAsyncReply putBlockAsync(
|
||||
public static XceiverClientReply putBlockAsync(
|
||||
XceiverClientSpi xceiverClient, BlockData containerBlockData,
|
||||
String traceID)
|
||||
throws IOException, InterruptedException, ExecutionException {
|
||||
|
@ -217,7 +217,6 @@ public final class ContainerProtocolCalls {
|
|||
builder.setEncodedToken(encodedToken);
|
||||
}
|
||||
ContainerCommandRequestProto request = builder.build();
|
||||
xceiverClient.sendCommand(request);
|
||||
return xceiverClient.sendCommandAsync(request);
|
||||
}
|
||||
|
||||
|
@ -228,11 +227,13 @@ public final class ContainerProtocolCalls {
|
|||
* @param chunk information about chunk to read
|
||||
* @param blockID ID of the block
|
||||
* @param traceID container protocol call args
|
||||
* @param excludeDns datanodes to exclude while executing the command
|
||||
* @return container protocol read chunk response
|
||||
* @throws IOException if there is an I/O error while performing the call
|
||||
*/
|
||||
public static ReadChunkResponseProto readChunk(XceiverClientSpi xceiverClient,
|
||||
ChunkInfo chunk, BlockID blockID, String traceID) throws IOException {
|
||||
public static XceiverClientReply readChunk(XceiverClientSpi xceiverClient,
|
||||
ChunkInfo chunk, BlockID blockID, String traceID, List<UUID> excludeDns)
|
||||
throws IOException {
|
||||
ReadChunkRequestProto.Builder readChunkRequest = ReadChunkRequestProto
|
||||
.newBuilder()
|
||||
.setBlockID(blockID.getDatanodeBlockIDProtobuf())
|
||||
|
@ -251,9 +252,9 @@ public final class ContainerProtocolCalls {
|
|||
builder.setEncodedToken(encodedToken);
|
||||
}
|
||||
ContainerCommandRequestProto request = builder.build();
|
||||
ContainerCommandResponseProto response = xceiverClient.sendCommand(request);
|
||||
validateContainerResponse(response);
|
||||
return response.getReadChunk();
|
||||
XceiverClientReply reply =
|
||||
xceiverClient.sendCommand(request, excludeDns);
|
||||
return reply;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -302,7 +303,7 @@ public final class ContainerProtocolCalls {
|
|||
* @param traceID container protocol call args
|
||||
* @throws IOException if there is an I/O error while performing the call
|
||||
*/
|
||||
public static XceiverClientAsyncReply writeChunkAsync(
|
||||
public static XceiverClientReply writeChunkAsync(
|
||||
XceiverClientSpi xceiverClient, ChunkInfo chunk, BlockID blockID,
|
||||
ByteString data, String traceID)
|
||||
throws IOException, ExecutionException, InterruptedException {
|
||||
|
|
|
@ -27,6 +27,7 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
import java.util.UUID;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.commons.lang3.RandomStringUtils;
|
||||
import org.apache.commons.lang3.RandomUtils;
|
||||
|
@ -879,7 +880,7 @@ public abstract class TestOzoneRpcClientAbstract {
|
|||
|
||||
// Write data into a key
|
||||
OzoneOutputStream out = bucket.createKey(keyName,
|
||||
value.getBytes().length, ReplicationType.STAND_ALONE,
|
||||
value.getBytes().length, ReplicationType.RATIS,
|
||||
ReplicationFactor.ONE, new HashMap<>());
|
||||
out.write(value.getBytes());
|
||||
out.close();
|
||||
|
@ -889,8 +890,6 @@ public abstract class TestOzoneRpcClientAbstract {
|
|||
OzoneKey key = bucket.getKey(keyName);
|
||||
long containerID = ((OzoneKeyDetails) key).getOzoneKeyLocations().get(0)
|
||||
.getContainerID();
|
||||
long localID = ((OzoneKeyDetails) key).getOzoneKeyLocations().get(0)
|
||||
.getLocalID();
|
||||
|
||||
// Get the container by traversing the datanodes. At least one of the
|
||||
// datanode must have this container.
|
||||
|
@ -903,15 +902,114 @@ public abstract class TestOzoneRpcClientAbstract {
|
|||
}
|
||||
}
|
||||
Assert.assertNotNull("Container not found", container);
|
||||
corruptData(container, key);
|
||||
|
||||
// Try reading the key. Since the chunk file is corrupted, it should
|
||||
// throw a checksum mismatch exception.
|
||||
try {
|
||||
OzoneInputStream is = bucket.readKey(keyName);
|
||||
is.read(new byte[100]);
|
||||
fail("Reading corrupted data should fail.");
|
||||
} catch (OzoneChecksumException e) {
|
||||
GenericTestUtils.assertExceptionContains("Checksum mismatch", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests reading a corrputed chunk file throws checksum exception.
|
||||
* @throws IOException
|
||||
*/
|
||||
@Test
|
||||
public void testReadKeyWithCorruptedDataWithMutiNodes() throws IOException {
|
||||
String volumeName = UUID.randomUUID().toString();
|
||||
String bucketName = UUID.randomUUID().toString();
|
||||
|
||||
String value = "sample value";
|
||||
byte[] data = value.getBytes();
|
||||
store.createVolume(volumeName);
|
||||
OzoneVolume volume = store.getVolume(volumeName);
|
||||
volume.createBucket(bucketName);
|
||||
OzoneBucket bucket = volume.getBucket(bucketName);
|
||||
String keyName = UUID.randomUUID().toString();
|
||||
|
||||
// Write data into a key
|
||||
OzoneOutputStream out = bucket.createKey(keyName,
|
||||
value.getBytes().length, ReplicationType.RATIS,
|
||||
ReplicationFactor.THREE, new HashMap<>());
|
||||
out.write(value.getBytes());
|
||||
out.close();
|
||||
|
||||
// We need to find the location of the chunk file corresponding to the
|
||||
// data we just wrote.
|
||||
OzoneKey key = bucket.getKey(keyName);
|
||||
List<OzoneKeyLocation> keyLocation =
|
||||
((OzoneKeyDetails) key).getOzoneKeyLocations();
|
||||
Assert.assertTrue("Key location not found in OM", !keyLocation.isEmpty());
|
||||
long containerID = ((OzoneKeyDetails) key).getOzoneKeyLocations().get(0)
|
||||
.getContainerID();
|
||||
|
||||
// Get the container by traversing the datanodes.
|
||||
List<Container> containerList = new ArrayList<>();
|
||||
Container container;
|
||||
for (HddsDatanodeService hddsDatanode : cluster.getHddsDatanodes()) {
|
||||
container = hddsDatanode.getDatanodeStateMachine().getContainer()
|
||||
.getContainerSet().getContainer(containerID);
|
||||
if (container != null) {
|
||||
containerList.add(container);
|
||||
if (containerList.size() == 3) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
Assert.assertTrue("Container not found", !containerList.isEmpty());
|
||||
corruptData(containerList.get(0), key);
|
||||
// Try reading the key. Read will fail on the first node and will eventually
|
||||
// failover to next replica
|
||||
try {
|
||||
OzoneInputStream is = bucket.readKey(keyName);
|
||||
byte[] b = new byte[data.length];
|
||||
is.read(b);
|
||||
Assert.assertTrue(Arrays.equals(b, data));
|
||||
} catch (OzoneChecksumException e) {
|
||||
fail("Reading corrupted data should not fail.");
|
||||
}
|
||||
corruptData(containerList.get(1), key);
|
||||
// Try reading the key. Read will fail on the first node and will eventually
|
||||
// failover to next replica
|
||||
try {
|
||||
OzoneInputStream is = bucket.readKey(keyName);
|
||||
byte[] b = new byte[data.length];
|
||||
is.read(b);
|
||||
Assert.assertTrue(Arrays.equals(b, data));
|
||||
} catch (OzoneChecksumException e) {
|
||||
fail("Reading corrupted data should not fail.");
|
||||
}
|
||||
corruptData(containerList.get(2), key);
|
||||
// Try reading the key. Read will fail here as all the replica are corrupt
|
||||
try {
|
||||
OzoneInputStream is = bucket.readKey(keyName);
|
||||
byte[] b = new byte[data.length];
|
||||
is.read(b);
|
||||
fail("Reading corrupted data should fail.");
|
||||
} catch (OzoneChecksumException e) {
|
||||
GenericTestUtils.assertExceptionContains("Checksum mismatch", e);
|
||||
}
|
||||
}
|
||||
|
||||
private void corruptData(Container container, OzoneKey key)
|
||||
throws IOException {
|
||||
long containerID = ((OzoneKeyDetails) key).getOzoneKeyLocations().get(0)
|
||||
.getContainerID();
|
||||
long localID = ((OzoneKeyDetails) key).getOzoneKeyLocations().get(0)
|
||||
.getLocalID();
|
||||
// From the containerData, get the block iterator for all the blocks in
|
||||
// the container.
|
||||
KeyValueContainerData containerData =
|
||||
(KeyValueContainerData) container.getContainerData();
|
||||
String containerPath = new File(containerData.getMetadataPath())
|
||||
.getParent();
|
||||
KeyValueBlockIterator keyValueBlockIterator = new KeyValueBlockIterator(
|
||||
containerID, new File(containerPath));
|
||||
String containerPath =
|
||||
new File(containerData.getMetadataPath()).getParent();
|
||||
KeyValueBlockIterator keyValueBlockIterator =
|
||||
new KeyValueBlockIterator(containerID, new File(containerPath));
|
||||
|
||||
// Find the block corresponding to the key we put. We use the localID of
|
||||
// the BlockData to identify our key.
|
||||
|
@ -926,8 +1024,8 @@ public abstract class TestOzoneRpcClientAbstract {
|
|||
|
||||
// Get the location of the chunk file
|
||||
String chunkName = blockData.getChunks().get(0).getChunkName();
|
||||
String containreBaseDir = container.getContainerData().getVolume()
|
||||
.getHddsRootDir().getPath();
|
||||
String containreBaseDir =
|
||||
container.getContainerData().getVolume().getHddsRootDir().getPath();
|
||||
File chunksLocationPath = KeyValueContainerLocationUtil
|
||||
.getChunksLocationPath(containreBaseDir, SCM_ID, containerID);
|
||||
File chunkFile = new File(chunksLocationPath, chunkName);
|
||||
|
@ -935,16 +1033,6 @@ public abstract class TestOzoneRpcClientAbstract {
|
|||
// Corrupt the contents of the chunk file
|
||||
String newData = new String("corrupted data");
|
||||
FileUtils.writeByteArrayToFile(chunkFile, newData.getBytes());
|
||||
|
||||
// Try reading the key. Since the chunk file is corrupted, it should
|
||||
// throw a checksum mismatch exception.
|
||||
try {
|
||||
OzoneInputStream is = bucket.readKey(keyName);
|
||||
is.read(new byte[100]);
|
||||
fail("Reading corrupted data should fail.");
|
||||
} catch (OzoneChecksumException e) {
|
||||
GenericTestUtils.assertExceptionContains("Checksum mismatch", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
Loading…
Reference in New Issue