HDFS-9180. Update excluded DataNodes in DFSStripedOutputStream based on failures in data streamers. Contributed by Jing Zhao.
This commit is contained in:
parent
874c8ed239
commit
a8b4d0ff28
@ -377,17 +377,21 @@ private void handleStreamerFailure(String err, Exception e)
|
|||||||
|
|
||||||
private void replaceFailedStreamers() {
|
private void replaceFailedStreamers() {
|
||||||
assert streamers.size() == numAllBlocks;
|
assert streamers.size() == numAllBlocks;
|
||||||
|
final int currentIndex = getCurrentIndex();
|
||||||
|
assert currentIndex == 0;
|
||||||
for (short i = 0; i < numAllBlocks; i++) {
|
for (short i = 0; i < numAllBlocks; i++) {
|
||||||
final StripedDataStreamer oldStreamer = getStripedDataStreamer(i);
|
final StripedDataStreamer oldStreamer = getStripedDataStreamer(i);
|
||||||
if (!oldStreamer.isHealthy()) {
|
if (!oldStreamer.isHealthy()) {
|
||||||
|
LOG.info("replacing previously failed streamer " + oldStreamer);
|
||||||
StripedDataStreamer streamer = new StripedDataStreamer(oldStreamer.stat,
|
StripedDataStreamer streamer = new StripedDataStreamer(oldStreamer.stat,
|
||||||
dfsClient, src, oldStreamer.progress,
|
dfsClient, src, oldStreamer.progress,
|
||||||
oldStreamer.checksum4WriteBlock, cachingStrategy, byteArrayManager,
|
oldStreamer.checksum4WriteBlock, cachingStrategy, byteArrayManager,
|
||||||
favoredNodes, i, coordinator);
|
favoredNodes, i, coordinator);
|
||||||
streamers.set(i, streamer);
|
streamers.set(i, streamer);
|
||||||
currentPackets[i] = null;
|
currentPackets[i] = null;
|
||||||
if (i == 0) {
|
if (i == currentIndex) {
|
||||||
this.streamer = streamer;
|
this.streamer = streamer;
|
||||||
|
this.currentPacket = null;
|
||||||
}
|
}
|
||||||
streamer.start();
|
streamer.start();
|
||||||
}
|
}
|
||||||
@ -404,6 +408,18 @@ private void waitEndBlocks(int i) throws IOException {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private DatanodeInfo[] getExcludedNodes() {
|
||||||
|
List<DatanodeInfo> excluded = new ArrayList<>();
|
||||||
|
for (StripedDataStreamer streamer : streamers) {
|
||||||
|
for (DatanodeInfo e : streamer.getExcludedNodes()) {
|
||||||
|
if (e != null) {
|
||||||
|
excluded.add(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return excluded.toArray(new DatanodeInfo[excluded.size()]);
|
||||||
|
}
|
||||||
|
|
||||||
private void allocateNewBlock() throws IOException {
|
private void allocateNewBlock() throws IOException {
|
||||||
if (currentBlockGroup != null) {
|
if (currentBlockGroup != null) {
|
||||||
for (int i = 0; i < numAllBlocks; i++) {
|
for (int i = 0; i < numAllBlocks; i++) {
|
||||||
@ -412,17 +428,17 @@ private void allocateNewBlock() throws IOException {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
failedStreamers.clear();
|
failedStreamers.clear();
|
||||||
|
DatanodeInfo[] excludedNodes = getExcludedNodes();
|
||||||
|
LOG.debug("Excluding DataNodes when allocating new block: "
|
||||||
|
+ Arrays.asList(excludedNodes));
|
||||||
|
|
||||||
// replace failed streamers
|
// replace failed streamers
|
||||||
replaceFailedStreamers();
|
replaceFailedStreamers();
|
||||||
|
|
||||||
if (LOG.isDebugEnabled()) {
|
LOG.debug("Allocating new block group. The previous block group: "
|
||||||
LOG.debug("Allocating new block group. The previous block group: "
|
+ currentBlockGroup);
|
||||||
+ currentBlockGroup);
|
final LocatedBlock lb = addBlock(excludedNodes, dfsClient, src,
|
||||||
}
|
currentBlockGroup, fileId, favoredNodes);
|
||||||
|
|
||||||
// TODO collect excludedNodes from all the data streamers
|
|
||||||
final LocatedBlock lb = addBlock(null, dfsClient, src, currentBlockGroup,
|
|
||||||
fileId, favoredNodes);
|
|
||||||
assert lb.isStriped();
|
assert lb.isStriped();
|
||||||
if (lb.getLocations().length < numDataBlocks) {
|
if (lb.getLocations().length < numDataBlocks) {
|
||||||
throw new IOException("Failed to get " + numDataBlocks
|
throw new IOException("Failed to get " + numDataBlocks
|
||||||
@ -437,18 +453,17 @@ private void allocateNewBlock() throws IOException {
|
|||||||
numAllBlocks - numDataBlocks);
|
numAllBlocks - numDataBlocks);
|
||||||
for (int i = 0; i < blocks.length; i++) {
|
for (int i = 0; i < blocks.length; i++) {
|
||||||
StripedDataStreamer si = getStripedDataStreamer(i);
|
StripedDataStreamer si = getStripedDataStreamer(i);
|
||||||
if (si.isHealthy()) { // skipping failed data streamer
|
assert si.isHealthy();
|
||||||
if (blocks[i] == null) {
|
if (blocks[i] == null) {
|
||||||
// Set exception and close streamer as there is no block locations
|
// Set exception and close streamer as there is no block locations
|
||||||
// found for the parity block.
|
// found for the parity block.
|
||||||
LOG.warn("Failed to get block location for parity block, index=" + i);
|
LOG.warn("Failed to get block location for parity block, index=" + i);
|
||||||
si.getLastException().set(
|
si.getLastException().set(
|
||||||
new IOException("Failed to get following block, i=" + i));
|
new IOException("Failed to get following block, i=" + i));
|
||||||
si.getErrorState().setInternalError();
|
si.getErrorState().setInternalError();
|
||||||
si.close(true);
|
si.close(true);
|
||||||
} else {
|
} else {
|
||||||
coordinator.getFollowingBlocks().offer(i, blocks[i]);
|
coordinator.getFollowingBlocks().offer(i, blocks[i]);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -462,7 +477,6 @@ private boolean shouldEndBlockGroup() {
|
|||||||
protected synchronized void writeChunk(byte[] bytes, int offset, int len,
|
protected synchronized void writeChunk(byte[] bytes, int offset, int len,
|
||||||
byte[] checksum, int ckoff, int cklen) throws IOException {
|
byte[] checksum, int ckoff, int cklen) throws IOException {
|
||||||
final int index = getCurrentIndex();
|
final int index = getCurrentIndex();
|
||||||
final StripedDataStreamer current = getCurrentStreamer();
|
|
||||||
final int pos = cellBuffers.addTo(index, bytes, offset, len);
|
final int pos = cellBuffers.addTo(index, bytes, offset, len);
|
||||||
final boolean cellFull = pos == cellSize;
|
final boolean cellFull = pos == cellSize;
|
||||||
|
|
||||||
@ -472,6 +486,8 @@ protected synchronized void writeChunk(byte[] bytes, int offset, int len,
|
|||||||
}
|
}
|
||||||
|
|
||||||
currentBlockGroup.setNumBytes(currentBlockGroup.getNumBytes() + len);
|
currentBlockGroup.setNumBytes(currentBlockGroup.getNumBytes() + len);
|
||||||
|
// note: the current streamer can be refreshed after allocating a new block
|
||||||
|
final StripedDataStreamer current = getCurrentStreamer();
|
||||||
if (current.isHealthy()) {
|
if (current.isHealthy()) {
|
||||||
try {
|
try {
|
||||||
super.writeChunk(bytes, offset, len, checksum, ckoff, cklen);
|
super.writeChunk(bytes, offset, len, checksum, ckoff, cklen);
|
||||||
@ -492,11 +508,11 @@ protected synchronized void writeChunk(byte[] bytes, int offset, int len,
|
|||||||
cellBuffers.flipDataBuffers();
|
cellBuffers.flipDataBuffers();
|
||||||
writeParityCells();
|
writeParityCells();
|
||||||
next = 0;
|
next = 0;
|
||||||
// check failure state for all the streamers. Bump GS if necessary
|
|
||||||
checkStreamerFailures();
|
|
||||||
|
|
||||||
// if this is the end of the block group, end each internal block
|
// if this is the end of the block group, end each internal block
|
||||||
if (shouldEndBlockGroup()) {
|
if (shouldEndBlockGroup()) {
|
||||||
|
flushAllInternals();
|
||||||
|
checkStreamerFailures();
|
||||||
for (int i = 0; i < numAllBlocks; i++) {
|
for (int i = 0; i < numAllBlocks; i++) {
|
||||||
final StripedDataStreamer s = setCurrentStreamer(i);
|
final StripedDataStreamer s = setCurrentStreamer(i);
|
||||||
if (s.isHealthy()) {
|
if (s.isHealthy()) {
|
||||||
@ -505,6 +521,9 @@ protected synchronized void writeChunk(byte[] bytes, int offset, int len,
|
|||||||
} catch (IOException ignored) {}
|
} catch (IOException ignored) {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
// check failure state for all the streamers. Bump GS if necessary
|
||||||
|
checkStreamerFailures();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
setCurrentStreamer(next);
|
setCurrentStreamer(next);
|
||||||
@ -522,11 +541,32 @@ void enqueueCurrentPacketFull() throws IOException {
|
|||||||
// no need to end block here
|
// no need to end block here
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return whether the data streamer with the given index is streaming data.
|
||||||
|
* Note the streamer may not be in STREAMING stage if the block length is less
|
||||||
|
* than a stripe.
|
||||||
|
*/
|
||||||
|
private boolean isStreamerWriting(int streamerIndex) {
|
||||||
|
final long length = currentBlockGroup == null ?
|
||||||
|
0 : currentBlockGroup.getNumBytes();
|
||||||
|
if (length == 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (streamerIndex >= numDataBlocks) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
final int numCells = (int) ((length - 1) / cellSize + 1);
|
||||||
|
return streamerIndex < numCells;
|
||||||
|
}
|
||||||
|
|
||||||
private Set<StripedDataStreamer> markExternalErrorOnStreamers() {
|
private Set<StripedDataStreamer> markExternalErrorOnStreamers() {
|
||||||
Set<StripedDataStreamer> healthySet = new HashSet<>();
|
Set<StripedDataStreamer> healthySet = new HashSet<>();
|
||||||
for (StripedDataStreamer streamer : streamers) {
|
for (int i = 0; i < numAllBlocks; i++) {
|
||||||
if (streamer.isHealthy() &&
|
final StripedDataStreamer streamer = getStripedDataStreamer(i);
|
||||||
streamer.getStage() == BlockConstructionStage.DATA_STREAMING) {
|
if (streamer.isHealthy() && isStreamerWriting(i)) {
|
||||||
|
Preconditions.checkState(
|
||||||
|
streamer.getStage() == BlockConstructionStage.DATA_STREAMING,
|
||||||
|
"streamer: " + streamer);
|
||||||
streamer.setExternalError();
|
streamer.setExternalError();
|
||||||
healthySet.add(streamer);
|
healthySet.add(streamer);
|
||||||
}
|
}
|
||||||
@ -541,12 +581,14 @@ private Set<StripedDataStreamer> markExternalErrorOnStreamers() {
|
|||||||
*/
|
*/
|
||||||
private void checkStreamerFailures() throws IOException {
|
private void checkStreamerFailures() throws IOException {
|
||||||
Set<StripedDataStreamer> newFailed = checkStreamers();
|
Set<StripedDataStreamer> newFailed = checkStreamers();
|
||||||
if (newFailed.size() > 0) {
|
if (newFailed.size() == 0) {
|
||||||
// for healthy streamers, wait till all of them have fetched the new block
|
return;
|
||||||
// and flushed out all the enqueued packets.
|
|
||||||
flushAllInternals();
|
|
||||||
}
|
}
|
||||||
// get all the current failed streamers after the flush
|
|
||||||
|
// for healthy streamers, wait till all of them have fetched the new block
|
||||||
|
// and flushed out all the enqueued packets.
|
||||||
|
flushAllInternals();
|
||||||
|
// recheck failed streamers again after the flush
|
||||||
newFailed = checkStreamers();
|
newFailed = checkStreamers();
|
||||||
while (newFailed.size() > 0) {
|
while (newFailed.size() > 0) {
|
||||||
failedStreamers.addAll(newFailed);
|
failedStreamers.addAll(newFailed);
|
||||||
@ -629,6 +671,7 @@ private Set<StripedDataStreamer> waitCreatingNewStreams(
|
|||||||
for (StripedDataStreamer streamer : healthyStreamers) {
|
for (StripedDataStreamer streamer : healthyStreamers) {
|
||||||
if (!coordinator.updateStreamerMap.containsKey(streamer)) {
|
if (!coordinator.updateStreamerMap.containsKey(streamer)) {
|
||||||
// close the streamer if it is too slow to create new connection
|
// close the streamer if it is too slow to create new connection
|
||||||
|
LOG.info("close the slow stream " + streamer);
|
||||||
streamer.setStreamerAsClosed();
|
streamer.setStreamerAsClosed();
|
||||||
failed.add(streamer);
|
failed.add(streamer);
|
||||||
}
|
}
|
||||||
|
@ -359,7 +359,7 @@ synchronized void checkRestartingNodeDeadline(DatanodeInfo[] nodes) {
|
|||||||
private volatile String[] storageIDs = null;
|
private volatile String[] storageIDs = null;
|
||||||
private final ErrorState errorState;
|
private final ErrorState errorState;
|
||||||
|
|
||||||
private BlockConstructionStage stage; // block construction stage
|
private volatile BlockConstructionStage stage; // block construction stage
|
||||||
protected long bytesSent = 0; // number of bytes that've been sent
|
protected long bytesSent = 0; // number of bytes that've been sent
|
||||||
private final boolean isLazyPersistFile;
|
private final boolean isLazyPersistFile;
|
||||||
|
|
||||||
@ -588,7 +588,7 @@ public void run() {
|
|||||||
LOG.debug("stage=" + stage + ", " + this);
|
LOG.debug("stage=" + stage + ", " + this);
|
||||||
}
|
}
|
||||||
if (stage == BlockConstructionStage.PIPELINE_SETUP_CREATE) {
|
if (stage == BlockConstructionStage.PIPELINE_SETUP_CREATE) {
|
||||||
LOG.debug("Allocating new block");
|
LOG.debug("Allocating new block: " + this);
|
||||||
setPipeline(nextBlockOutputStream());
|
setPipeline(nextBlockOutputStream());
|
||||||
initDataStreaming();
|
initDataStreaming();
|
||||||
} else if (stage == BlockConstructionStage.PIPELINE_SETUP_APPEND) {
|
} else if (stage == BlockConstructionStage.PIPELINE_SETUP_APPEND) {
|
||||||
@ -748,7 +748,7 @@ void release() {
|
|||||||
void waitForAckedSeqno(long seqno) throws IOException {
|
void waitForAckedSeqno(long seqno) throws IOException {
|
||||||
try (TraceScope ignored = dfsClient.getTracer().
|
try (TraceScope ignored = dfsClient.getTracer().
|
||||||
newScope("waitForAckedSeqno")) {
|
newScope("waitForAckedSeqno")) {
|
||||||
LOG.debug("Waiting for ack for: {}", seqno);
|
LOG.debug("{} waiting for ack for: {}", this, seqno);
|
||||||
long begin = Time.monotonicNow();
|
long begin = Time.monotonicNow();
|
||||||
try {
|
try {
|
||||||
synchronized (dataQueue) {
|
synchronized (dataQueue) {
|
||||||
@ -1085,6 +1085,7 @@ private boolean processDatanodeOrExternalError() throws IOException {
|
|||||||
if (!errorState.hasDatanodeError() && !shouldHandleExternalError()) {
|
if (!errorState.hasDatanodeError() && !shouldHandleExternalError()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
LOG.debug("start process datanode/external error, {}", this);
|
||||||
if (response != null) {
|
if (response != null) {
|
||||||
LOG.info("Error Recovery for " + block +
|
LOG.info("Error Recovery for " + block +
|
||||||
" waiting for responder to exit. ");
|
" waiting for responder to exit. ");
|
||||||
@ -1307,10 +1308,12 @@ private void transfer(final DatanodeInfo src, final DatanodeInfo[] targets,
|
|||||||
* It keeps on trying until a pipeline is setup
|
* It keeps on trying until a pipeline is setup
|
||||||
*/
|
*/
|
||||||
private void setupPipelineForAppendOrRecovery() throws IOException {
|
private void setupPipelineForAppendOrRecovery() throws IOException {
|
||||||
// check number of datanodes
|
// Check number of datanodes. Note that if there is no healthy datanode,
|
||||||
|
// this must be internal error because we mark external error in striped
|
||||||
|
// outputstream only when all the streamers are in the DATA_STREAMING stage
|
||||||
if (nodes == null || nodes.length == 0) {
|
if (nodes == null || nodes.length == 0) {
|
||||||
String msg = "Could not get block locations. " + "Source file \""
|
String msg = "Could not get block locations. " + "Source file \""
|
||||||
+ src + "\" - Aborting...";
|
+ src + "\" - Aborting..." + this;
|
||||||
LOG.warn(msg);
|
LOG.warn(msg);
|
||||||
lastException.set(new IOException(msg));
|
lastException.set(new IOException(msg));
|
||||||
streamerClosed = true;
|
streamerClosed = true;
|
||||||
@ -1462,8 +1465,9 @@ ExtendedBlock updatePipeline(long newGS) throws IOException {
|
|||||||
return newBlock;
|
return newBlock;
|
||||||
}
|
}
|
||||||
|
|
||||||
private int getNumBlockWriteRetry() {
|
DatanodeInfo[] getExcludedNodes() {
|
||||||
return dfsClient.getConf().getNumBlockWriteRetry();
|
return excludedNodes.getAllPresent(excludedNodes.asMap().keySet())
|
||||||
|
.keySet().toArray(new DatanodeInfo[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -1483,10 +1487,7 @@ protected LocatedBlock nextBlockOutputStream() throws IOException {
|
|||||||
errorState.resetInternalError();
|
errorState.resetInternalError();
|
||||||
lastException.clear();
|
lastException.clear();
|
||||||
|
|
||||||
DatanodeInfo[] excluded =
|
DatanodeInfo[] excluded = getExcludedNodes();
|
||||||
excludedNodes.getAllPresent(excludedNodes.asMap().keySet())
|
|
||||||
.keySet()
|
|
||||||
.toArray(new DatanodeInfo[0]);
|
|
||||||
block = oldBlock;
|
block = oldBlock;
|
||||||
lb = locateFollowingBlock(excluded.length > 0 ? excluded : null);
|
lb = locateFollowingBlock(excluded.length > 0 ? excluded : null);
|
||||||
block = lb.getBlock();
|
block = lb.getBlock();
|
||||||
|
@ -105,7 +105,7 @@ protected LocatedBlock nextBlockOutputStream() throws IOException {
|
|||||||
final DatanodeInfo badNode = nodes[getErrorState().getBadNodeIndex()];
|
final DatanodeInfo badNode = nodes[getErrorState().getBadNodeIndex()];
|
||||||
LOG.info("Excluding datanode " + badNode);
|
LOG.info("Excluding datanode " + badNode);
|
||||||
excludedNodes.put(badNode, badNode);
|
excludedNodes.put(badNode, badNode);
|
||||||
throw new IOException("Unable to create new block.");
|
throw new IOException("Unable to create new block." + this);
|
||||||
}
|
}
|
||||||
return lb;
|
return lb;
|
||||||
}
|
}
|
||||||
|
@ -455,3 +455,6 @@
|
|||||||
DFSStripedOutputStream. (jing9 and Walter Su)
|
DFSStripedOutputStream. (jing9 and Walter Su)
|
||||||
|
|
||||||
HDFS-9185. Fix null tracer in ErasureCodingWorker. (Rakesh R via jing9)
|
HDFS-9185. Fix null tracer in ErasureCodingWorker. (Rakesh R via jing9)
|
||||||
|
|
||||||
|
HDFS-9180. Update excluded DataNodes in DFSStripedOutputStream based on failures
|
||||||
|
in data streamers. (jing9)
|
||||||
|
@ -44,6 +44,7 @@
|
|||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
import java.util.concurrent.TimeoutException;
|
import java.util.concurrent.TimeoutException;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
|
||||||
@ -58,6 +59,7 @@ public class StripedFileTestUtil {
|
|||||||
public static final short NUM_DATA_BLOCKS = (short) 6;
|
public static final short NUM_DATA_BLOCKS = (short) 6;
|
||||||
public static final short NUM_PARITY_BLOCKS = (short) 3;
|
public static final short NUM_PARITY_BLOCKS = (short) 3;
|
||||||
public static final int BLOCK_STRIPED_CELL_SIZE = 64 * 1024;
|
public static final int BLOCK_STRIPED_CELL_SIZE = 64 * 1024;
|
||||||
|
public static final int BLOCK_STRIPE_SIZE = BLOCK_STRIPED_CELL_SIZE * NUM_DATA_BLOCKS;
|
||||||
|
|
||||||
static final int stripesPerBlock = 4;
|
static final int stripesPerBlock = 4;
|
||||||
static final int blockSize = BLOCK_STRIPED_CELL_SIZE * stripesPerBlock;
|
static final int blockSize = BLOCK_STRIPED_CELL_SIZE * stripesPerBlock;
|
||||||
@ -113,7 +115,9 @@ static void verifyPread(FileSystem fs, Path srcPath, int fileLength,
|
|||||||
offset += target;
|
offset += target;
|
||||||
}
|
}
|
||||||
for (int i = 0; i < fileLength - startOffset; i++) {
|
for (int i = 0; i < fileLength - startOffset; i++) {
|
||||||
assertEquals("Byte at " + (startOffset + i) + " is different, " + "the startOffset is " + startOffset, expected[startOffset + i], result[i]);
|
assertEquals("Byte at " + (startOffset + i) + " is different, "
|
||||||
|
+ "the startOffset is " + startOffset, expected[startOffset + i],
|
||||||
|
result[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -251,11 +255,17 @@ public static short getRealTotalBlockNum(int numBytes) {
|
|||||||
return (short) (getRealDataBlockNum(numBytes) + NUM_PARITY_BLOCKS);
|
return (short) (getRealDataBlockNum(numBytes) + NUM_PARITY_BLOCKS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static void waitBlockGroupsReported(DistributedFileSystem fs,
|
||||||
|
String src) throws Exception {
|
||||||
|
waitBlockGroupsReported(fs, src, 0);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Wait for all the internalBlocks of the blockGroups of the given file to be reported.
|
* Wait for all the internalBlocks of the blockGroups of the given file to be
|
||||||
|
* reported.
|
||||||
*/
|
*/
|
||||||
public static void waitBlockGroupsReported(DistributedFileSystem fs, String src)
|
public static void waitBlockGroupsReported(DistributedFileSystem fs,
|
||||||
throws IOException, InterruptedException, TimeoutException {
|
String src, int numDeadDNs) throws Exception {
|
||||||
boolean success;
|
boolean success;
|
||||||
final int ATTEMPTS = 40;
|
final int ATTEMPTS = 40;
|
||||||
int count = 0;
|
int count = 0;
|
||||||
@ -265,11 +275,12 @@ public static void waitBlockGroupsReported(DistributedFileSystem fs, String src)
|
|||||||
count++;
|
count++;
|
||||||
LocatedBlocks lbs = fs.getClient().getLocatedBlocks(src, 0);
|
LocatedBlocks lbs = fs.getClient().getLocatedBlocks(src, 0);
|
||||||
for (LocatedBlock lb : lbs.getLocatedBlocks()) {
|
for (LocatedBlock lb : lbs.getLocatedBlocks()) {
|
||||||
short expected = getRealTotalBlockNum((int) lb.getBlockSize());
|
short expected = (short) (getRealTotalBlockNum((int) lb.getBlockSize())
|
||||||
|
- numDeadDNs);
|
||||||
int reported = lb.getLocations().length;
|
int reported = lb.getLocations().length;
|
||||||
if (reported != expected){
|
if (reported < expected){
|
||||||
success = false;
|
success = false;
|
||||||
System.out.println("blockGroup " + lb.getBlock() + " of file " + src
|
LOG.info("blockGroup " + lb.getBlock() + " of file " + src
|
||||||
+ " has reported internalBlocks " + reported
|
+ " has reported internalBlocks " + reported
|
||||||
+ " (desired " + expected + "); locations "
|
+ " (desired " + expected + "); locations "
|
||||||
+ Joiner.on(' ').join(lb.getLocations()));
|
+ Joiner.on(' ').join(lb.getLocations()));
|
||||||
@ -278,7 +289,7 @@ public static void waitBlockGroupsReported(DistributedFileSystem fs, String src)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (success) {
|
if (success) {
|
||||||
System.out.println("All blockGroups of file " + src
|
LOG.info("All blockGroups of file " + src
|
||||||
+ " verified to have all internalBlocks.");
|
+ " verified to have all internalBlocks.");
|
||||||
}
|
}
|
||||||
} while (!success && count < ATTEMPTS);
|
} while (!success && count < ATTEMPTS);
|
||||||
@ -348,10 +359,9 @@ public static void verifyLocatedStripedBlocks(LocatedBlocks lbs, int groupSize)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void checkData(DistributedFileSystem dfs, Path srcPath, int length,
|
static void checkData(DistributedFileSystem dfs, Path srcPath, int length,
|
||||||
int[] killedDnIndex, long oldGS) throws IOException {
|
List<DatanodeInfo> killedList, List<Long> oldGSList) throws IOException {
|
||||||
|
|
||||||
StripedFileTestUtil.verifyLength(dfs, srcPath, length);
|
StripedFileTestUtil.verifyLength(dfs, srcPath, length);
|
||||||
Arrays.sort(killedDnIndex);
|
|
||||||
List<List<LocatedBlock>> blockGroupList = new ArrayList<>();
|
List<List<LocatedBlock>> blockGroupList = new ArrayList<>();
|
||||||
LocatedBlocks lbs = dfs.getClient().getLocatedBlocks(srcPath.toString(), 0L,
|
LocatedBlocks lbs = dfs.getClient().getLocatedBlocks(srcPath.toString(), 0L,
|
||||||
Long.MAX_VALUE);
|
Long.MAX_VALUE);
|
||||||
@ -361,10 +371,12 @@ static void checkData(DistributedFileSystem dfs, Path srcPath, int length,
|
|||||||
}
|
}
|
||||||
assertEquals(expectedNumGroup, lbs.getLocatedBlocks().size());
|
assertEquals(expectedNumGroup, lbs.getLocatedBlocks().size());
|
||||||
|
|
||||||
|
int index = 0;
|
||||||
for (LocatedBlock firstBlock : lbs.getLocatedBlocks()) {
|
for (LocatedBlock firstBlock : lbs.getLocatedBlocks()) {
|
||||||
Assert.assertTrue(firstBlock instanceof LocatedStripedBlock);
|
Assert.assertTrue(firstBlock instanceof LocatedStripedBlock);
|
||||||
|
|
||||||
final long gs = firstBlock.getBlock().getGenerationStamp();
|
final long gs = firstBlock.getBlock().getGenerationStamp();
|
||||||
|
final long oldGS = oldGSList != null ? oldGSList.get(index++) : -1L;
|
||||||
final String s = "gs=" + gs + ", oldGS=" + oldGS;
|
final String s = "gs=" + gs + ", oldGS=" + oldGS;
|
||||||
LOG.info(s);
|
LOG.info(s);
|
||||||
Assert.assertTrue(s, gs >= oldGS);
|
Assert.assertTrue(s, gs >= oldGS);
|
||||||
@ -389,6 +401,7 @@ static void checkData(DistributedFileSystem dfs, Path srcPath, int length,
|
|||||||
byte[][] dataBlockBytes = new byte[NUM_DATA_BLOCKS][];
|
byte[][] dataBlockBytes = new byte[NUM_DATA_BLOCKS][];
|
||||||
byte[][] parityBlockBytes = new byte[NUM_PARITY_BLOCKS][];
|
byte[][] parityBlockBytes = new byte[NUM_PARITY_BLOCKS][];
|
||||||
|
|
||||||
|
Set<Integer> checkSet = new HashSet<>();
|
||||||
// for each block, use BlockReader to read data
|
// for each block, use BlockReader to read data
|
||||||
for (int i = 0; i < blockList.size(); i++) {
|
for (int i = 0; i < blockList.size(); i++) {
|
||||||
final int j = i >= NUM_DATA_BLOCKS? 0: i;
|
final int j = i >= NUM_DATA_BLOCKS? 0: i;
|
||||||
@ -417,19 +430,22 @@ static void checkData(DistributedFileSystem dfs, Path srcPath, int length,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Arrays.binarySearch(killedDnIndex, i) < 0) {
|
DatanodeInfo dn = blockList.get(i).getLocations()[0];
|
||||||
|
if (!killedList.contains(dn)) {
|
||||||
final BlockReader blockReader = BlockReaderTestUtil.getBlockReader(
|
final BlockReader blockReader = BlockReaderTestUtil.getBlockReader(
|
||||||
dfs, lb, 0, block.getNumBytes());
|
dfs, lb, 0, block.getNumBytes());
|
||||||
blockReader.readAll(blockBytes, 0, (int) block.getNumBytes());
|
blockReader.readAll(blockBytes, 0, (int) block.getNumBytes());
|
||||||
blockReader.close();
|
blockReader.close();
|
||||||
|
checkSet.add(i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
LOG.info("Internal blocks to check: " + checkSet);
|
||||||
|
|
||||||
// check data
|
// check data
|
||||||
final int groupPosInFile = group*BLOCK_GROUP_SIZE;
|
final int groupPosInFile = group*BLOCK_GROUP_SIZE;
|
||||||
for (int i = 0; i < dataBlockBytes.length; i++) {
|
for (int i = 0; i < dataBlockBytes.length; i++) {
|
||||||
boolean killed = false;
|
boolean killed = false;
|
||||||
if (Arrays.binarySearch(killedDnIndex, i) >= 0){
|
if (!checkSet.contains(i)) {
|
||||||
killed = true;
|
killed = true;
|
||||||
}
|
}
|
||||||
final byte[] actual = dataBlockBytes[i];
|
final byte[] actual = dataBlockBytes[i];
|
||||||
@ -453,15 +469,15 @@ static void checkData(DistributedFileSystem dfs, Path srcPath, int length,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// check parity
|
// check parity
|
||||||
verifyParityBlocks(dfs.getConf(), lbs.getLocatedBlocks().get(group)
|
verifyParityBlocks(dfs.getConf(),
|
||||||
.getBlockSize(),
|
lbs.getLocatedBlocks().get(group).getBlockSize(),
|
||||||
BLOCK_STRIPED_CELL_SIZE, dataBlockBytes, parityBlockBytes, killedDnIndex);
|
BLOCK_STRIPED_CELL_SIZE, dataBlockBytes, parityBlockBytes, checkSet);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void verifyParityBlocks(Configuration conf, final long size, final int cellSize,
|
static void verifyParityBlocks(Configuration conf, final long size,
|
||||||
byte[][] dataBytes, byte[][] parityBytes, int[] killedDnIndex) {
|
final int cellSize, byte[][] dataBytes, byte[][] parityBytes,
|
||||||
Arrays.sort(killedDnIndex);
|
Set<Integer> checkSet) {
|
||||||
// verify the parity blocks
|
// verify the parity blocks
|
||||||
int parityBlkSize = (int) StripedBlockUtil.getInternalBlockLength(
|
int parityBlkSize = (int) StripedBlockUtil.getInternalBlockLength(
|
||||||
size, cellSize, dataBytes.length, dataBytes.length);
|
size, cellSize, dataBytes.length, dataBytes.length);
|
||||||
@ -482,9 +498,9 @@ static void verifyParityBlocks(Configuration conf, final long size, final int ce
|
|||||||
CodecUtil.createRSRawEncoder(conf, dataBytes.length, parityBytes.length);
|
CodecUtil.createRSRawEncoder(conf, dataBytes.length, parityBytes.length);
|
||||||
encoder.encode(dataBytes, expectedParityBytes);
|
encoder.encode(dataBytes, expectedParityBytes);
|
||||||
for (int i = 0; i < parityBytes.length; i++) {
|
for (int i = 0; i < parityBytes.length; i++) {
|
||||||
if (Arrays.binarySearch(killedDnIndex, dataBytes.length + i) < 0){
|
if (checkSet.contains(i + dataBytes.length)){
|
||||||
Assert.assertArrayEquals("i=" + i + ", killedDnIndex=" + Arrays.toString(killedDnIndex),
|
Assert.assertArrayEquals("i=" + i, expectedParityBytes[i],
|
||||||
expectedParityBytes[i], parityBytes[i]);
|
parityBytes[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -18,11 +18,15 @@
|
|||||||
package org.apache.hadoop.hdfs;
|
package org.apache.hadoop.hdfs;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||||
import org.apache.hadoop.test.GenericTestUtils;
|
import org.apache.hadoop.test.GenericTestUtils;
|
||||||
import org.apache.log4j.Level;
|
import org.apache.log4j.Level;
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
@ -151,6 +155,6 @@ private void testOneFile(String src, int writeBytes) throws Exception {
|
|||||||
StripedFileTestUtil.waitBlockGroupsReported(fs, src);
|
StripedFileTestUtil.waitBlockGroupsReported(fs, src);
|
||||||
|
|
||||||
StripedFileTestUtil.checkData(fs, testPath, writeBytes,
|
StripedFileTestUtil.checkData(fs, testPath, writeBytes,
|
||||||
new int[]{}, 0);
|
new ArrayList<DatanodeInfo>(), null);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -24,23 +24,25 @@
|
|||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Random;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.commons.logging.impl.Log4JLogger;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
|
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
|
||||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
|
import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
|
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
|
||||||
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
||||||
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
|
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
|
||||||
import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager;
|
import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager;
|
||||||
import org.apache.hadoop.hdfs.security.token.block.SecurityTestUtil;
|
import org.apache.hadoop.hdfs.security.token.block.SecurityTestUtil;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
|
||||||
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicy;
|
||||||
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
||||||
import org.apache.hadoop.security.token.Token;
|
import org.apache.hadoop.security.token.Token;
|
||||||
@ -59,6 +61,9 @@ public class TestDFSStripedOutputStreamWithFailure {
|
|||||||
static {
|
static {
|
||||||
GenericTestUtils.setLogLevel(DFSOutputStream.LOG, Level.ALL);
|
GenericTestUtils.setLogLevel(DFSOutputStream.LOG, Level.ALL);
|
||||||
GenericTestUtils.setLogLevel(DataStreamer.LOG, Level.ALL);
|
GenericTestUtils.setLogLevel(DataStreamer.LOG, Level.ALL);
|
||||||
|
GenericTestUtils.setLogLevel(DFSClient.LOG, Level.ALL);
|
||||||
|
((Log4JLogger)LogFactory.getLog(BlockPlacementPolicy.class))
|
||||||
|
.getLogger().setLevel(Level.ALL);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final int NUM_DATA_BLOCKS = StripedFileTestUtil.NUM_DATA_BLOCKS;
|
private static final int NUM_DATA_BLOCKS = StripedFileTestUtil.NUM_DATA_BLOCKS;
|
||||||
@ -134,6 +139,7 @@ static int getLength(int i) {
|
|||||||
private DistributedFileSystem dfs;
|
private DistributedFileSystem dfs;
|
||||||
private final Path dir = new Path("/"
|
private final Path dir = new Path("/"
|
||||||
+ TestDFSStripedOutputStreamWithFailure.class.getSimpleName());
|
+ TestDFSStripedOutputStreamWithFailure.class.getSimpleName());
|
||||||
|
private final Random random = new Random();
|
||||||
|
|
||||||
private void setup(Configuration conf) throws IOException {
|
private void setup(Configuration conf) throws IOException {
|
||||||
final int numDNs = NUM_DATA_BLOCKS + NUM_PARITY_BLOCKS;
|
final int numDNs = NUM_DATA_BLOCKS + NUM_PARITY_BLOCKS;
|
||||||
@ -153,7 +159,8 @@ private void tearDown() {
|
|||||||
private HdfsConfiguration newHdfsConfiguration() {
|
private HdfsConfiguration newHdfsConfiguration() {
|
||||||
final HdfsConfiguration conf = new HdfsConfiguration();
|
final HdfsConfiguration conf = new HdfsConfiguration();
|
||||||
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
|
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
|
||||||
conf.setLong(HdfsClientConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY, 6000L);
|
conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REPLICATION_CONSIDERLOAD_KEY,
|
||||||
|
false);
|
||||||
conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
|
conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
|
||||||
conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY, 0);
|
conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY, 0);
|
||||||
return conf;
|
return conf;
|
||||||
@ -164,11 +171,31 @@ public void testDatanodeFailure56() throws Exception {
|
|||||||
runTest(getLength(56));
|
runTest(getLength(56));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(timeout=240000)
|
||||||
|
public void testDatanodeFailureRandomLength() throws Exception {
|
||||||
|
int lenIndex = random.nextInt(LENGTHS.size());
|
||||||
|
LOG.info("run testMultipleDatanodeFailureRandomLength with length index: "
|
||||||
|
+ lenIndex);
|
||||||
|
runTest(getLength(lenIndex));
|
||||||
|
}
|
||||||
|
|
||||||
@Test(timeout=240000)
|
@Test(timeout=240000)
|
||||||
public void testMultipleDatanodeFailure56() throws Exception {
|
public void testMultipleDatanodeFailure56() throws Exception {
|
||||||
runTestWithMultipleFailure(getLength(56));
|
runTestWithMultipleFailure(getLength(56));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Randomly pick a length and run tests with multiple data failures
|
||||||
|
* TODO: enable this later
|
||||||
|
*/
|
||||||
|
//@Test(timeout=240000)
|
||||||
|
public void testMultipleDatanodeFailureRandomLength() throws Exception {
|
||||||
|
int lenIndex = random.nextInt(LENGTHS.size());
|
||||||
|
LOG.info("run testMultipleDatanodeFailureRandomLength with length index: "
|
||||||
|
+ lenIndex);
|
||||||
|
runTestWithMultipleFailure(getLength(lenIndex));
|
||||||
|
}
|
||||||
|
|
||||||
@Test(timeout=240000)
|
@Test(timeout=240000)
|
||||||
public void testBlockTokenExpired() throws Exception {
|
public void testBlockTokenExpired() throws Exception {
|
||||||
final int length = NUM_DATA_BLOCKS * (BLOCK_SIZE - CELL_SIZE);
|
final int length = NUM_DATA_BLOCKS * (BLOCK_SIZE - CELL_SIZE);
|
||||||
@ -208,11 +235,10 @@ public void testAddBlockWhenNoSufficientDataBlockNumOfNodes()
|
|||||||
}
|
}
|
||||||
cluster.restartNameNodes();
|
cluster.restartNameNodes();
|
||||||
cluster.triggerHeartbeats();
|
cluster.triggerHeartbeats();
|
||||||
DatanodeInfo[] info = dfs.getClient().datanodeReport(
|
DatanodeInfo[] info = dfs.getClient().datanodeReport(DatanodeReportType.LIVE);
|
||||||
DatanodeReportType.LIVE);
|
|
||||||
assertEquals("Mismatches number of live Dns ", numDatanodes, info.length);
|
assertEquals("Mismatches number of live Dns ", numDatanodes, info.length);
|
||||||
final Path dirFile = new Path(dir, "ecfile");
|
final Path dirFile = new Path(dir, "ecfile");
|
||||||
FSDataOutputStream out = null;
|
FSDataOutputStream out;
|
||||||
try {
|
try {
|
||||||
out = dfs.create(dirFile, true);
|
out = dfs.create(dirFile, true);
|
||||||
out.write("something".getBytes());
|
out.write("something".getBytes());
|
||||||
@ -262,6 +288,7 @@ void runTest(final int length) {
|
|||||||
final HdfsConfiguration conf = newHdfsConfiguration();
|
final HdfsConfiguration conf = newHdfsConfiguration();
|
||||||
for (int dn = 0; dn < 9; dn++) {
|
for (int dn = 0; dn < 9; dn++) {
|
||||||
try {
|
try {
|
||||||
|
LOG.info("runTest: dn=" + dn + ", length=" + length);
|
||||||
setup(conf);
|
setup(conf);
|
||||||
runTest(length, new int[]{length/2}, new int[]{dn}, false);
|
runTest(length, new int[]{length/2}, new int[]{dn}, false);
|
||||||
} catch (Throwable e) {
|
} catch (Throwable e) {
|
||||||
@ -277,10 +304,11 @@ void runTest(final int length) {
|
|||||||
|
|
||||||
void runTestWithMultipleFailure(final int length) throws Exception {
|
void runTestWithMultipleFailure(final int length) throws Exception {
|
||||||
final HdfsConfiguration conf = newHdfsConfiguration();
|
final HdfsConfiguration conf = newHdfsConfiguration();
|
||||||
for(int i=0;i<dnIndexSuite.length;i++){
|
for (int[] dnIndex : dnIndexSuite) {
|
||||||
int[] dnIndex = dnIndexSuite[i];
|
|
||||||
int[] killPos = getKillPositions(length, dnIndex.length);
|
int[] killPos = getKillPositions(length, dnIndex.length);
|
||||||
try {
|
try {
|
||||||
|
LOG.info("runTestWithMultipleFailure: length==" + length + ", killPos="
|
||||||
|
+ Arrays.toString(killPos) + ", dnIndex=" + Arrays.toString(dnIndex));
|
||||||
setup(conf);
|
setup(conf);
|
||||||
runTest(length, killPos, dnIndex, false);
|
runTest(length, killPos, dnIndex, false);
|
||||||
} catch (Throwable e) {
|
} catch (Throwable e) {
|
||||||
@ -334,6 +362,8 @@ private void runTest(final int length, final int[] killPos,
|
|||||||
|
|
||||||
long firstGS = -1; // first GS of this block group which never proceeds blockRecovery
|
long firstGS = -1; // first GS of this block group which never proceeds blockRecovery
|
||||||
long oldGS = -1; // the old GS before bumping
|
long oldGS = -1; // the old GS before bumping
|
||||||
|
List<Long> gsList = new ArrayList<>();
|
||||||
|
final List<DatanodeInfo> killedDN = new ArrayList<>();
|
||||||
int numKilled=0;
|
int numKilled=0;
|
||||||
for(; pos.get() < length; ) {
|
for(; pos.get() < length; ) {
|
||||||
final int i = pos.getAndIncrement();
|
final int i = pos.getAndIncrement();
|
||||||
@ -353,7 +383,7 @@ private void runTest(final int length, final int[] killPos,
|
|||||||
waitTokenExpires(out);
|
waitTokenExpires(out);
|
||||||
}
|
}
|
||||||
|
|
||||||
killDatanode(cluster, stripedOut, dnIndex[numKilled], pos);
|
killedDN.add(killDatanode(cluster, stripedOut, dnIndex[numKilled], pos));
|
||||||
numKilled++;
|
numKilled++;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -363,20 +393,18 @@ private void runTest(final int length, final int[] killPos,
|
|||||||
firstGS = getGenerationStamp(stripedOut);
|
firstGS = getGenerationStamp(stripedOut);
|
||||||
oldGS = firstGS;
|
oldGS = firstGS;
|
||||||
}
|
}
|
||||||
|
if (i > 0 && (i + 1) % BLOCK_GROUP_SIZE == 0) {
|
||||||
|
gsList.add(oldGS);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
gsList.add(oldGS);
|
||||||
out.close();
|
out.close();
|
||||||
assertEquals(dnIndex.length, numKilled);
|
assertEquals(dnIndex.length, numKilled);
|
||||||
|
|
||||||
short expectedReported = StripedFileTestUtil.getRealTotalBlockNum(length);
|
StripedFileTestUtil.waitBlockGroupsReported(dfs, fullPath, numKilled);
|
||||||
for(int idx :dnIndex) {
|
|
||||||
if (length > idx * CELL_SIZE || idx >= NUM_DATA_BLOCKS) {
|
|
||||||
expectedReported--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
DFSTestUtil.waitReplication(dfs, p, expectedReported);
|
|
||||||
|
|
||||||
cluster.triggerBlockReports();
|
cluster.triggerBlockReports();
|
||||||
StripedFileTestUtil.checkData(dfs, p, length, dnIndex, oldGS);
|
StripedFileTestUtil.checkData(dfs, p, length, killedDN, gsList);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void write(FSDataOutputStream out, int i) throws IOException {
|
static void write(FSDataOutputStream out, int i) throws IOException {
|
||||||
@ -389,8 +417,7 @@ static void write(FSDataOutputStream out, int i) throws IOException {
|
|||||||
|
|
||||||
static long getGenerationStamp(DFSStripedOutputStream out)
|
static long getGenerationStamp(DFSStripedOutputStream out)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
DFSTestUtil.flushBuffer(out);
|
final long gs = out.getBlock().getGenerationStamp();
|
||||||
final long gs = DFSTestUtil.flushInternal(out).getGenerationStamp();
|
|
||||||
LOG.info("getGenerationStamp returns " + gs);
|
LOG.info("getGenerationStamp returns " + gs);
|
||||||
return gs;
|
return gs;
|
||||||
}
|
}
|
||||||
@ -421,12 +448,15 @@ static DatanodeInfo getDatanodes(StripedDataStreamer streamer) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void killDatanode(MiniDFSCluster cluster, DFSStripedOutputStream out,
|
static DatanodeInfo killDatanode(MiniDFSCluster cluster,
|
||||||
final int dnIndex, final AtomicInteger pos) {
|
DFSStripedOutputStream out, final int dnIndex, final AtomicInteger pos) {
|
||||||
final StripedDataStreamer s = out.getStripedDataStreamer(dnIndex);
|
final StripedDataStreamer s = out.getStripedDataStreamer(dnIndex);
|
||||||
final DatanodeInfo datanode = getDatanodes(s);
|
final DatanodeInfo datanode = getDatanodes(s);
|
||||||
LOG.info("killDatanode " + dnIndex + ": " + datanode + ", pos=" + pos);
|
LOG.info("killDatanode " + dnIndex + ": " + datanode + ", pos=" + pos);
|
||||||
cluster.stopDataNode(datanode.getXferAddr());
|
if (datanode != null) {
|
||||||
|
cluster.stopDataNode(datanode.getXferAddr());
|
||||||
|
}
|
||||||
|
return datanode;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -66,6 +66,7 @@ public void setup() throws IOException {
|
|||||||
Configuration conf = new HdfsConfiguration();
|
Configuration conf = new HdfsConfiguration();
|
||||||
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
|
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
|
||||||
conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY, 0);
|
conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY, 0);
|
||||||
|
conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REPLICATION_CONSIDERLOAD_KEY, false);
|
||||||
cluster = new MiniDFSCluster.Builder(new HdfsConfiguration())
|
cluster = new MiniDFSCluster.Builder(new HdfsConfiguration())
|
||||||
.numDataNodes(numDNs).build();
|
.numDataNodes(numDNs).build();
|
||||||
cluster.getFileSystem().getClient().setErasureCodingPolicy("/", null);
|
cluster.getFileSystem().getClient().setErasureCodingPolicy("/", null);
|
||||||
|
@ -28,6 +28,7 @@
|
|||||||
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
||||||
import org.apache.hadoop.hdfs.web.WebHdfsConstants;
|
import org.apache.hadoop.hdfs.web.WebHdfsConstants;
|
||||||
import org.apache.hadoop.hdfs.web.WebHdfsTestUtil;
|
import org.apache.hadoop.hdfs.web.WebHdfsTestUtil;
|
||||||
|
import org.apache.hadoop.test.GenericTestUtils;
|
||||||
import org.apache.log4j.Level;
|
import org.apache.log4j.Level;
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
@ -49,6 +50,9 @@ public class TestWriteReadStripedFile {
|
|||||||
private static Configuration conf = new HdfsConfiguration();
|
private static Configuration conf = new HdfsConfiguration();
|
||||||
|
|
||||||
static {
|
static {
|
||||||
|
GenericTestUtils.setLogLevel(DFSOutputStream.LOG, Level.ALL);
|
||||||
|
GenericTestUtils.setLogLevel(DataStreamer.LOG, Level.ALL);
|
||||||
|
GenericTestUtils.setLogLevel(DFSClient.LOG, Level.ALL);
|
||||||
((Log4JLogger)LogFactory.getLog(BlockPlacementPolicy.class))
|
((Log4JLogger)LogFactory.getLog(BlockPlacementPolicy.class))
|
||||||
.getLogger().setLevel(Level.ALL);
|
.getLogger().setLevel(Level.ALL);
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user