HDDS-1956. Aged IO Thread exits on first read

Signed-off-by: Anu Engineer <aengineer@apache.org>
This commit is contained in:
Doroszlai, Attila 2019-08-13 09:52:51 +02:00 committed by Anu Engineer
parent bf457797f6
commit 78b714af9c
3 changed files with 30 additions and 18 deletions

View File

@ -68,7 +68,7 @@ public class MiniOzoneChaosCluster extends MiniOzoneClusterImpl {
this.executorService = Executors.newSingleThreadScheduledExecutor(); this.executorService = Executors.newSingleThreadScheduledExecutor();
this.numDatanodes = getHddsDatanodes().size(); this.numDatanodes = getHddsDatanodes().size();
LOG.info("Starting MiniOzoneChaosCluster with:{} datanodes" + numDatanodes); LOG.info("Starting MiniOzoneChaosCluster with {} datanodes", numDatanodes);
LogUtils.setLogLevel(GrpcClientProtocolClient.LOG, Level.WARN); LogUtils.setLogLevel(GrpcClientProtocolClient.LOG, Level.WARN);
} }
@ -108,7 +108,7 @@ public class MiniOzoneChaosCluster extends MiniOzoneClusterImpl {
LOG.info("{} Completed restarting Datanode: {}", failString, LOG.info("{} Completed restarting Datanode: {}", failString,
dn.getUuid()); dn.getUuid());
} catch (Exception e) { } catch (Exception e) {
LOG.error("Failed to restartNodes Datanode", dn.getUuid()); LOG.error("Failed to restartNodes Datanode {}", dn.getUuid(), e);
} }
} }
} }
@ -119,7 +119,7 @@ public class MiniOzoneChaosCluster extends MiniOzoneClusterImpl {
for (int i = 0; i < numNodesToFail; i++) { for (int i = 0; i < numNodesToFail; i++) {
boolean shouldStop = shouldStop(); boolean shouldStop = shouldStop();
int failedNodeIndex = getNodeToFail(); int failedNodeIndex = getNodeToFail();
String stopString = shouldStop ? "Stopping" : "Starting"; String stopString = shouldStop ? "Stopping" : "Restarting";
DatanodeDetails dn = DatanodeDetails dn =
getHddsDatanodes().get(failedNodeIndex).getDatanodeDetails(); getHddsDatanodes().get(failedNodeIndex).getDatanodeDetails();
try { try {
@ -133,7 +133,7 @@ public class MiniOzoneChaosCluster extends MiniOzoneClusterImpl {
LOG.info("Completed {} DataNode {}", stopString, dn.getUuid()); LOG.info("Completed {} DataNode {}", stopString, dn.getUuid());
} catch (Exception e) { } catch (Exception e) {
LOG.error("Failed to shutdown Datanode", dn.getUuid()); LOG.error("Failed {} Datanode {}", stopString, dn.getUuid(), e);
} }
} }
} }

View File

@ -35,6 +35,7 @@ import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Optional;
import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.CompletableFuture; import java.util.concurrent.CompletableFuture;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
@ -49,7 +50,7 @@ import java.util.concurrent.atomic.AtomicInteger;
*/ */
public class MiniOzoneLoadGenerator { public class MiniOzoneLoadGenerator {
static final Logger LOG = private static final Logger LOG =
LoggerFactory.getLogger(MiniOzoneLoadGenerator.class); LoggerFactory.getLogger(MiniOzoneLoadGenerator.class);
private static String keyNameDelimiter = "_"; private static String keyNameDelimiter = "_";
@ -113,7 +114,7 @@ public class MiniOzoneLoadGenerator {
int index = RandomUtils.nextInt(); int index = RandomUtils.nextInt();
String keyName = writeData(index, bucket, threadName); String keyName = writeData(index, bucket, threadName);
readData(bucket, keyName); readData(bucket, keyName, index);
deleteKey(bucket, keyName); deleteKey(bucket, keyName);
} catch (Exception e) { } catch (Exception e) {
@ -133,11 +134,13 @@ public class MiniOzoneLoadGenerator {
ByteBuffer buffer = buffers.get(keyIndex % numBuffers); ByteBuffer buffer = buffers.get(keyIndex % numBuffers);
int bufferCapacity = buffer.capacity(); int bufferCapacity = buffer.capacity();
String keyName = threadName + keyNameDelimiter + keyIndex; String keyName = getKeyName(keyIndex, threadName);
LOG.trace("LOADGEN: Writing key {}", keyName);
try (OzoneOutputStream stream = bucket.createKey(keyName, try (OzoneOutputStream stream = bucket.createKey(keyName,
bufferCapacity, ReplicationType.RATIS, ReplicationFactor.THREE, bufferCapacity, ReplicationType.RATIS, ReplicationFactor.THREE,
new HashMap<>())) { new HashMap<>())) {
stream.write(buffer.array()); stream.write(buffer.array());
LOG.trace("LOADGEN: Written key {}", keyName);
} catch (Throwable t) { } catch (Throwable t) {
LOG.error("LOADGEN: Create key:{} failed with exception, skipping", LOG.error("LOADGEN: Create key:{} failed with exception, skipping",
keyName, t); keyName, t);
@ -147,9 +150,9 @@ public class MiniOzoneLoadGenerator {
return keyName; return keyName;
} }
private void readData(OzoneBucket bucket, String keyName) throws Exception { private void readData(OzoneBucket bucket, String keyName, int index)
int index = Integer.valueOf(keyName.split(keyNameDelimiter)[1]); throws Exception {
LOG.trace("LOADGEN: Reading key {}", keyName);
ByteBuffer buffer = buffers.get(index % numBuffers); ByteBuffer buffer = buffers.get(index % numBuffers);
int bufferCapacity = buffer.capacity(); int bufferCapacity = buffer.capacity();
@ -168,6 +171,7 @@ public class MiniOzoneLoadGenerator {
throw new IOException("Read mismatch, key:" + keyName + throw new IOException("Read mismatch, key:" + keyName +
" read data does not match the written data"); " read data does not match the written data");
} }
LOG.trace("LOADGEN: Read key {}", keyName);
} catch (Throwable t) { } catch (Throwable t) {
LOG.error("LOADGEN: Read key:{} failed with exception", keyName, t); LOG.error("LOADGEN: Read key:{} failed with exception", keyName, t);
throw t; throw t;
@ -175,18 +179,21 @@ public class MiniOzoneLoadGenerator {
} }
private void deleteKey(OzoneBucket bucket, String keyName) throws Exception { private void deleteKey(OzoneBucket bucket, String keyName) throws Exception {
LOG.trace("LOADGEN: Deleting key {}", keyName);
try { try {
bucket.deleteKey(keyName); bucket.deleteKey(keyName);
LOG.trace("LOADGEN: Deleted key {}", keyName);
} catch (Throwable t) { } catch (Throwable t) {
LOG.error("LOADGEN: Unable to delete key:{}", keyName, t); LOG.error("LOADGEN: Unable to delete key:{}", keyName, t);
throw t; throw t;
} }
} }
private String getKeyToRead() { private Optional<Integer> randomKeyToRead() {
int currentIndex = agedFileWrittenIndex.get(); int currentIndex = agedFileWrittenIndex.get();
return currentIndex != 0 ? return currentIndex != 0
String.valueOf(RandomUtils.nextInt(0, currentIndex)): null; ? Optional.of(RandomUtils.nextInt(0, currentIndex))
: Optional.empty();
} }
private void startAgedFilesLoad(long runTimeMillis) { private void startAgedFilesLoad(long runTimeMillis) {
@ -201,12 +208,13 @@ public class MiniOzoneLoadGenerator {
String keyName = null; String keyName = null;
try { try {
if (agedWriteProbability.isTrue()) { if (agedWriteProbability.isTrue()) {
keyName = writeData(agedFileWrittenIndex.incrementAndGet(), keyName = writeData(agedFileWrittenIndex.getAndIncrement(),
agedLoadBucket, threadName); agedLoadBucket, threadName);
} else { } else {
keyName = getKeyToRead(); Optional<Integer> index = randomKeyToRead();
if (keyName != null) { if (index.isPresent()) {
readData(agedLoadBucket, keyName); keyName = getKeyName(index.get(), threadName);
readData(agedLoadBucket, keyName, index.get());
} }
} }
} catch (Throwable t) { } catch (Throwable t) {
@ -253,4 +261,8 @@ public class MiniOzoneLoadGenerator {
LOG.error("error while closing ", e); LOG.error("error while closing ", e);
} }
} }
private static String getKeyName(int keyIndex, String threadName) {
return threadName + keyNameDelimiter + keyIndex;
}
} }

View File

@ -15,7 +15,7 @@ log4j.rootLogger=info,stdout
log4j.threshold=ALL log4j.threshold=ALL
log4j.appender.stdout=org.apache.log4j.ConsoleAppender log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} (%F:%M(%L)) - %m%n
log4j.logger.org.apache.hadoop.security.ShellBasedUnixGroupsMapping=ERROR log4j.logger.org.apache.hadoop.security.ShellBasedUnixGroupsMapping=ERROR
log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR