HDDS-834. Datanode goes OOM based because of segment size. Contributed by Mukul Kumar Singh.

This commit is contained in:
Shashikant Banerjee 2018-11-14 15:50:46 +05:30
parent a2fa8324d4
commit 3923a4a279
3 changed files with 25 additions and 11 deletions

View File

@ -69,7 +69,7 @@ public final class ScmConfigKeys {
public static final String DFS_CONTAINER_RATIS_SEGMENT_SIZE_KEY = public static final String DFS_CONTAINER_RATIS_SEGMENT_SIZE_KEY =
"dfs.container.ratis.segment.size"; "dfs.container.ratis.segment.size";
public static final int DFS_CONTAINER_RATIS_SEGMENT_SIZE_DEFAULT = public static final int DFS_CONTAINER_RATIS_SEGMENT_SIZE_DEFAULT =
1 * 1024 * 1024 * 1024; 16 * 1024;
public static final String DFS_CONTAINER_RATIS_SEGMENT_PREALLOCATED_SIZE_KEY = public static final String DFS_CONTAINER_RATIS_SEGMENT_PREALLOCATED_SIZE_KEY =
"dfs.container.ratis.segment.preallocated.size"; "dfs.container.ratis.segment.preallocated.size";
public static final int public static final int

View File

@ -175,10 +175,10 @@
</property> </property>
<property> <property>
<name>dfs.container.ratis.segment.size</name> <name>dfs.container.ratis.segment.size</name>
<value>1073741824</value> <value>16384</value>
<tag>OZONE, RATIS, PERFORMANCE</tag> <tag>OZONE, RATIS, PERFORMANCE</tag>
<description>The size of the raft segment used by Apache Ratis on datanodes. <description>The size of the raft segment used by Apache Ratis on datanodes.
(1 GB by default) (16 KB by default)
</description> </description>
</property> </property>
<property> <property>

View File

@ -119,7 +119,8 @@ public class ContainerStateMachine extends BaseStateMachine {
private final ConcurrentHashMap<Long, Message> createContainerResponseMap; private final ConcurrentHashMap<Long, Message> createContainerResponseMap;
private ExecutorService[] executors; private ExecutorService[] executors;
private final int numExecutors; private final int numExecutors;
private final Map<Long, Long> containerCommandCompletionMap; private final Map<Long, Long> applyTransactionCompletionMap;
private long lastIndex;
/** /**
* CSM metrics. * CSM metrics.
*/ */
@ -137,7 +138,8 @@ public class ContainerStateMachine extends BaseStateMachine {
this.executors = executors.toArray(new ExecutorService[numExecutors]); this.executors = executors.toArray(new ExecutorService[numExecutors]);
this.writeChunkFutureMap = new ConcurrentHashMap<>(); this.writeChunkFutureMap = new ConcurrentHashMap<>();
this.createContainerResponseMap = new ConcurrentHashMap<>(); this.createContainerResponseMap = new ConcurrentHashMap<>();
containerCommandCompletionMap = new ConcurrentHashMap<>(); applyTransactionCompletionMap = new ConcurrentHashMap<>();
this.lastIndex = RaftServerConstants.INVALID_LOG_INDEX;
} }
@Override @Override
@ -161,10 +163,12 @@ public class ContainerStateMachine extends BaseStateMachine {
private long loadSnapshot(SingleFileSnapshotInfo snapshot) { private long loadSnapshot(SingleFileSnapshotInfo snapshot) {
if (snapshot == null) { if (snapshot == null) {
TermIndex empty = TermIndex.newTermIndex(0, 0); TermIndex empty = TermIndex.newTermIndex(0,
RaftServerConstants.INVALID_LOG_INDEX);
LOG.info("The snapshot info is null." + LOG.info("The snapshot info is null." +
"Setting the last applied index to:" + empty); "Setting the last applied index to:" + empty);
setLastAppliedTermIndex(empty); setLastAppliedTermIndex(empty);
lastIndex = RaftServerConstants.INVALID_LOG_INDEX;
return RaftServerConstants.INVALID_LOG_INDEX; return RaftServerConstants.INVALID_LOG_INDEX;
} }
@ -173,6 +177,7 @@ public class ContainerStateMachine extends BaseStateMachine {
snapshot.getFile().getPath().toFile()); snapshot.getFile().getPath().toFile());
LOG.info("Setting the last applied index to " + last); LOG.info("Setting the last applied index to " + last);
setLastAppliedTermIndex(last); setLastAppliedTermIndex(last);
lastIndex = last.getIndex();
return last.getIndex(); return last.getIndex();
} }
@ -450,7 +455,7 @@ public class ContainerStateMachine extends BaseStateMachine {
Long appliedTerm = null; Long appliedTerm = null;
long appliedIndex = -1; long appliedIndex = -1;
for(long i = getLastAppliedTermIndex().getIndex() + 1;; i++) { for(long i = getLastAppliedTermIndex().getIndex() + 1;; i++) {
final Long removed = containerCommandCompletionMap.remove(i); final Long removed = applyTransactionCompletionMap.remove(i);
if (removed == null) { if (removed == null) {
break; break;
} }
@ -458,7 +463,7 @@ public class ContainerStateMachine extends BaseStateMachine {
appliedIndex = i; appliedIndex = i;
} }
if (appliedTerm != null) { if (appliedTerm != null) {
updateLastAppliedTermIndex(appliedIndex, appliedTerm); updateLastAppliedTermIndex(appliedTerm, appliedIndex);
} }
} }
@ -467,7 +472,15 @@ public class ContainerStateMachine extends BaseStateMachine {
*/ */
@Override @Override
public CompletableFuture<Message> applyTransaction(TransactionContext trx) { public CompletableFuture<Message> applyTransaction(TransactionContext trx) {
// ApplyTransaction call can come with an entryIndex much greater than
// lastIndex updated because in between entries in the raft log can be
// appended because raft config persistence. Just add a dummy entry
// for those.
long index = trx.getLogEntry().getIndex(); long index = trx.getLogEntry().getIndex();
for (long i = lastIndex + 1; i < index; i++) {
LOG.info("Gap in indexes at:{} detected, adding dummy entries ", i);
applyTransactionCompletionMap.put(i, trx.getLogEntry().getTerm());
}
try { try {
metrics.incNumApplyTransactionsOps(); metrics.incNumApplyTransactionsOps();
ContainerCommandRequestProto requestProto = ContainerCommandRequestProto requestProto =
@ -500,8 +513,8 @@ public class ContainerStateMachine extends BaseStateMachine {
getCommandExecutor(requestProto)); getCommandExecutor(requestProto));
} else if (cmdType == Type.CreateContainer) { } else if (cmdType == Type.CreateContainer) {
long containerID = requestProto.getContainerID(); long containerID = requestProto.getContainerID();
return CompletableFuture.completedFuture( future = CompletableFuture.completedFuture(
createContainerResponseMap.get(containerID)); createContainerResponseMap.remove(containerID));
} else { } else {
// Make sure that in write chunk, the user data is not set // Make sure that in write chunk, the user data is not set
if (cmdType == Type.WriteChunk) { if (cmdType == Type.WriteChunk) {
@ -512,9 +525,10 @@ public class ContainerStateMachine extends BaseStateMachine {
getCommandExecutor(requestProto)); getCommandExecutor(requestProto));
} }
lastIndex = index;
future.thenAccept(m -> { future.thenAccept(m -> {
final Long previous = final Long previous =
containerCommandCompletionMap applyTransactionCompletionMap
.put(index, trx.getLogEntry().getTerm()); .put(index, trx.getLogEntry().getTerm());
Preconditions.checkState(previous == null); Preconditions.checkState(previous == null);
updateLastApplied(); updateLastApplied();