HBASE-26832 Avoid repeated releasing of flushed wal entries in AsyncFSWAL#syncCompleted (#4281)

Signed-off-by: Xiaolin Ha <haxiaolin@apache.org> (cherry picked from commit 4f491fd5e4)
2022-03-28 07:53:28 +08:00 · 2022-03-28 07:53:28 +08:00 · 9355506c30
parent 9a7ee77bd4
commit 9355506c30
1 changed files with 34 additions and 3 deletions
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/AsyncFSWAL.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/AsyncFSWAL.java
@ -162,7 +162,7 @@ public class AsyncFSWAL extends AbstractFSWAL<AsyncWriter> {
  private static final int MAX_EPOCH = 0x3FFFFFFF;
  // the lowest bit is waitingRoll, which means new writer is created and we are waiting for old
  // writer to be closed.
-  // the second lowest bit is writerBorken which means the current writer is broken and rollWriter
+  // the second lowest bit is writerBroken which means the current writer is broken and rollWriter
  // is needed.
  // all other bits are the epoch number of the current writer, this is used to detect whether the
  // writer is still the one when you issue the sync.
@ -340,7 +340,38 @@ public class AsyncFSWAL extends AbstractFSWAL<AsyncWriter> {
    }
  }

-  private void syncCompleted(AsyncWriter writer, long processedTxid, long startTimeNs) {
+  private void syncCompleted(long epochWhenSync, AsyncWriter writer, long processedTxid,
+    long startTimeNs) {
+    // Please see the last several comments on HBASE-22761, it is possible that we get a
+    // syncCompleted which acks a previous sync request after we received a syncFailed on the same
+    // writer. So here we will also check on the epoch and state, if the epoch has already been
+    // changed, i.e, we have already rolled the writer, or the writer is already broken, we should
+    // just skip here, to avoid mess up the state or accidentally release some WAL entries and
+    // cause data corruption.
+    // The syncCompleted call is on the critical write path so we should try our best to make it
+    // fast. So here we do not hold consumeLock, for increasing performance. It is safe because
+    // there are only 3 possible situations:
+    // 1. For normal case, the only place where we change epochAndState is when rolling the writer.
+    // Before rolling actually happen, we will only change the state to waitingRoll which is another
+    // bit than writerBroken, and when we actually change the epoch, we can make sure that there is
+    // no out going sync request. So we will always pass the check here and there is no problem.
+    // 2. The writer is broken, but we have not called syncFailed yet. In this case, since
+    // syncFailed and syncCompleted are executed in the same thread, we will just face the same
+    // situation with #1.
+    // 3. The writer is broken, and syncFailed has been called. Then when we arrive here, there are
+    // only 2 possible situations:
+    // a. we arrive before we actually roll the writer, then we will find out the writer is broken
+    // and give up.
+    // b. we arrive after we actually roll the writer, then we will find out the epoch is changed
+    // and give up.
+    // For both #a and #b, we do not need to hold the consumeLock as we will always update the
+    // epochAndState as a whole.
+    // So in general, for all the cases above, we do not need to hold the consumeLock.
+    int epochAndState = this.epochAndState;
+    if (epoch(epochAndState) != epochWhenSync || writerBroken(epochAndState)) {
+      LOG.warn("Got a sync complete call after the writer is broken, skip");
+      return;
+    }
    highestSyncedTxid.set(processedTxid);
    for (Iterator<FSWALEntry> iter = unackedAppends.iterator(); iter.hasNext();) {
      FSWALEntry entry = iter.next();
@ -396,7 +427,7 @@ public class AsyncFSWAL extends AbstractFSWAL<AsyncWriter> {
      if (error != null) {
        syncFailed(epoch, error);
      } else {
-        syncCompleted(writer, currentHighestProcessedAppendTxid, startTimeNs);
+        syncCompleted(epoch, writer, currentHighestProcessedAppendTxid, startTimeNs);
      }
    }, consumeExecutor);
  }