From 489f07d945d7bee0d0970a682542ddfeb160c078 Mon Sep 17 00:00:00 2001 From: stack Date: Thu, 18 Jun 2020 08:37:19 -0700 Subject: [PATCH] HBASE-24574 Procedure V2 - Distributed WAL Splitting => LOGGING (#1912) Addendum; minor log edits --- .../master/procedure/SplitWALProcedure.java | 7 +++++-- .../RemoteProcedureResultReporter.java | 6 +++--- .../hbase/regionserver/SplitLogWorker.java | 21 ++++++++----------- .../hbase/regionserver/SplitWALCallable.java | 4 ++-- .../handler/WALSplitterHandler.java | 1 + .../wal/BoundedRecoveredHFilesOutputSink.java | 5 +++-- 6 files changed, 23 insertions(+), 21 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/SplitWALProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/SplitWALProcedure.java index 81d525d6242..4ae408f417d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/SplitWALProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/SplitWALProcedure.java @@ -185,8 +185,11 @@ public class SplitWALProcedure @Override protected void afterReplay(MasterProcedureEnv env){ - if(worker != null){ - env.getMasterServices().getSplitWALManager().addUsedSplitWALWorker(worker); + if (worker != null) { + if (env != null && env.getMasterServices() != null && + env.getMasterServices().getSplitWALManager() != null) { + env.getMasterServices().getSplitWALManager().addUsedSplitWALWorker(worker); + } } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RemoteProcedureResultReporter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RemoteProcedureResultReporter.java index efb044a7846..981f090534a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RemoteProcedureResultReporter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RemoteProcedureResultReporter.java @@ -57,11 +57,11 @@ class RemoteProcedureResultReporter extends Thread { public void complete(long procId, Throwable error) { RemoteProcedureResult.Builder builder = RemoteProcedureResult.newBuilder().setProcId(procId); if (error != null) { - LOG.debug("Failed to complete execution of proc pid={}", procId, error); + LOG.debug("Failed to complete execution of pid={}", procId, error); builder.setStatus(RemoteProcedureResult.Status.ERROR).setError( ForeignExceptionUtil.toProtoForeignException(server.getServerName().toString(), error)); } else { - LOG.debug("Successfully complete execution of proc pid={}", procId); + LOG.debug("Successfully complete execution of pid={}", procId); builder.setStatus(RemoteProcedureResult.Status.SUCCESS); } results.add(builder.build()); @@ -102,7 +102,7 @@ class RemoteProcedureResultReporter extends Thread { } else { pauseTime = INIT_PAUSE_TIME_MS; // Reset. } - LOG.info("Failed report procedure " + TextFormat.shortDebugString(request) + "; retry (#" + + LOG.info("Failed procedure report " + TextFormat.shortDebugString(request) + "; retry (#" + tries + ")" + (pause ? " after " + pauseTime + "ms delay (Master is coming online...)." : " immediately."), e); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SplitLogWorker.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SplitLogWorker.java index 74a3ea26d3b..5c1ded74239 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SplitLogWorker.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SplitLogWorker.java @@ -1,5 +1,4 @@ -/** - * +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -49,7 +48,6 @@ import org.apache.hadoop.hbase.wal.WALSplitter; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; /** @@ -162,7 +160,7 @@ public class SplitLogWorker implements Runnable { walDir = CommonFSUtils.getWALRootDir(conf); fs = walDir.getFileSystem(conf); } catch (IOException e) { - LOG.warn("could not find root dir or fs", e); + LOG.warn("Resigning, could not find root dir or fs", e); return Status.RESIGNED; } try { @@ -185,25 +183,24 @@ public class SplitLogWorker implements Runnable { return Status.PREEMPTED; } } catch (InterruptedIOException iioe) { - LOG.warn("log splitting of " + name + " interrupted, resigning", iioe); + LOG.warn("Resigning, interrupted splitting WAL {}", filename, iioe); return Status.RESIGNED; } catch (IOException e) { if (e instanceof FileNotFoundException) { // A wal file may not exist anymore. Nothing can be recovered so move on - LOG.warn("WAL {} does not exist anymore", name, e); + LOG.warn("Done, WAL {} does not exist anymore", filename, e); return Status.DONE; } Throwable cause = e.getCause(); - if (e instanceof RetriesExhaustedException && (cause instanceof NotServingRegionException || - cause instanceof ConnectException || cause instanceof SocketTimeoutException)) { - LOG.warn("log replaying of " + name + " can't connect to the target regionserver, " + - "resigning", e); + if (e instanceof RetriesExhaustedException && (cause instanceof NotServingRegionException + || cause instanceof ConnectException || cause instanceof SocketTimeoutException)) { + LOG.warn("Resigning, can't connect to target regionserver splitting WAL {}", filename, e); return Status.RESIGNED; } else if (cause instanceof InterruptedException) { - LOG.warn("log splitting of " + name + " interrupted, resigning", e); + LOG.warn("Resigning, interrupted splitting WAL {}", filename, e); return Status.RESIGNED; } - LOG.warn("log splitting of " + name + " failed, returning error", e); + LOG.warn("Error splitting WAL {}", filename, e); return Status.ERR; } return Status.DONE; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SplitWALCallable.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SplitWALCallable.java index 7462cb97a7e..bbd3d3ddb5f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SplitWALCallable.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SplitWALCallable.java @@ -101,9 +101,9 @@ public class SplitWALCallable implements RSProcedureCallable { private void splitWal() throws IOException { SplitLogWorker.TaskExecutor.Status status = - SplitLogWorker.splitLog(walPath, null, rs.getConfiguration(), rs, rs, rs.getWalFactory()); + SplitLogWorker.splitLog(walPath, null, rs.getConfiguration(), rs, rs, rs.getWalFactory()); if (status != SplitLogWorker.TaskExecutor.Status.DONE) { - throw new IOException("Split WAL " + walPath + " failed at server "); + throw new IOException("Failed WAL split, status=" + status + ", wal=" + walPath); } } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/WALSplitterHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/WALSplitterHandler.java index 49ab574ec52..d6009e388fa 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/WALSplitterHandler.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/WALSplitterHandler.java @@ -37,6 +37,7 @@ import org.apache.hadoop.hbase.util.CancelableProgressable; /** * Handles log splitting a wal + * Used by the zk-based distributed log splitting. Created by ZKSplitLogWorkerCoordination. */ @InterfaceAudience.Private public class WALSplitterHandler extends EventHandler { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/BoundedRecoveredHFilesOutputSink.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/BoundedRecoveredHFilesOutputSink.java index b8a60bc06ad..0c6f79e93c5 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/BoundedRecoveredHFilesOutputSink.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/BoundedRecoveredHFilesOutputSink.java @@ -226,13 +226,14 @@ public class BoundedRecoveredHFilesOutputSink extends OutputSink { try { return walSplitter.rsServices.getConnection().getAdmin().getDescriptor(tableName); } catch (IOException e) { - LOG.warn("Failed to get table descriptor for table {}", tableName, e); + LOG.warn("Failed to get table descriptor for {}", tableName, e); } } + LOG.info("Failed getting {} table descriptor from master; trying local", tableName); try { return walSplitter.tableDescriptors.get(tableName); } catch (IOException e) { - LOG.warn("Failed to get table descriptor for table {}", tableName, e); + LOG.warn("Failed to get table descriptor for {}", tableName, e); return null; } }