HBASE-24574 Procedure V2 - Distributed WAL Splitting => LOGGING (#1912)

Addendum; minor log edits
This commit is contained in:
stack 2020-06-18 08:37:19 -07:00
parent 6eb93287cc
commit 7b171a3c1a
6 changed files with 21 additions and 20 deletions

View File

@ -185,8 +185,11 @@ public class SplitWALProcedure
@Override
protected void afterReplay(MasterProcedureEnv env){
if(worker != null){
env.getMasterServices().getSplitWALManager().addUsedSplitWALWorker(worker);
if (worker != null) {
if (env != null && env.getMasterServices() != null &&
env.getMasterServices().getSplitWALManager() != null) {
env.getMasterServices().getSplitWALManager().addUsedSplitWALWorker(worker);
}
}
}

View File

@ -57,11 +57,11 @@ class RemoteProcedureResultReporter extends Thread {
public void complete(long procId, Throwable error) {
RemoteProcedureResult.Builder builder = RemoteProcedureResult.newBuilder().setProcId(procId);
if (error != null) {
LOG.debug("Failed to complete execution of proc pid={}", procId, error);
LOG.debug("Failed to complete execution of pid={}", procId, error);
builder.setStatus(RemoteProcedureResult.Status.ERROR).setError(
ForeignExceptionUtil.toProtoForeignException(server.getServerName().toString(), error));
} else {
LOG.debug("Successfully complete execution of proc pid={}", procId);
LOG.debug("Successfully complete execution of pid={}", procId);
builder.setStatus(RemoteProcedureResult.Status.SUCCESS);
}
results.add(builder.build());
@ -102,7 +102,7 @@ class RemoteProcedureResultReporter extends Thread {
} else {
pauseTime = INIT_PAUSE_TIME_MS; // Reset.
}
LOG.info("Failed report procedure " + TextFormat.shortDebugString(request) + "; retry (#" +
LOG.info("Failed procedure report " + TextFormat.shortDebugString(request) + "; retry (#" +
tries + ")" + (pause ? " after " + pauseTime + "ms delay (Master is coming online...)."
: " immediately."),
e);

View File

@ -1,5 +1,4 @@
/**
*
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -39,7 +38,6 @@ import org.apache.hadoop.hbase.wal.WALSplitter;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
/**
@ -91,7 +89,7 @@ public class SplitLogWorker implements Runnable {
walDir = CommonFSUtils.getWALRootDir(conf);
fs = walDir.getFileSystem(conf);
} catch (IOException e) {
LOG.warn("could not find root dir or fs", e);
LOG.warn("Resigning, could not find root dir or fs", e);
return Status.RESIGNED;
}
// TODO have to correctly figure out when log splitting has been
@ -106,26 +104,24 @@ public class SplitLogWorker implements Runnable {
return Status.PREEMPTED;
}
} catch (InterruptedIOException iioe) {
LOG.warn("log splitting of " + filename + " interrupted, resigning", iioe);
LOG.warn("Resigning, interrupted splitting WAL {}", filename, iioe);
return Status.RESIGNED;
} catch (IOException e) {
if (e instanceof FileNotFoundException) {
// A wal file may not exist anymore. Nothing can be recovered so move on
LOG.warn("WAL {} does not exist anymore", filename, e);
LOG.warn("Done, WAL {} does not exist anymore", filename, e);
return Status.DONE;
}
Throwable cause = e.getCause();
if (e instanceof RetriesExhaustedException && (cause instanceof NotServingRegionException
|| cause instanceof ConnectException || cause instanceof SocketTimeoutException)) {
LOG.warn("log replaying of " + filename + " can't connect to the target regionserver, "
+ "resigning",
e);
LOG.warn("Resigning, can't connect to target regionserver splitting WAL {}", filename, e);
return Status.RESIGNED;
} else if (cause instanceof InterruptedException) {
LOG.warn("log splitting of " + filename + " interrupted, resigning", e);
LOG.warn("Resigning, interrupted splitting WAL {}", filename, e);
return Status.RESIGNED;
}
LOG.warn("log splitting of " + filename + " failed, returning error", e);
LOG.warn("Error splitting WAL {}", filename, e);
return Status.ERR;
}
return Status.DONE;

View File

@ -101,9 +101,9 @@ public class SplitWALCallable implements RSProcedureCallable {
private void splitWal() throws IOException {
SplitLogWorker.TaskExecutor.Status status =
SplitLogWorker.splitLog(walPath, null, rs.getConfiguration(), rs, rs, rs.getWalFactory());
SplitLogWorker.splitLog(walPath, null, rs.getConfiguration(), rs, rs, rs.getWalFactory());
if (status != SplitLogWorker.TaskExecutor.Status.DONE) {
throw new IOException("Split WAL " + walPath + " failed at server ");
throw new IOException("Failed WAL split, status=" + status + ", wal=" + walPath);
}
}
}

View File

@ -37,6 +37,7 @@ import org.apache.hadoop.hbase.util.CancelableProgressable;
/**
* Handles the splitting of a WAL.
* Used by the zk-based distributed log splitting. Created by ZKSplitLogWorkerCoordination.
*/
@InterfaceAudience.Private
public class WALSplitterHandler extends EventHandler {

View File

@ -226,13 +226,14 @@ public class BoundedRecoveredHFilesOutputSink extends OutputSink {
try {
return walSplitter.rsServices.getConnection().getAdmin().getDescriptor(tableName);
} catch (IOException e) {
LOG.warn("Failed to get table descriptor for table {}", tableName, e);
LOG.warn("Failed to get table descriptor for {}", tableName, e);
}
}
LOG.info("Failed getting {} table descriptor from master; trying local", tableName);
try {
return walSplitter.tableDescriptors.get(tableName);
} catch (IOException e) {
LOG.warn("Failed to get table descriptor for table {}", tableName, e);
LOG.warn("Failed to get table descriptor for {}", tableName, e);
return null;
}
}