HBASE-19635 Introduce a thread at RS side to call reportProcedureDone
This commit is contained in:
parent
d9b5eb3abb
commit
4c6942df58
|
@ -146,7 +146,7 @@ message RegionSpaceUseReportRequest {
|
||||||
message RegionSpaceUseReportResponse {
|
message RegionSpaceUseReportResponse {
|
||||||
}
|
}
|
||||||
|
|
||||||
message ReportProcedureDoneRequest {
|
message RemoteProcedureResult {
|
||||||
required uint64 proc_id = 1;
|
required uint64 proc_id = 1;
|
||||||
enum Status {
|
enum Status {
|
||||||
SUCCESS = 1;
|
SUCCESS = 1;
|
||||||
|
@ -155,6 +155,9 @@ message ReportProcedureDoneRequest {
|
||||||
required Status status = 2;
|
required Status status = 2;
|
||||||
optional ForeignExceptionMessage error = 3;
|
optional ForeignExceptionMessage error = 3;
|
||||||
}
|
}
|
||||||
|
message ReportProcedureDoneRequest {
|
||||||
|
repeated RemoteProcedureResult result = 1;
|
||||||
|
}
|
||||||
|
|
||||||
message ReportProcedureDoneResponse {
|
message ReportProcedureDoneResponse {
|
||||||
}
|
}
|
||||||
|
|
|
@ -268,6 +268,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProto
|
||||||
import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionSpaceUse;
|
import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionSpaceUse;
|
||||||
import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionSpaceUseReportRequest;
|
import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionSpaceUseReportRequest;
|
||||||
import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionSpaceUseReportResponse;
|
import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionSpaceUseReportResponse;
|
||||||
|
import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RemoteProcedureResult;
|
||||||
import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportProcedureDoneRequest;
|
import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportProcedureDoneRequest;
|
||||||
import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportProcedureDoneResponse;
|
import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportProcedureDoneResponse;
|
||||||
import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRSFatalErrorRequest;
|
import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRSFatalErrorRequest;
|
||||||
|
@ -2236,12 +2237,14 @@ public class MasterRpcServices extends RSRpcServices
|
||||||
@Override
|
@Override
|
||||||
public ReportProcedureDoneResponse reportProcedureDone(RpcController controller,
|
public ReportProcedureDoneResponse reportProcedureDone(RpcController controller,
|
||||||
ReportProcedureDoneRequest request) throws ServiceException {
|
ReportProcedureDoneRequest request) throws ServiceException {
|
||||||
if (request.getStatus() == ReportProcedureDoneRequest.Status.SUCCESS) {
|
request.getResultList().forEach(result -> {
|
||||||
master.remoteProcedureCompleted(request.getProcId());
|
if (result.getStatus() == RemoteProcedureResult.Status.SUCCESS) {
|
||||||
|
master.remoteProcedureCompleted(result.getProcId());
|
||||||
} else {
|
} else {
|
||||||
master.remoteProcedureFailed(request.getProcId(),
|
master.remoteProcedureFailed(result.getProcId(),
|
||||||
RemoteProcedureException.fromProto(request.getError()));
|
RemoteProcedureException.fromProto(result.getError()));
|
||||||
}
|
}
|
||||||
|
});
|
||||||
return ReportProcedureDoneResponse.getDefaultInstance();
|
return ReportProcedureDoneResponse.getDefaultInstance();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -144,7 +144,6 @@ import org.apache.hadoop.hbase.util.CompressionTest;
|
||||||
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
|
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
|
||||||
import org.apache.hadoop.hbase.util.FSTableDescriptors;
|
import org.apache.hadoop.hbase.util.FSTableDescriptors;
|
||||||
import org.apache.hadoop.hbase.util.FSUtils;
|
import org.apache.hadoop.hbase.util.FSUtils;
|
||||||
import org.apache.hadoop.hbase.util.ForeignExceptionUtil;
|
|
||||||
import org.apache.hadoop.hbase.util.HasThread;
|
import org.apache.hadoop.hbase.util.HasThread;
|
||||||
import org.apache.hadoop.hbase.util.JvmPauseMonitor;
|
import org.apache.hadoop.hbase.util.JvmPauseMonitor;
|
||||||
import org.apache.hadoop.hbase.util.NettyEventLoopGroupConfig;
|
import org.apache.hadoop.hbase.util.NettyEventLoopGroupConfig;
|
||||||
|
@ -381,6 +380,9 @@ public class HRegionServer extends HasThread implements
|
||||||
// eclipse warning when accessed by inner classes
|
// eclipse warning when accessed by inner classes
|
||||||
protected LogRoller walRoller;
|
protected LogRoller walRoller;
|
||||||
|
|
||||||
|
// A thread which calls reportProcedureDone
|
||||||
|
private RemoteProcedureResultReporter procedureResultReporter;
|
||||||
|
|
||||||
// flag set after we're done setting up server threads
|
// flag set after we're done setting up server threads
|
||||||
final AtomicBoolean online = new AtomicBoolean(false);
|
final AtomicBoolean online = new AtomicBoolean(false);
|
||||||
|
|
||||||
|
@ -1858,6 +1860,7 @@ public class HRegionServer extends HasThread implements
|
||||||
|
|
||||||
this.walRoller = new LogRoller(this, this);
|
this.walRoller = new LogRoller(this, this);
|
||||||
this.flushThroughputController = FlushThroughputControllerFactory.create(this, conf);
|
this.flushThroughputController = FlushThroughputControllerFactory.create(this, conf);
|
||||||
|
this.procedureResultReporter = new RemoteProcedureResultReporter(this);
|
||||||
|
|
||||||
// Create the CompactedFileDischarger chore executorService. This chore helps to
|
// Create the CompactedFileDischarger chore executorService. This chore helps to
|
||||||
// remove the compacted files
|
// remove the compacted files
|
||||||
|
@ -1901,6 +1904,8 @@ public class HRegionServer extends HasThread implements
|
||||||
Threads.setDaemonThreadRunning(this.walRoller.getThread(), getName() + ".logRoller",
|
Threads.setDaemonThreadRunning(this.walRoller.getThread(), getName() + ".logRoller",
|
||||||
uncaughtExceptionHandler);
|
uncaughtExceptionHandler);
|
||||||
this.cacheFlusher.start(uncaughtExceptionHandler);
|
this.cacheFlusher.start(uncaughtExceptionHandler);
|
||||||
|
Threads.setDaemonThreadRunning(this.procedureResultReporter,
|
||||||
|
getName() + ".procedureResultReporter", uncaughtExceptionHandler);
|
||||||
|
|
||||||
if (this.compactionChecker != null) choreService.scheduleChore(compactionChecker);
|
if (this.compactionChecker != null) choreService.scheduleChore(compactionChecker);
|
||||||
if (this.periodicFlusher != null) choreService.scheduleChore(periodicFlusher);
|
if (this.periodicFlusher != null) choreService.scheduleChore(periodicFlusher);
|
||||||
|
@ -3680,55 +3685,26 @@ public class HRegionServer extends HasThread implements
|
||||||
executorService.submit(new RSProcedureHandler(this, procId, callable));
|
executorService.submit(new RSProcedureHandler(this, procId, callable));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void reportProcedureDone(long procId, Throwable error) {
|
public void remoteProcedureComplete(long procId, Throwable error) {
|
||||||
ReportProcedureDoneRequest.Builder builder =
|
procedureResultReporter.complete(procId, error);
|
||||||
ReportProcedureDoneRequest.newBuilder().setProcId(procId);
|
|
||||||
if (error != null) {
|
|
||||||
builder.setStatus(ReportProcedureDoneRequest.Status.ERROR)
|
|
||||||
.setError(ForeignExceptionUtil.toProtoForeignException(serverName.toString(), error));
|
|
||||||
} else {
|
|
||||||
builder.setStatus(ReportProcedureDoneRequest.Status.SUCCESS);
|
|
||||||
}
|
}
|
||||||
ReportProcedureDoneRequest request = builder.build();
|
|
||||||
int tries = 0;
|
void reportProcedureDone(ReportProcedureDoneRequest request) throws IOException {
|
||||||
long pauseTime = INIT_PAUSE_TIME_MS;
|
|
||||||
while (keepLooping()) {
|
|
||||||
RegionServerStatusService.BlockingInterface rss = rssStub;
|
RegionServerStatusService.BlockingInterface rss = rssStub;
|
||||||
try {
|
for (;;) {
|
||||||
if (rss == null) {
|
rss = rssStub;
|
||||||
|
if (rss != null) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
createRegionServerStatusStub();
|
createRegionServerStatusStub();
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
try {
|
||||||
rss.reportProcedureDone(null, request);
|
rss.reportProcedureDone(null, request);
|
||||||
// Log if we had to retry else don't log unless TRACE. We want to
|
|
||||||
// know if were successful after an attempt showed in logs as failed.
|
|
||||||
if (tries > 0 || LOG.isTraceEnabled()) {
|
|
||||||
LOG.info("PROCEDURE REPORTED " + request);
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
} catch (ServiceException se) {
|
} catch (ServiceException se) {
|
||||||
IOException ioe = ProtobufUtil.getRemoteException(se);
|
|
||||||
boolean pause =
|
|
||||||
ioe instanceof ServerNotRunningYetException || ioe instanceof PleaseHoldException;
|
|
||||||
if (pause) {
|
|
||||||
// Do backoff else we flood the Master with requests.
|
|
||||||
pauseTime = ConnectionUtils.getPauseTime(INIT_PAUSE_TIME_MS, tries);
|
|
||||||
} else {
|
|
||||||
pauseTime = INIT_PAUSE_TIME_MS; // Reset.
|
|
||||||
}
|
|
||||||
LOG.info(
|
|
||||||
"Failed to report transition " + TextFormat.shortDebugString(request) + "; retry (#" +
|
|
||||||
tries + ")" + (pause ? " after " + pauseTime + "ms delay (Master is coming online...)."
|
|
||||||
: " immediately."),
|
|
||||||
ioe);
|
|
||||||
if (pause) {
|
|
||||||
Threads.sleep(pauseTime);
|
|
||||||
}
|
|
||||||
tries++;
|
|
||||||
if (rssStub == rss) {
|
if (rssStub == rss) {
|
||||||
rssStub = null;
|
rssStub = null;
|
||||||
}
|
}
|
||||||
}
|
throw ProtobufUtil.getRemoteException(se);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,111 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hbase.regionserver;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.concurrent.LinkedBlockingQueue;
|
||||||
|
import org.apache.hadoop.hbase.PleaseHoldException;
|
||||||
|
import org.apache.hadoop.hbase.client.ConnectionUtils;
|
||||||
|
import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
|
||||||
|
import org.apache.hadoop.hbase.util.ForeignExceptionUtil;
|
||||||
|
import org.apache.hadoop.hbase.util.Threads;
|
||||||
|
import org.apache.yetus.audience.InterfaceAudience;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import org.apache.hadoop.hbase.shaded.com.google.protobuf.TextFormat;
|
||||||
|
import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RemoteProcedureResult;
|
||||||
|
import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportProcedureDoneRequest;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A thread which calls {@code reportProcedureDone} to tell master the result of a remote procedure.
|
||||||
|
*/
|
||||||
|
@InterfaceAudience.Private
|
||||||
|
class RemoteProcedureResultReporter extends Thread {
|
||||||
|
|
||||||
|
private static final Logger LOG = LoggerFactory.getLogger(RemoteProcedureResultReporter.class);
|
||||||
|
|
||||||
|
// Time to pause if master says 'please hold'. Make configurable if needed.
|
||||||
|
private static final int INIT_PAUSE_TIME_MS = 1000;
|
||||||
|
|
||||||
|
private static final int MAX_BATCH = 100;
|
||||||
|
|
||||||
|
private final HRegionServer server;
|
||||||
|
|
||||||
|
private final LinkedBlockingQueue<RemoteProcedureResult> results = new LinkedBlockingQueue<>();
|
||||||
|
|
||||||
|
public RemoteProcedureResultReporter(HRegionServer server) {
|
||||||
|
this.server = server;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void complete(long procId, Throwable error) {
|
||||||
|
RemoteProcedureResult.Builder builder = RemoteProcedureResult.newBuilder().setProcId(procId);
|
||||||
|
if (error != null) {
|
||||||
|
builder.setStatus(RemoteProcedureResult.Status.ERROR).setError(
|
||||||
|
ForeignExceptionUtil.toProtoForeignException(server.getServerName().toString(), error));
|
||||||
|
} else {
|
||||||
|
builder.setStatus(RemoteProcedureResult.Status.SUCCESS);
|
||||||
|
}
|
||||||
|
results.add(builder.build());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void run() {
|
||||||
|
ReportProcedureDoneRequest.Builder builder = ReportProcedureDoneRequest.newBuilder();
|
||||||
|
int tries = 0;
|
||||||
|
while (!server.isStopped()) {
|
||||||
|
if (builder.getResultCount() == 0) {
|
||||||
|
try {
|
||||||
|
builder.addResult(results.take());
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
while (builder.getResultCount() < MAX_BATCH) {
|
||||||
|
RemoteProcedureResult result = results.poll();
|
||||||
|
if (result == null) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
builder.addResult(result);
|
||||||
|
}
|
||||||
|
ReportProcedureDoneRequest request = builder.build();
|
||||||
|
try {
|
||||||
|
server.reportProcedureDone(builder.build());
|
||||||
|
builder.clear();
|
||||||
|
tries = 0;
|
||||||
|
} catch (IOException e) {
|
||||||
|
boolean pause =
|
||||||
|
e instanceof ServerNotRunningYetException || e instanceof PleaseHoldException;
|
||||||
|
long pauseTime;
|
||||||
|
if (pause) {
|
||||||
|
// Do backoff else we flood the Master with requests.
|
||||||
|
pauseTime = ConnectionUtils.getPauseTime(INIT_PAUSE_TIME_MS, tries);
|
||||||
|
} else {
|
||||||
|
pauseTime = INIT_PAUSE_TIME_MS; // Reset.
|
||||||
|
}
|
||||||
|
LOG.info("Failed report procedure " + TextFormat.shortDebugString(request) + "; retry (#" +
|
||||||
|
tries + ")" + (pause ? " after " + pauseTime + "ms delay (Master is coming online...)."
|
||||||
|
: " immediately."),
|
||||||
|
e);
|
||||||
|
Threads.sleep(pauseTime);
|
||||||
|
tries++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -49,6 +49,6 @@ public class RSProcedureHandler extends EventHandler {
|
||||||
LOG.error("Catch exception when call RSProcedureCallable: ", e);
|
LOG.error("Catch exception when call RSProcedureCallable: ", e);
|
||||||
error = e;
|
error = e;
|
||||||
}
|
}
|
||||||
((HRegionServer) server).reportProcedureDone(procId, error);
|
((HRegionServer) server).remoteProcedureComplete(procId, error);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue