HBASE-21406 "status 'replication'" should not show SINK if the cluste… (#1761)
Signed-off-by: Jan Hentschel <jan.hentschel@ultratendency.com> Signed-off by: Viraj Jasani <vjasani@apache.org> Signed-off-by: Josh Elser <elserj@apache.org>
This commit is contained in:
parent
bad2d4e409
commit
e5345b3a7c
|
@ -19,12 +19,17 @@ import org.apache.yetus.audience.InterfaceAudience;
|
||||||
public class ReplicationLoadSink {
|
public class ReplicationLoadSink {
|
||||||
private final long ageOfLastAppliedOp;
|
private final long ageOfLastAppliedOp;
|
||||||
private final long timestampsOfLastAppliedOp;
|
private final long timestampsOfLastAppliedOp;
|
||||||
|
private final long timestampStarted;
|
||||||
|
private final long totalOpsProcessed;
|
||||||
|
|
||||||
// TODO: add the builder for this class
|
// TODO: add the builder for this class
|
||||||
@InterfaceAudience.Private
|
@InterfaceAudience.Private
|
||||||
public ReplicationLoadSink(long age, long timestamp) {
|
public ReplicationLoadSink(long age, long timestamp, long timestampStarted,
|
||||||
|
long totalOpsProcessed) {
|
||||||
this.ageOfLastAppliedOp = age;
|
this.ageOfLastAppliedOp = age;
|
||||||
this.timestampsOfLastAppliedOp = timestamp;
|
this.timestampsOfLastAppliedOp = timestamp;
|
||||||
|
this.timestampStarted = timestampStarted;
|
||||||
|
this.totalOpsProcessed = totalOpsProcessed;
|
||||||
}
|
}
|
||||||
|
|
||||||
public long getAgeOfLastAppliedOp() {
|
public long getAgeOfLastAppliedOp() {
|
||||||
|
@ -34,4 +39,12 @@ public class ReplicationLoadSink {
|
||||||
public long getTimestampsOfLastAppliedOp() {
|
public long getTimestampsOfLastAppliedOp() {
|
||||||
return this.timestampsOfLastAppliedOp;
|
return this.timestampsOfLastAppliedOp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public long getTimestampStarted() {
|
||||||
|
return timestampStarted;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getTotalOpsProcessed() {
|
||||||
|
return totalOpsProcessed;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -2843,7 +2843,10 @@ public final class ProtobufUtil {
|
||||||
|
|
||||||
public static ReplicationLoadSink toReplicationLoadSink(
|
public static ReplicationLoadSink toReplicationLoadSink(
|
||||||
ClusterStatusProtos.ReplicationLoadSink rls) {
|
ClusterStatusProtos.ReplicationLoadSink rls) {
|
||||||
return new ReplicationLoadSink(rls.getAgeOfLastAppliedOp(), rls.getTimeStampsOfLastAppliedOp());
|
return new ReplicationLoadSink(rls.getAgeOfLastAppliedOp(),
|
||||||
|
rls.getTimeStampsOfLastAppliedOp(),
|
||||||
|
rls.getTimestampStarted(),
|
||||||
|
rls.getTotalOpsProcessed());
|
||||||
}
|
}
|
||||||
|
|
||||||
public static ReplicationLoadSource toReplicationLoadSource(
|
public static ReplicationLoadSource toReplicationLoadSource(
|
||||||
|
@ -3438,6 +3441,8 @@ public final class ProtobufUtil {
|
||||||
return ClusterStatusProtos.ReplicationLoadSink.newBuilder()
|
return ClusterStatusProtos.ReplicationLoadSink.newBuilder()
|
||||||
.setAgeOfLastAppliedOp(rls.getAgeOfLastAppliedOp())
|
.setAgeOfLastAppliedOp(rls.getAgeOfLastAppliedOp())
|
||||||
.setTimeStampsOfLastAppliedOp(rls.getTimestampsOfLastAppliedOp())
|
.setTimeStampsOfLastAppliedOp(rls.getTimestampsOfLastAppliedOp())
|
||||||
|
.setTimestampStarted(rls.getTimestampStarted())
|
||||||
|
.setTotalOpsProcessed(rls.getTotalOpsProcessed())
|
||||||
.build();
|
.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -32,4 +32,5 @@ public interface MetricsReplicationSinkSource {
|
||||||
void incrAppliedOps(long batchsize);
|
void incrAppliedOps(long batchsize);
|
||||||
long getLastAppliedOpAge();
|
long getLastAppliedOpAge();
|
||||||
void incrAppliedHFiles(long hfileSize);
|
void incrAppliedHFiles(long hfileSize);
|
||||||
|
long getSinkAppliedOps();
|
||||||
}
|
}
|
||||||
|
|
|
@ -58,4 +58,8 @@ public class MetricsReplicationSinkSourceImpl implements MetricsReplicationSinkS
|
||||||
public void incrAppliedHFiles(long hfiles) {
|
public void incrAppliedHFiles(long hfiles) {
|
||||||
hfilesCounter.incr(hfiles);
|
hfilesCounter.incr(hfiles);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override public long getSinkAppliedOps() {
|
||||||
|
return opsCounter.value();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -185,6 +185,8 @@ message ClientMetrics {
|
||||||
message ReplicationLoadSink {
|
message ReplicationLoadSink {
|
||||||
required uint64 ageOfLastAppliedOp = 1;
|
required uint64 ageOfLastAppliedOp = 1;
|
||||||
required uint64 timeStampsOfLastAppliedOp = 2;
|
required uint64 timeStampsOfLastAppliedOp = 2;
|
||||||
|
required uint64 timestampStarted = 3;
|
||||||
|
required uint64 totalOpsProcessed = 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
message ReplicationLoadSource {
|
message ReplicationLoadSource {
|
||||||
|
|
|
@ -29,6 +29,7 @@ import org.apache.hadoop.hbase.CompatibilitySingletonFactory;
|
||||||
public class MetricsSink {
|
public class MetricsSink {
|
||||||
|
|
||||||
private long lastTimestampForAge = System.currentTimeMillis();
|
private long lastTimestampForAge = System.currentTimeMillis();
|
||||||
|
private long startTimestamp = System.currentTimeMillis();
|
||||||
private final MetricsReplicationSinkSource mss;
|
private final MetricsReplicationSinkSource mss;
|
||||||
|
|
||||||
public MetricsSink() {
|
public MetricsSink() {
|
||||||
|
@ -98,4 +99,21 @@ public class MetricsSink {
|
||||||
public long getTimestampOfLastAppliedOp() {
|
public long getTimestampOfLastAppliedOp() {
|
||||||
return this.lastTimestampForAge;
|
return this.lastTimestampForAge;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the time stamp from when the Sink was initialized.
|
||||||
|
* @return startTimestamp
|
||||||
|
*/
|
||||||
|
public long getStartTimestamp() {
|
||||||
|
return this.startTimestamp;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the total number of OPs delivered to this sink.
|
||||||
|
* @return totalAplliedOps
|
||||||
|
*/
|
||||||
|
public long getAppliedOps() {
|
||||||
|
return this.mss.getSinkAppliedOps();
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -61,6 +61,8 @@ public class ReplicationLoad {
|
||||||
ClusterStatusProtos.ReplicationLoadSink.newBuilder();
|
ClusterStatusProtos.ReplicationLoadSink.newBuilder();
|
||||||
rLoadSinkBuild.setAgeOfLastAppliedOp(sinkMetrics.getAgeOfLastAppliedOp());
|
rLoadSinkBuild.setAgeOfLastAppliedOp(sinkMetrics.getAgeOfLastAppliedOp());
|
||||||
rLoadSinkBuild.setTimeStampsOfLastAppliedOp(sinkMetrics.getTimestampOfLastAppliedOp());
|
rLoadSinkBuild.setTimeStampsOfLastAppliedOp(sinkMetrics.getTimestampOfLastAppliedOp());
|
||||||
|
rLoadSinkBuild.setTimestampStarted(sinkMetrics.getStartTimestamp());
|
||||||
|
rLoadSinkBuild.setTotalOpsProcessed(sinkMetrics.getAppliedOps());
|
||||||
this.replicationLoadSink = rLoadSinkBuild.build();
|
this.replicationLoadSink = rLoadSinkBuild.build();
|
||||||
|
|
||||||
this.replicationLoadSourceEntries = new ArrayList<>();
|
this.replicationLoadSourceEntries = new ArrayList<>();
|
||||||
|
|
|
@ -50,6 +50,15 @@ public class TestReplicationStatus extends TestReplicationBase {
|
||||||
public static final HBaseClassTestRule CLASS_RULE =
|
public static final HBaseClassTestRule CLASS_RULE =
|
||||||
HBaseClassTestRule.forClass(TestReplicationStatus.class);
|
HBaseClassTestRule.forClass(TestReplicationStatus.class);
|
||||||
|
|
||||||
|
private void insertRowsOnSource() throws IOException {
|
||||||
|
final byte[] qualName = Bytes.toBytes("q");
|
||||||
|
for (int i = 0; i < NB_ROWS_IN_BATCH; i++) {
|
||||||
|
Put p = new Put(Bytes.toBytes("row" + i));
|
||||||
|
p.addColumn(famName, qualName, Bytes.toBytes("val" + i));
|
||||||
|
htable1.put(p);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test for HBASE-9531.
|
* Test for HBASE-9531.
|
||||||
* <p/>
|
* <p/>
|
||||||
|
@ -70,12 +79,7 @@ public class TestReplicationStatus extends TestReplicationBase {
|
||||||
Admin hbaseAdmin = UTIL1.getAdmin();
|
Admin hbaseAdmin = UTIL1.getAdmin();
|
||||||
// disable peer <= WHY? I DON'T GET THIS DISABLE BUT TEST FAILS W/O IT.
|
// disable peer <= WHY? I DON'T GET THIS DISABLE BUT TEST FAILS W/O IT.
|
||||||
hbaseAdmin.disableReplicationPeer(PEER_ID2);
|
hbaseAdmin.disableReplicationPeer(PEER_ID2);
|
||||||
final byte[] qualName = Bytes.toBytes("q");
|
insertRowsOnSource();
|
||||||
for (int i = 0; i < NB_ROWS_IN_BATCH; i++) {
|
|
||||||
Put p = new Put(Bytes.toBytes("row" + i));
|
|
||||||
p.addColumn(famName, qualName, Bytes.toBytes("val" + i));
|
|
||||||
htable1.put(p);
|
|
||||||
}
|
|
||||||
LOG.info("AFTER PUTS");
|
LOG.info("AFTER PUTS");
|
||||||
// TODO: Change this wait to a barrier. I tried waiting on replication stats to
|
// TODO: Change this wait to a barrier. I tried waiting on replication stats to
|
||||||
// change but sleeping in main thread seems to mess up background replication.
|
// change but sleeping in main thread seems to mess up background replication.
|
||||||
|
@ -120,6 +124,35 @@ public class TestReplicationStatus extends TestReplicationBase {
|
||||||
assertEquals(PEER_ID2, rLoadSourceList.get(0).getPeerID());
|
assertEquals(PEER_ID2, rLoadSourceList.get(0).getPeerID());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testReplicationStatusSink() throws Exception {
|
||||||
|
try (Admin hbaseAdmin = UTIL2.getConnection().getAdmin()) {
|
||||||
|
ServerName server = UTIL2.getHBaseCluster().getRegionServer(0).getServerName();
|
||||||
|
ReplicationLoadSink loadSink = getLatestSinkMetric(hbaseAdmin, server);
|
||||||
|
//First checks if status of timestamp of last applied op is same as RS start, since no edits
|
||||||
|
//were replicated yet
|
||||||
|
assertEquals(loadSink.getTimestampStarted(), loadSink.getTimestampsOfLastAppliedOp());
|
||||||
|
//now insert some rows on source, so that it gets delivered to target
|
||||||
|
insertRowsOnSource();
|
||||||
|
long wait = Waiter.waitFor(UTIL2.getConfiguration(),
|
||||||
|
10000, new Waiter.Predicate<Exception>() {
|
||||||
|
@Override
|
||||||
|
public boolean evaluate() throws Exception {
|
||||||
|
ReplicationLoadSink loadSink = getLatestSinkMetric(hbaseAdmin, server);
|
||||||
|
return loadSink.getTimestampsOfLastAppliedOp()>loadSink.getTimestampStarted();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
//If wait is -1, we know predicate condition was never true
|
||||||
|
assertTrue(wait>=0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private ReplicationLoadSink getLatestSinkMetric(Admin admin, ServerName server)
|
||||||
|
throws IOException {
|
||||||
|
ClusterMetrics metrics = admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS));
|
||||||
|
ServerMetrics sm = metrics.getLiveServerMetrics().get(server);
|
||||||
|
return sm.getReplicationLoadSink();
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
* Wait until Master shows metrics counts for ReplicationLoadSourceList that are
|
* Wait until Master shows metrics counts for ReplicationLoadSourceList that are
|
||||||
* greater than <code>greaterThan</code> for <code>serverName</code> before
|
* greater than <code>greaterThan</code> for <code>serverName</code> before
|
||||||
|
|
|
@ -835,12 +835,18 @@ module Hbase
|
||||||
r_source_string = ' SOURCE:'
|
r_source_string = ' SOURCE:'
|
||||||
r_load_sink = sl.getReplicationLoadSink
|
r_load_sink = sl.getReplicationLoadSink
|
||||||
next if r_load_sink.nil?
|
next if r_load_sink.nil?
|
||||||
|
if r_load_sink.getTimestampsOfLastAppliedOp() == r_load_sink.getTimestampStarted()
|
||||||
|
# If we have applied no operations since we've started replication,
|
||||||
|
# assume that we're not acting as a sink and don't print the normal information
|
||||||
|
r_sink_string << " TimeStampStarted=" + r_load_sink.getTimestampStarted().to_s
|
||||||
|
r_sink_string << ", Waiting for OPs... "
|
||||||
|
else
|
||||||
|
r_sink_string << " TimeStampStarted=" + r_load_sink.getTimestampStarted().to_s
|
||||||
|
r_sink_string << ", AgeOfLastAppliedOp=" + r_load_sink.getAgeOfLastAppliedOp().to_s
|
||||||
|
r_sink_string << ", TimeStampsOfLastAppliedOp=" +
|
||||||
|
(java.util.Date.new(r_load_sink.getTimestampsOfLastAppliedOp())).toString()
|
||||||
|
end
|
||||||
|
|
||||||
r_sink_string << ' AgeOfLastAppliedOp=' +
|
|
||||||
r_load_sink.getAgeOfLastAppliedOp.to_s
|
|
||||||
r_sink_string << ', TimeStampsOfLastAppliedOp=' +
|
|
||||||
java.util.Date.new(r_load_sink
|
|
||||||
.getTimestampsOfLastAppliedOp).toString
|
|
||||||
r_load_source_map = sl.getReplicationLoadSourceMap
|
r_load_source_map = sl.getReplicationLoadSourceMap
|
||||||
build_source_string(r_load_source_map, r_source_string)
|
build_source_string(r_load_source_map, r_source_string)
|
||||||
puts(format(' %<host>s:', host: server_name.getHostname))
|
puts(format(' %<host>s:', host: server_name.getHostname))
|
||||||
|
|
Loading…
Reference in New Issue