From 9e469e4a2a232ebd4ef778aff19d8567016aede1 Mon Sep 17 00:00:00 2001 From: Duo Zhang Date: Fri, 17 Dec 2021 21:24:26 +0800 Subject: [PATCH] HBASE-26540 Implement a tool to verify the lag of region replication (#3950) Signed-off-by: GeorryHuang --- .../hbase/RegionReplicationLagEvaluation.java | 167 ++++++++++++++++++ .../TestRegionReplicationLagEvaluation.java | 61 +++++++ 2 files changed, 228 insertions(+) create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/RegionReplicationLagEvaluation.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionReplicationLagEvaluation.java diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/RegionReplicationLagEvaluation.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/RegionReplicationLagEvaluation.java new file mode 100644 index 00000000000..6a54adcaced --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/RegionReplicationLagEvaluation.java @@ -0,0 +1,167 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase; + +import com.google.errorprone.annotations.RestrictedApi; +import java.io.IOException; +import java.util.Arrays; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.TimeUnit; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; +import org.apache.hadoop.hbase.client.Connection; +import org.apache.hadoop.hbase.client.ConnectionFactory; +import org.apache.hadoop.hbase.client.ConnectionUtils; +import org.apache.hadoop.hbase.client.Consistency; +import org.apache.hadoop.hbase.client.Get; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.client.TableDescriptorBuilder; +import org.apache.hadoop.hbase.metrics.impl.FastLongHistogram; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Threads; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine; +import org.apache.hbase.thirdparty.org.apache.commons.cli.DefaultParser; +import org.apache.hbase.thirdparty.org.apache.commons.cli.HelpFormatter; +import org.apache.hbase.thirdparty.org.apache.commons.cli.Options; + +/** + * A tool to evaluating the lag between primary replica and secondary replica. + *

+ * It simply adds a row to the primary replica, and then check how long before we can read it from + * the secondary replica. + */ +@InterfaceAudience.Private +public class RegionReplicationLagEvaluation extends Configured implements Tool { + + private static final Logger LOG = LoggerFactory.getLogger(RegionReplicationLagEvaluation.class); + + public static final String TABLE_NAME = "TestLagTable"; + + public static final String FAMILY_NAME = "info"; + + public static final String QUALIFIER_NAME = "qual"; + + public static final int VALUE_LENGTH = 256; + + public static final int ROW_LENGTH = 16; + + private static final Options OPTIONS = new Options().addOption("t", "table", true, "Table name") + .addOption("rlen", "rlength", true, "The length of row key") + .addOption("vlen", "vlength", true, "The length of value") + .addRequiredOption("r", "rows", true, "Number of rows to test"); + + private FastLongHistogram histogram = new FastLongHistogram(); + + @RestrictedApi(explanation = "Should only be called in tests", link = "", + allowedOnPath = ".*/src/test/.*") + FastLongHistogram getHistogram() { + return histogram; + } + + @Override + public int run(String[] args) throws Exception { + TableName tableName; + int rlen; + int vlen; + int rows; + try { + CommandLine cli = new DefaultParser().parse(OPTIONS, args); + tableName = TableName.valueOf(cli.getOptionValue("t", TABLE_NAME)); + rlen = Integer.parseInt(cli.getOptionValue("rlen", String.valueOf(ROW_LENGTH))); + vlen = Integer.parseInt(cli.getOptionValue("vlen", String.valueOf(VALUE_LENGTH))); + rows = Integer.parseInt(cli.getOptionValue("r")); + } catch (Exception e) { + LOG.warn("Error parsing command line options", e); + HelpFormatter formatter = new HelpFormatter(); + formatter.printHelp(getClass().getName(), OPTIONS); + return -1; + } + exec(tableName, rlen, vlen, rows); + return 0; + } + + private void createTable(Admin admin, TableName tableName) throws IOException { + TableDescriptor td = TableDescriptorBuilder.newBuilder(tableName) + .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY_NAME)).setRegionReplication(2) + .build(); + admin.createTable(td); + } + + private void checkLag(Table table, int rlen, int vlen, int rows) throws IOException { + ThreadLocalRandom rand = ThreadLocalRandom.current(); + byte[] family = Bytes.toBytes(FAMILY_NAME); + byte[] qualifier = Bytes.toBytes(QUALIFIER_NAME); + byte[] row = new byte[rlen]; + byte[] value = new byte[vlen]; + LOG.info("Test replication lag on table {} with {} rows", table.getName(), rows); + for (int i = 0; i < rows; i++) { + rand.nextBytes(row); + rand.nextBytes(value); + table.put(new Put(row).addColumn(family, qualifier, value)); + // get from secondary replica + Get get = new Get(row).setConsistency(Consistency.TIMELINE).setReplicaId(1); + long startNs = System.nanoTime(); + for (int retry = 0;; retry++) { + Result result = table.get(get); + byte[] gotValue = result.getValue(family, qualifier); + if (Arrays.equals(value, gotValue)) { + break; + } + long pauseTimeMs = Math.min(ConnectionUtils.getPauseTime(1, retry), 1000); + Threads.sleepWithoutInterrupt(pauseTimeMs); + } + long lagMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNs); + histogram.add(lagMs, 1); + } + LOG.info("Test finished, min lag {} ms, max lag {} ms, mean lag {} ms", histogram.getMin(), + histogram.getMax(), histogram.getMean()); + long[] q = histogram.getQuantiles(FastLongHistogram.DEFAULT_QUANTILES); + for (int i = 0; i < q.length; i++) { + LOG.info("{}% lag: {} ms", FastLongHistogram.DEFAULT_QUANTILES[i] * 100, q[i]); + } + } + + private void exec(TableName tableName, int rlen, int vlen, int rows) throws IOException { + try (Connection conn = ConnectionFactory.createConnection(getConf())) { + try (Admin admin = conn.getAdmin()) { + if (!admin.tableExists(tableName)) { + createTable(admin, tableName); + } + } + try (Table table = conn.getTable(tableName)) { + checkLag(table, rlen, vlen, rows); + } + } + } + + public static void main(String[] args) throws Exception { + int res = + ToolRunner.run(HBaseConfiguration.create(), new RegionReplicationLagEvaluation(), args); + System.exit(res); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionReplicationLagEvaluation.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionReplicationLagEvaluation.java new file mode 100644 index 00000000000..f4b7970b8b4 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionReplicationLagEvaluation.java @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase; + +import static org.junit.Assert.assertEquals; + +import org.apache.hadoop.hbase.metrics.impl.FastLongHistogram; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.testclassification.MiscTests; +import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil; +import org.apache.hadoop.util.ToolRunner; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category({ MiscTests.class, MediumTests.class }) +public class TestRegionReplicationLagEvaluation { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestRegionReplicationLagEvaluation.class); + + private static final HBaseTestingUtil UTIL = new HBaseTestingUtil(); + + @BeforeClass + public static void setUp() throws Exception { + UTIL.getConfiguration().setBoolean(ServerRegionReplicaUtil.REGION_REPLICA_REPLICATION_CONF_KEY, + true); + UTIL.startMiniCluster(2); + } + + @AfterClass + public static void tearDown() throws Exception { + UTIL.shutdownMiniCluster(); + } + + @Test + public void test() throws Exception { + RegionReplicationLagEvaluation tool = new RegionReplicationLagEvaluation(); + assertEquals(0, ToolRunner.run(UTIL.getConfiguration(), tool, new String[] { "-r", "1000" })); + FastLongHistogram histogram = tool.getHistogram(); + assertEquals(1000, histogram.getCount()); + } +}