From 8a1ffa6c4388c8b4f32b3e9de8a49eac490bcb94 Mon Sep 17 00:00:00 2001
From: stack
Date: Mon, 20 Apr 2020 15:35:40 -0700
Subject: [PATCH] HBASE-24220 Allow that zk NOTEMPTY multi exception is
 retryable by running in-series

hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReaderImpl.java
hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReaderImpl.java
 Cleanup checkstyle warnings. Don't depend on hbase-client ScannerCallable.

hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
 Cut down on cluster resource usage.

hbase-server/src/test/java/org/apache/hadoop/hbase/master/AbstractTestDLS.java
 Debug

hbase-server/src/test/java/org/apache/hadoop/hbase/security/access/TestAccessController3.java
 Debug

hbase-thrift/src/test/java/org/apache/hadoop/hbase/thrift/TestThriftHttpServer.java
 Debug

hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/MiniZooKeeperCluster.java
 Debug

hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java
 Allow that NOTEMPTY is retryable by running in series.
---
 .../hbase/mapred/TableRecordReaderImpl.java   | 25 ++++--------
 .../mapreduce/TableRecordReaderImpl.java      | 38 +++++++++----------
 .../hbase/snapshot/TestExportSnapshot.java    |  2 +-
 .../hadoop/hbase/master/AbstractTestDLS.java  | 13 +++----
 .../access/TestAccessController3.java         | 12 +++---
 .../hbase/thrift/TestThriftHttpServer.java    |  3 +-
 .../hbase/zookeeper/MiniZooKeeperCluster.java |  9 ++---
 .../apache/hadoop/hbase/zookeeper/ZKUtil.java | 12 ++++--
 8 files changed, 54 insertions(+), 60 deletions(-)

diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReaderImpl.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReaderImpl.java
index a49d0ec5c3e..b68ea504dfc 100644
--- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReaderImpl.java
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableRecordReaderImpl.java
@@ -1,4 +1,4 @@
-/**
+/*
  *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements. See the NOTICE file
@@ -18,24 +18,22 @@
  */
 package org.apache.hadoop.hbase.mapred;
 
+import static org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl.LOG_PER_ROW_COUNT;
 import java.io.IOException;
-import org.apache.yetus.audience.InterfaceAudience;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.DoNotRetryIOException;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.ResultScanner;
 import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.ScannerCallable;
 import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.DoNotRetryIOException;
 import org.apache.hadoop.hbase.filter.Filter;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.util.StringUtils;
-
-import static org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl.LOG_PER_ROW_COUNT;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * Iterate over an HBase table data, return (Text, RowResult) pairs
@@ -58,9 +56,6 @@ public class TableRecordReaderImpl {
 
   /**
    * Restart from survivable exceptions by creating a new scanner.
-   *
-   * @param firstRow
-   * @throws IOException
    */
   public void restart(byte[] firstRow) throws IOException {
     Scan currentScan;
@@ -100,8 +95,6 @@ public class TableRecordReaderImpl {
 
   /**
    * Build the scanner. Not done in constructor to allow for extension.
-   *
-   * @throws IOException
    */
   public void init() throws IOException {
     restart(startRow);
@@ -116,7 +109,7 @@ public class TableRecordReaderImpl {
   public void setHTable(Table htable) {
     Configuration conf = htable.getConfiguration();
     logScannerActivity = conf.getBoolean(
-      ScannerCallable.LOG_SCANNER_ACTIVITY, false);
+      "hbase.client.log.scanner.activity" /*ScannerCallable.LOG_SCANNER_ACTIVITY*/, false);
     logPerRowCount = conf.getInt(LOG_PER_ROW_COUNT, 100);
     this.htable = htable;
   }
@@ -194,10 +187,8 @@
    * @param key HStoreKey as input key.
    * @param value MapWritable as input value
    * @return true if there was more data
-   * @throws IOException
    */
-  public boolean next(ImmutableBytesWritable key, Result value)
-  throws IOException {
+  public boolean next(ImmutableBytesWritable key, Result value) throws IOException {
     Result result;
     try {
       try {
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReaderImpl.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReaderImpl.java
index 1fa943b27c9..439a2a650b9 100644
--- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReaderImpl.java
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableRecordReaderImpl.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements. See the NOTICE file
  * distributed with this work for additional information
@@ -20,24 +20,22 @@ package org.apache.hadoop.hbase.mapreduce;
 import java.io.IOException;
 import java.lang.reflect.Method;
 import java.util.Map;
-import org.apache.yetus.audience.InterfaceAudience;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.DoNotRetryIOException;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.ResultScanner;
 import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.ScannerCallable;
 import org.apache.hadoop.hbase.client.Table;
 import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
-import org.apache.hadoop.hbase.DoNotRetryIOException;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.mapreduce.Counter;
 import org.apache.hadoop.mapreduce.InputSplit;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.hadoop.util.StringUtils;
-
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
 
 /**
@@ -102,10 +100,9 @@ public class TableRecordReaderImpl {
    * In new mapreduce APIs, TaskAttemptContext has two getCounter methods
    * Check if getCounter(String, String) method is available.
    * @return The getCounter method or null if not available.
-   * @throws IOException
    */
   protected static Method retrieveGetCounterWithStringsParams(TaskAttemptContext context)
-  throws IOException {
+    throws IOException {
     Method m = null;
     try {
       m = context.getClass().getMethod("getCounter",
@@ -126,7 +123,7 @@ public class TableRecordReaderImpl {
   public void setHTable(Table htable) {
     Configuration conf = htable.getConfiguration();
     logScannerActivity = conf.getBoolean(
-      ScannerCallable.LOG_SCANNER_ACTIVITY, false);
+      "hbase.client.log.scanner.activity" /*ScannerCallable.LOG_SCANNER_ACTIVITY*/, false);
     logPerRowCount = conf.getInt(LOG_PER_ROW_COUNT, 100);
     this.htable = htable;
   }
@@ -142,9 +139,6 @@ public class TableRecordReaderImpl {
 
   /**
    * Build the scanner. Not done in constructor to allow for extension.
-   *
-   * @throws IOException
-   * @throws InterruptedException
    */
   public void initialize(InputSplit inputsplit,
       TaskAttemptContext context) throws IOException,
@@ -176,7 +170,6 @@
    * Returns the current key.
    *
    * @return The current key.
-   * @throws IOException
    * @throws InterruptedException When the job is aborted.
    */
   public ImmutableBytesWritable getCurrentKey() throws IOException,
@@ -204,12 +197,18 @@
    * @throws InterruptedException When the job was aborted.
    */
   public boolean nextKeyValue() throws IOException, InterruptedException {
-    if (key == null) key = new ImmutableBytesWritable();
-    if (value == null) value = new Result();
+    if (key == null) {
+      key = new ImmutableBytesWritable();
+    }
+    if (value == null) {
+      value = new Result();
+    }
     try {
       try {
         value = this.scanner.next();
-        if (value != null && value.isStale()) numStale++;
+        if (value != null && value.isStale()) {
+          numStale++;
+        }
         if (logScannerActivity) {
           rowcount ++;
           if (rowcount >= logPerRowCount) {
@@ -242,7 +241,9 @@
           scanner.next();    // skip presumed already mapped row
         }
         value = scanner.next();
-        if (value != null && value.isStale()) numStale++;
+        if (value != null && value.isStale()) {
+          numStale++;
+        }
         numRestarts++;
       }
 
@@ -281,7 +282,6 @@
    * counters thus can update counters based on scanMetrics.
    * If hbase runs on old version of mapreduce, it won't be able to get
    * access to counters and TableRecorderReader can't update counter values.
-   * @throws IOException
    */
   private void updateCounters() throws IOException {
     ScanMetrics scanMetrics = scanner.getScanMetrics();
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
index f419b108c7b..8d65ac17cb1 100644
--- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
@@ -92,7 +92,7 @@ public class TestExportSnapshot {
   @BeforeClass
   public static void setUpBeforeClass() throws Exception {
     setUpBaseConf(TEST_UTIL.getConfiguration());
-    TEST_UTIL.startMiniCluster(3);
+    TEST_UTIL.startMiniCluster(1);
     TEST_UTIL.startMiniMapReduceCluster();
   }
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/AbstractTestDLS.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/AbstractTestDLS.java
index 18ffd477d0c..485b84b5488 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/AbstractTestDLS.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/AbstractTestDLS.java
@@ -1,4 +1,4 @@
-/**
+/*
  *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements. See the NOTICE file
@@ -30,7 +30,6 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
-
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -43,6 +42,7 @@ import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.LongAdder;
+import java.util.stream.Collectors;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileStatus;
@@ -92,7 +92,6 @@ import org.junit.Test;
 import org.junit.rules.TestName;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-
 import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
 
 /**
@@ -225,7 +224,7 @@ public abstract class AbstractTestDLS {
       Path editsdir = WALSplitUtil
         .getRegionDirRecoveredEditsDir(FSUtils.getWALRegionDir(conf, tableName,
           hri.getEncodedName()));
-      LOG.debug("checking edits dir " + editsdir);
+      LOG.debug("Checking edits dir " + editsdir);
       FileStatus[] files = fs.listStatus(editsdir, new PathFilter() {
         @Override
         public boolean accept(Path p) {
@@ -235,9 +234,9 @@ public abstract class AbstractTestDLS {
           return true;
         }
       });
-      assertTrue(
-        "edits dir should have more than a single file in it. instead has " + files.length,
-        files.length > 1);
+      LOG.info("Files {}", Arrays.stream(files).map(f -> f.getPath().toString()).
+        collect(Collectors.joining(",")));
+      assertTrue("Edits dir should have more than one file", files.length > 1);
       for (int i = 0; i < files.length; i++) {
         int c = countWAL(files[i].getPath(), fs, conf);
         count += c;
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/security/access/TestAccessController3.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/security/access/TestAccessController3.java
index d4ae32f06e2..e4a7e84f325 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/security/access/TestAccessController3.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/security/access/TestAccessController3.java
@@ -20,7 +20,6 @@ package org.apache.hadoop.hbase.security.access;
 import static org.apache.hadoop.hbase.AuthUtil.toGroupEntry;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.Coprocessor;
 import org.apache.hadoop.hbase.CoprocessorEnvironment;
@@ -195,14 +194,13 @@ public class TestAccessController3 extends SecureTestUtil {
 
   @AfterClass
   public static void tearDownAfterClass() throws Exception {
-    HRegionServer rs = null;
-    for (JVMClusterUtil.RegionServerThread thread:
-        TEST_UTIL.getMiniHBaseCluster().getRegionServerThreads()) {
-      rs = thread.getRegionServer();
-    }
+    assertEquals(1, TEST_UTIL.getMiniHBaseCluster().getRegionServerThreads().size());
+    HRegionServer rs = TEST_UTIL.getMiniHBaseCluster().getRegionServerThreads().get(0).
+      getRegionServer();
+    // Strange place for an assert, but it must run before the cluster shuts down.
+    assertFalse("RegionServer should not have aborted (FaultyAccessController)", rs.isAborted());
     cleanUp();
     TEST_UTIL.shutdownMiniCluster();
-    assertFalse("region server should have aborted due to FaultyAccessController", rs.isAborted());
   }
 
   private static void setUpTableAndUserPermissions() throws Exception {
diff --git a/hbase-thrift/src/test/java/org/apache/hadoop/hbase/thrift/TestThriftHttpServer.java b/hbase-thrift/src/test/java/org/apache/hadoop/hbase/thrift/TestThriftHttpServer.java
index 86bb53f13a3..ffb9a5f8f5d 100644
--- a/hbase-thrift/src/test/java/org/apache/hadoop/hbase/thrift/TestThriftHttpServer.java
+++ b/hbase-thrift/src/test/java/org/apache/hadoop/hbase/thrift/TestThriftHttpServer.java
@@ -207,7 +207,8 @@ public class TestThriftHttpServer {
     HttpURLConnection conn = (HttpURLConnection) new URL(url).openConnection();
     conn.setRequestMethod("TRACE");
     conn.connect();
-    Assert.assertEquals(HttpURLConnection.HTTP_FORBIDDEN, conn.getResponseCode());
+    Assert.assertEquals(conn.getResponseMessage(),
+      HttpURLConnection.HTTP_FORBIDDEN, conn.getResponseCode());
   }
 
   protected static volatile boolean tableCreated = false;
diff --git a/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/MiniZooKeeperCluster.java b/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/MiniZooKeeperCluster.java
index bdf7bb5579d..5779394588e 100644
--- a/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/MiniZooKeeperCluster.java
+++ b/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/MiniZooKeeperCluster.java
@@ -321,19 +321,18 @@ public class MiniZooKeeperCluster {
   public void shutdown() throws IOException {
     // shut down all the zk servers
     for (int i = 0; i < standaloneServerFactoryList.size(); i++) {
-      NIOServerCnxnFactory standaloneServerFactory =
-        standaloneServerFactoryList.get(i);
+      NIOServerCnxnFactory standaloneServerFactory = standaloneServerFactoryList.get(i);
       int clientPort = clientPortList.get(i);
-
       standaloneServerFactory.shutdown();
       if (!waitForServerDown(clientPort, connectionTimeout)) {
-        throw new IOException("Waiting for shutdown of standalone server");
+        throw new IOException("Waiting for shutdown of standalone server at port=" + clientPort +
+          ", timeout=" + this.connectionTimeout);
       }
     }
     standaloneServerFactoryList.clear();
 
     for (ZooKeeperServer zkServer: zooKeeperServers) {
-      //explicitly close ZKDatabase since ZookeeperServer does not close them
+      // Explicitly close ZKDatabase since ZooKeeperServer does not close it
       zkServer.getZKDatabase().close();
     }
     zooKeeperServers.clear();
diff --git a/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java b/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java
index 5c9fcaba29d..3f0f93f6321 100644
--- a/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java
+++ b/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java
@@ -36,6 +36,7 @@ import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
+import java.util.stream.Collectors;
 import javax.security.auth.login.AppConfigurationEntry;
 import javax.security.auth.login.AppConfigurationEntry.LoginModuleControlFlag;
 
@@ -1535,6 +1536,10 @@ public final class ZKUtil {
   public abstract static class ZKUtilOp {
     private String path;
 
+    @Override public String toString() {
+      return this.getClass().getSimpleName() + ", path=" + this.path;
+    }
+
     private ZKUtilOp(String path) {
       this.path = path;
     }
@@ -1750,12 +1755,13 @@
         case NONODE:
         case BADVERSION:
         case NOAUTH:
+        case NOTEMPTY:
           // if we get an exception that could be solved by running sequentially
           // (and the client asked us to), then break out and run sequentially
           if (runSequentialOnMultiFailure) {
-            LOG.info("On call to ZK.multi, received exception: " + ke.toString() + "."
-              + " Attempting to run operations sequentially because"
-              + " runSequentialOnMultiFailure is: " + runSequentialOnMultiFailure + ".");
+            LOG.info("multi exception: {}; running operations sequentially " +
+              "(runSequentialOnMultiFailure=true); {}", ke.toString(),
+              ops.stream().map(o -> o.toString()).collect(Collectors.joining(",")));
             processSequentially(zkw, ops);
             break;
           }
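
A note on the ZKUtil change, for reviewers: KeeperException.NOTEMPTY now sits beside NONODE, BADVERSION, and NOAUTH as a multi() failure that ZKUtil will retry by replaying the batched ops one at a time, provided the caller passed runSequentialOnMultiFailure=true; otherwise the exception surfaces to the caller as before. Below is a minimal, hypothetical caller sketch (the znode paths and the cleanupParent() helper are invented for illustration; ZKUtil.multiOrSequential() and the ZKUtilOp factory methods are the existing APIs this patch touches):

    import java.util.ArrayList;
    import java.util.List;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hadoop.hbase.zookeeper.ZKUtil;
    import org.apache.hadoop.hbase.zookeeper.ZKUtil.ZKUtilOp;
    import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
    import org.apache.zookeeper.KeeperException;

    public class MultiOrSequentialExample {
      /**
       * Batch a marker create with a parent delete. If the parent znode
       * gained a child after these ops were built, ZK fails the whole
       * multi with NOTEMPTY; with the third argument true, ZKUtil now
       * logs the batched ops and replays them one at a time instead of
       * giving up, so the ops that can still succeed land individually.
       */
      static void cleanupParent(ZKWatcher zkw) throws KeeperException {
        List<ZKUtilOp> ops = new ArrayList<>();
        ops.add(ZKUtilOp.createAndFailSilent("/hbase/example/marker",
          Bytes.toBytes("done")));
        ops.add(ZKUtilOp.deleteNodeFailSilent("/hbase/example/parent"));
        ZKUtil.multiOrSequential(zkw, ops, true); // runSequentialOnMultiFailure
      }
    }

In the sequential fallback, only the genuinely failing op still raises an error, and the new ZKUtilOp#toString makes the failed batch legible in the log line this patch adds.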