From fabb1d97cc66662dc0d0657344bc9a014cf88088 Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Fri, 16 Mar 2018 20:36:57 -0700 Subject: [PATCH] HBASE-20169 NPE when calling HBTU.shutdownMiniCluster Adds a prepare step to RecoverMetaProcedure in which we test for cluster up and master being up. If not up, we fail the run. Modified hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/HFileCleaner.java Modified hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ChunkCreator.java Minor log cleanup. Modified hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RecoverMetaProcedure.java Add pepare step. Modified hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerMetrics.java Debug for the failing test.... Added hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestRecoverMetaProcedure.java Test the prepare step goes down if master or cluster are down. --- .../procedure2/StateMachineProcedure.java | 3 +- .../src/main/protobuf/MasterProcedure.proto | 1 + .../hbase/master/cleaner/HFileCleaner.java | 6 +- .../procedure/RecoverMetaProcedure.java | 17 ++- .../hbase/regionserver/ChunkCreator.java | 2 +- .../master/TestAssignmentManagerMetrics.java | 1 + .../procedure/TestRecoverMetaProcedure.java | 109 ++++++++++++++++++ 7 files changed, 133 insertions(+), 6 deletions(-) create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestRecoverMetaProcedure.java diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/StateMachineProcedure.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/StateMachineProcedure.java index 222ae935aba..0e3a4cd6547 100644 --- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/StateMachineProcedure.java +++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/StateMachineProcedure.java @@ -74,7 +74,8 @@ public abstract class StateMachineProcedure */ private int previousState; - protected enum Flow { + @VisibleForTesting + public enum Flow { HAS_MORE_STATE, NO_MORE_STATE, } diff --git a/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto b/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto index 9666c258466..fa6fa757a80 100644 --- a/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto +++ b/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto @@ -307,6 +307,7 @@ enum ServerCrashState { } enum RecoverMetaState { + RECOVER_META_PREPARE = 0; RECOVER_META_SPLIT_LOGS = 1; RECOVER_META_ASSIGN_REGIONS = 2; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/HFileCleaner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/HFileCleaner.java index 1fbda430343..b85b56963b6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/HFileCleaner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/HFileCleaner.java @@ -235,12 +235,12 @@ public class HFileCleaner extends CleanerChore { break; } if (task != null) { - LOG.debug("Removing: {} from archive", task.filePath); + LOG.debug("Removing {}", task.filePath); boolean succeed; try { succeed = this.fs.delete(task.filePath, false); } catch (IOException e) { - LOG.warn("Failed to delete file {}", task.filePath, e); + LOG.warn("Failed to delete {}", task.filePath, e); succeed = false; } task.setResult(succeed); @@ -250,7 +250,7 @@ public class HFileCleaner extends CleanerChore { } } } finally { - LOG.debug("Exit thread: {}", Thread.currentThread()); + LOG.debug("Exit {}", Thread.currentThread()); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RecoverMetaProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RecoverMetaProcedure.java index 301cd181f49..2234a1bb898 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RecoverMetaProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RecoverMetaProcedure.java @@ -104,6 +104,21 @@ public class RecoverMetaProcedure try { switch (state) { + case RECOVER_META_PREPARE: + // If Master is going down or cluster is up, skip this assign by returning NO_MORE_STATE + if (!master.isClusterUp()) { + String msg = "Cluster not up! Skipping hbase:meta assign."; + LOG.warn(msg); + return Flow.NO_MORE_STATE; + } + if (master.isStopping() || master.isStopped()) { + String msg = "Master stopping=" + master.isStopping() + ", stopped=" + + master.isStopped() + "; skipping hbase:meta assign."; + LOG.warn(msg); + return Flow.NO_MORE_STATE; + } + setNextState(RecoverMetaState.RECOVER_META_SPLIT_LOGS); + break; case RECOVER_META_SPLIT_LOGS: LOG.info("Start " + this); if (shouldSplitWal) { @@ -202,7 +217,7 @@ public class RecoverMetaProcedure @Override protected MasterProcedureProtos.RecoverMetaState getInitialState() { - return RecoverMetaState.RECOVER_META_SPLIT_LOGS; + return RecoverMetaState.RECOVER_META_PREPARE; } @Override diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ChunkCreator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ChunkCreator.java index 9aeb06f840f..5577be47ad2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ChunkCreator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ChunkCreator.java @@ -424,7 +424,7 @@ public class ChunkCreator { long created = chunkCount.get(); long reused = reusedChunkCount.sum(); long total = created + reused; - LOG.debug("{} Stats (chunk size={}): current pool size={}, created chunk count={}, " + + LOG.debug("{} stats (chunk size={}): current pool size={}, created chunk count={}, " + "reused chunk count={}, reuseRatio={}", label, chunkSize, reclaimedChunks.size(), created, reused, (total == 0? "0": StringUtils.formatPercent((float)reused/(float)total,2))); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerMetrics.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerMetrics.java index 4aa97d33569..87f6fa4e25f 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerMetrics.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerMetrics.java @@ -98,6 +98,7 @@ public class TestAssignmentManagerMetrics { @AfterClass public static void after() throws Exception { + LOG.info("AFTER {} <= IS THIS NULL?", TEST_UTIL); TEST_UTIL.shutdownMiniCluster(); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestRecoverMetaProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestRecoverMetaProcedure.java new file mode 100644 index 00000000000..dc939f50f00 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestRecoverMetaProcedure.java @@ -0,0 +1,109 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.procedure; + + +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.master.MasterServices; +import org.apache.hadoop.hbase.master.assignment.MockMasterServices; +import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException; +import org.apache.hadoop.hbase.procedure2.ProcedureYieldException; +import org.apache.hadoop.hbase.procedure2.StateMachineProcedure; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.mockito.Mockito; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; + +import static org.junit.Assert.assertEquals; + +@Category({MasterTests.class, SmallTests.class}) +public class TestRecoverMetaProcedure { + private static final Logger LOG = LoggerFactory.getLogger(TestRecoverMetaProcedure.class); + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestRecoverMetaProcedure.class); + private static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); + + /** + * Test the new prepare step. + * Here we test that our Mock is faking out the precedure well-enough for it to progress past the + * first prepare stage. + */ + @Test + public void testPrepare() throws ProcedureSuspendedException, ProcedureYieldException, + InterruptedException, IOException { + RecoverMetaProcedure rmp = new RecoverMetaProcedure(); + MasterProcedureEnv env = Mockito.mock(MasterProcedureEnv.class); + MasterServices masterServices = + new MockMasterServices(UTIL.getConfiguration(), null); + Mockito.when(env.getMasterServices()).thenReturn(masterServices); + assertEquals(StateMachineProcedure.Flow.HAS_MORE_STATE, + rmp.executeFromState(env, rmp.getInitialState())); + int stateId = rmp.getCurrentStateId(); + assertEquals(MasterProcedureProtos.RecoverMetaState.RECOVER_META_SPLIT_LOGS_VALUE, + rmp.getCurrentStateId()); + } + + /** + * Test the new prepare step. + * If Master is stopping, procedure should skip the assign by returning NO_MORE_STATE + */ + @Test + public void testPrepareWithMasterStopping() throws ProcedureSuspendedException, + ProcedureYieldException, InterruptedException, IOException { + RecoverMetaProcedure rmp = new RecoverMetaProcedure(); + MasterProcedureEnv env = Mockito.mock(MasterProcedureEnv.class); + MasterServices masterServices = new MockMasterServices(UTIL.getConfiguration(), null) { + @Override + public boolean isStopping() { + return true; + } + }; + Mockito.when(env.getMasterServices()).thenReturn(masterServices); + assertEquals(StateMachineProcedure.Flow.NO_MORE_STATE, + rmp.executeFromState(env, rmp.getInitialState())); + } + + /** + * Test the new prepare step. + * If cluster is down, procedure should skip the assign by returning NO_MORE_STATE + */ + @Test + public void testPrepareWithNoCluster() throws ProcedureSuspendedException, + ProcedureYieldException, InterruptedException, IOException { + RecoverMetaProcedure rmp = new RecoverMetaProcedure(); + MasterProcedureEnv env = Mockito.mock(MasterProcedureEnv.class); + MasterServices masterServices = new MockMasterServices(UTIL.getConfiguration(), null) { + @Override + public boolean isClusterUp() { + return false; + } + }; + Mockito.when(env.getMasterServices()).thenReturn(masterServices); + assertEquals(StateMachineProcedure.Flow.NO_MORE_STATE, + rmp.executeFromState(env, rmp.getInitialState())); + } +}