HBASE-21413 Empty meta log doesn't get split when restarting the whole cluster

Signed-off-by: stack <stack@apache.org>
This commit is contained in:
Allan Yang 2018-12-06 21:13:03 -08:00 committed by stack
parent 45324b6bb2
commit c9e85773d9
3 changed files with 126 additions and 0 deletions

View File

@ -35,6 +35,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSUtils; import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.wal.AbstractFSWALProvider; import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
@ -408,4 +409,43 @@ public class MasterWalManager {
} }
} }
} }
/**
 * Archives the meta WAL files left in a server's splitting WAL directory and then deletes
 * that directory.
 * <p>
 * When the meta region was opened and then closed normally on a server, it may leave some meta
 * WALs in that server's WAL dir. Since the meta region is no longer on this server, the SCP
 * won't split those meta WALs and just leaves them there, so deleting the WAL dir fails
 * because the dir is not empty. It is safe to archive those meta WALs and then delete the dir.
 * <p>
 * This is best effort: an {@link IOException} is logged as a warning and not rethrown.
 * @param serverName the server whose leftover meta WALs should be archived
 */
public void archiveMetaLog(final ServerName serverName) {
  try {
    Path logDir = new Path(this.rootDir,
      AbstractFSWALProvider.getWALDirectoryName(serverName.toString()));
    Path splitDir = logDir.suffix(AbstractFSWALProvider.SPLITTING_EXT);
    if (!fs.exists(splitDir)) {
      // Nothing to archive or clean up for this server.
      return;
    }
    FileStatus[] logfiles = FSUtils.listStatus(fs, splitDir, META_FILTER);
    if (logfiles != null) {
      for (FileStatus status : logfiles) {
        // Only plain files are moved to the archive; sub-directories are left alone.
        if (status.isDirectory()) {
          continue;
        }
        Path newPath = AbstractFSWAL.getWALArchivePath(this.oldLogDir, status.getPath());
        if (!FSUtils.renameAndSetModifyTime(fs, status.getPath(), newPath)) {
          LOG.warn("Unable to move " + status.getPath() + " to " + newPath);
        } else {
          LOG.debug("Archived meta log " + status.getPath() + " to " + newPath);
        }
      }
    }
    // Non-recursive delete: anything still left in the directory is not removed here.
    if (!fs.delete(splitDir, false)) {
      LOG.warn("Unable to delete log dir. Ignoring. " + splitDir);
    }
  } catch (IOException ie) {
    LOG.warn("Failed archiving meta log for server " + serverName, ie);
  }
}
} }

View File

@ -251,6 +251,9 @@ public class ServerCrashProcedure
// PROBLEM!!! WE BLOCK HERE. // PROBLEM!!! WE BLOCK HERE.
am.getRegionStates().logSplitting(this.serverName); am.getRegionStates().logSplitting(this.serverName);
mwm.splitLog(this.serverName); mwm.splitLog(this.serverName);
if (!carryingMeta) {
mwm.archiveMetaLog(this.serverName);
}
am.getRegionStates().logSplit(this.serverName); am.getRegionStates().logSplit(this.serverName);
LOG.debug("Done splitting WALs {}", this); LOG.debug("Done splitting WALs {}", this);
} }

View File

@ -0,0 +1,83 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.FSUtils;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.hadoop.hbase.wal.AbstractFSWALProvider.SPLITTING_EXT;
import static org.junit.Assert.fail;
/**
 * Verifies that after a region server which previously hosted hbase:meta is killed, no
 * {@code -splitting} WAL directory is left behind under the WAL root once a
 * {@link ServerCrashProcedure} has finished.
 */
@Category(MediumTests.class)
public class TestCleanupMetaWAL {
  private static final Logger LOG = LoggerFactory.getLogger(TestCleanupMetaWAL.class);
  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestCleanupMetaWAL.class);

  @BeforeClass
  public static void before() throws Exception {
    TEST_UTIL.startMiniCluster(2);
  }

  @AfterClass
  public static void after() throws Exception {
    // Tear down everything started by startMiniCluster(2), not just the ZK cluster.
    TEST_UTIL.shutdownMiniCluster();
  }

  @Test
  public void testCleanupMetaWAL() throws Exception {
    TEST_UTIL.createTable(TableName.valueOf("test"), "cf");
    HRegionServer serverWithMeta = TEST_UTIL.getMiniHBaseCluster()
      .getRegionServer(TEST_UTIL.getMiniHBaseCluster().getServerWithMeta());
    // Move meta off its current server (null destination), then kill that server so the
    // crash handling runs for a server that is not carrying meta.
    TEST_UTIL.getAdmin()
      .move(RegionInfoBuilder.FIRST_META_REGIONINFO.getEncodedNameAsBytes(), null);
    TEST_UTIL.getMiniHBaseCluster().killRegionServer(serverWithMeta.getServerName());
    // Wait until at least one ServerCrashProcedure has finished.
    TEST_UTIL.waitFor(10000, () ->
      TEST_UTIL.getMiniHBaseCluster().getMaster().getProcedures().stream()
        .anyMatch(p -> p instanceof ServerCrashProcedure && p.isFinished()));
    MasterFileSystem fs = TEST_UTIL.getMiniHBaseCluster().getMaster().getMasterFileSystem();
    Path walPath = new Path(fs.getWALRootDir(), HConstants.HREGION_LOGDIR_NAME);
    FileStatus[] walDirs = FSUtils.listStatus(fs.getFileSystem(), walPath);
    // NOTE(review): listStatus is null-checked by the production code in MasterWalManager;
    // a null result is treated here as "no entries", which trivially satisfies the check.
    if (walDirs == null) {
      return;
    }
    for (FileStatus status : walDirs) {
      if (status.getPath().toString().contains(SPLITTING_EXT)) {
        fail("Should not have splitting wal dir here:" + status);
      }
    }
  }
}