HBASE-21561 Backport HBASE-21413 (Empty meta log doesn't get split when restart whole cluster) to branch-1

Signed-off-by: Andrew Purtell <apurtell@apache.org>
This commit is contained in:
xcang 2019-01-21 17:40:59 -08:00 committed by Andrew Purtell
parent aa1b750899
commit acdd23ec4c
No known key found for this signature in database
GPG Key ID: 8597754DD5365CCD
4 changed files with 141 additions and 0 deletions

View File

@ -652,4 +652,43 @@ public class MasterFileSystem {
public void logFileSystemState(Log log) throws IOException {
// Pure delegation: hands fs and rootdir, together with the caller-supplied
// log, to FSUtils.logFileSystemState. Any IOException it raises propagates
// to the caller unchanged.
FSUtils.logFileSystemState(fs, rootdir, log);
}
/**
 * Archives the meta WAL files left in the splitting WAL directory of the given server and
 * then deletes that directory.
 * <p>
 * When a meta region was opened and later closed normally on a server, it may leave some meta
 * WALs in that server's WAL directory. Since the meta region is no longer on this server, the
 * SCP won't split those meta WALs and just leaves them there, so deleting the WAL directory
 * fails because the directory is not empty. Those meta WALs can safely be archived, after
 * which the directory can be deleted.
 * <p>
 * This is best effort: every failure is logged as a warning and nothing is thrown.
 * @param serverName the server whose meta WALs should be archived
 */
public void archiveMetaLog(final ServerName serverName) {
  try {
    Path logDir = new Path(this.rootdir,
        DefaultWALProvider.getWALDirectoryName(serverName.toString()));
    Path splitDir = logDir.suffix(DefaultWALProvider.SPLITTING_EXT);
    if (!fs.exists(splitDir)) {
      // No splitting directory for this server, so there is nothing to archive or delete.
      return;
    }
    FileStatus[] metaLogs = FSUtils.listStatus(fs, splitDir, META_FILTER);
    if (metaLogs != null) {
      for (FileStatus status : metaLogs) {
        // isDirectory() is the replacement for the deprecated FileStatus.isDir().
        if (status.isDirectory()) {
          continue;
        }
        Path source = status.getPath();
        Path newPath = DefaultWALProvider.getWALArchivePath(this.oldLogDir, source);
        if (!FSUtils.renameAndSetModifyTime(fs, source, newPath)) {
          LOG.warn("Unable to move " + source + " to " + newPath);
        } else {
          LOG.debug("Archived meta log " + source + " to " + newPath);
        }
      }
    }
    // Non-recursive delete (second argument is false): anything still present in the
    // directory is not removed by this call.
    if (!fs.delete(splitDir, false)) {
      LOG.warn("Unable to delete log dir. Ignoring. " + splitDir);
    }
  } catch (IOException ie) {
    LOG.warn("Failed archiving meta log for server " + serverName, ie);
  }
}
}

View File

@ -438,6 +438,9 @@ implements ServerProcedureInterface {
AssignmentManager am = env.getMasterServices().getAssignmentManager();
// TODO: For Matteo. Below BLOCKs!!!! Redo so can relinquish executor while it is running.
mfs.splitLog(this.serverName);
if (!carryingMeta) {
mfs.archiveMetaLog(this.serverName);
}
am.getRegionStates().logSplit(this.serverName);
}

View File

@ -407,4 +407,13 @@ public class DefaultWALProvider implements WALProvider {
return name.substring(0, endIndex);
}
/**
 * Builds the archived location for a WAL file: the name of {@code p} placed directly under
 * {@code archiveDir}. Only public so WALSplitter can use.
 * @param archiveDir directory the archived path is created under
 * @param p path of a WAL file; only its final name component is used
 * @return archived location of a WAL file with the given path p
 */
public static Path getWALArchivePath(Path archiveDir, Path p) {
  final String walFileName = p.getName();
  return new Path(archiveDir, walFileName);
}
}

View File

@ -0,0 +1,90 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;
import java.util.List;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.ProcedureInfo;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.protobuf.generated.ProcedureProtos;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.wal.DefaultWALProvider;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.junit.Assert.fail;
/**
 * Checks that no "splitting" WAL directory is left behind for a region server that used to
 * host the meta region: meta is moved away from the server, the server is killed, and once its
 * ServerCrashProcedure has finished the WAL directory is scanned for leftovers.
 */
@Category(MediumTests.class)
public class TestCleanupMetaWAL {
  private static final Logger LOG = LoggerFactory.getLogger(TestCleanupMetaWAL.class);
  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

  /** Maximum number of times the master's procedure list is polled for a finished SCP. */
  private static final int SCP_POLL_ATTEMPTS = 25;
  /** Pause between two polls of the procedure list, in milliseconds. */
  private static final long SCP_POLL_INTERVAL_MS = 1000;

  @BeforeClass
  public static void before() throws Exception {
    TEST_UTIL.startMiniCluster(2);
  }

  @AfterClass
  public static void after() throws Exception {
    // Counterpart of startMiniCluster(): tears down the whole mini cluster. The previous
    // shutdownMiniZKCluster() call only stopped ZooKeeper and left the rest running.
    TEST_UTIL.shutdownMiniCluster();
  }

  @Test
  public void testCleanupMetaWAL() throws Exception {
    TEST_UTIL.createTable(TableName.valueOf("test"), "cf");
    HRegionServer serverWithMeta = TEST_UTIL.getMiniHBaseCluster()
        .getRegionServer(TEST_UTIL.getMiniHBaseCluster().getServerWithMeta());
    // Move meta off its current server (null destination) before killing that server, so the
    // killed server is no longer carrying meta when its crash is processed.
    TEST_UTIL.getHBaseAdmin()
        .move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(), null);
    TEST_UTIL.getMiniHBaseCluster().killRegionServer(serverWithMeta.getServerName());

    if (!waitForServerCrashProcedure()) {
      // Without a finished SCP the directory check below would not be testing anything.
      fail("ServerCrashProcedure did not finish after " + SCP_POLL_ATTEMPTS + " polls");
    }

    MasterFileSystem mfs = TEST_UTIL.getMiniHBaseCluster().getMaster().getMasterFileSystem();
    Path walPath = new Path(mfs.getWALRootDir(), HConstants.HREGION_LOGDIR_NAME);
    FileStatus[] walDirs = FSUtils.listStatus(mfs.getFileSystem(), walPath);
    if (walDirs == null) {
      // Nothing was listed under the WAL directory, so no splitting dir can be present.
      return;
    }
    for (FileStatus status : walDirs) {
      if (status.getPath().toString().contains(DefaultWALProvider.SPLITTING_EXT)) {
        fail("Should not have splitting wal dir here:" + status);
      }
    }
  }

  /**
   * Polls the master's procedure list until a ServerCrashProcedure is reported as FINISHED or
   * the attempt budget is used up, sleeping between unsuccessful polls only.
   * @return true if a finished ServerCrashProcedure was seen, false if polling gave up
   */
  private static boolean waitForServerCrashProcedure() throws Exception {
    for (int attempt = 0; attempt < SCP_POLL_ATTEMPTS; attempt++) {
      List<ProcedureInfo> procs = TEST_UTIL.getMiniHBaseCluster().getMaster().listProcedures();
      for (ProcedureInfo pi : procs) {
        if (pi.getProcName().startsWith("ServerCrashProcedure")
            && pi.getProcState() == ProcedureProtos.ProcedureState.FINISHED) {
          LOG.info("SCP is finished: " + pi.getProcName());
          return true;
        }
      }
      Thread.sleep(SCP_POLL_INTERVAL_MS);
    }
    return false;
  }
}