Revert "HBASE-14223 Meta WALs are not cleared if meta region was closed and RS aborts"

Test TestRegionRebalancing is failing fairly reliably since this went in.

This reverts commit c719e8c456.
This commit is contained in:
stack 2015-12-01 16:02:31 -08:00
parent 2d7db89590
commit c4bc1c07bc
14 changed files with 14 additions and 291 deletions

View File

@ -1,36 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.chaos.actions;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.chaos.factories.MonkeyConstants;
/**
 * Chaos action that moves the region(s) of the meta table.
 *
 * <p>All behavior comes from {@link MoveRegionsOfTableAction}; this class only fixes the
 * target table to {@link TableName#META_TABLE_NAME}.
 */
public class MoveMetaAction extends MoveRegionsOfTableAction {

  /**
   * Creates the action with a sleep time of {@code -1} and
   * {@link MonkeyConstants#DEFAULT_MOVE_REGIONS_MAX_TIME} as the maximum sleep time.
   */
  public MoveMetaAction() {
    this(-1L, MonkeyConstants.DEFAULT_MOVE_REGIONS_MAX_TIME);
  }

  /**
   * Creates the action with explicit timing values, which are handed unchanged to the
   * superclass constructor together with the meta table name.
   *
   * @param sleepTime sleep time forwarded to {@link MoveRegionsOfTableAction}
   * @param maxSleepTime maximum sleep time forwarded to {@link MoveRegionsOfTableAction}
   */
  public MoveMetaAction(long sleepTime, long maxSleepTime) {
    super(sleepTime, maxSleepTime, TableName.META_TABLE_NAME);
  }
}

View File

@ -30,7 +30,6 @@ import org.apache.hadoop.hbase.chaos.actions.DumpClusterStatusAction;
import org.apache.hadoop.hbase.chaos.actions.FlushRandomRegionOfTableAction;
import org.apache.hadoop.hbase.chaos.actions.FlushTableAction;
import org.apache.hadoop.hbase.chaos.actions.MergeRandomAdjacentRegionsOfTableAction;
import org.apache.hadoop.hbase.chaos.actions.MoveMetaAction;
import org.apache.hadoop.hbase.chaos.actions.MoveRandomRegionOfTableAction;
import org.apache.hadoop.hbase.chaos.actions.MoveRegionsOfTableAction;
import org.apache.hadoop.hbase.chaos.actions.RemoveColumnAction;
@ -53,8 +52,7 @@ public class NoKillMonkeyFactory extends MonkeyFactory {
MonkeyConstants.DEFAULT_COMPACT_RANDOM_REGION_RATIO),
new FlushTableAction(tableName),
new FlushRandomRegionOfTableAction(tableName),
new MoveRandomRegionOfTableAction(tableName),
new MoveMetaAction()
new MoveRandomRegionOfTableAction(tableName)
};
Action[] actions2 = new Action[] {

View File

@ -57,8 +57,7 @@ public class SlowDeterministicMonkeyFactory extends MonkeyFactory {
new CompactRandomRegionOfTableAction(tableName, compactRandomRegionRatio),
new FlushTableAction(tableName),
new FlushRandomRegionOfTableAction(tableName),
new MoveRandomRegionOfTableAction(tableName),
new MoveMetaAction()
new MoveRandomRegionOfTableAction(tableName)
};
// Actions such as split/merge/snapshot.
@ -90,7 +89,6 @@ public class SlowDeterministicMonkeyFactory extends MonkeyFactory {
new RestartRsHoldingMetaAction(restartRsHoldingMetaSleepTime),
new DecreaseMaxHFileSizeAction(decreaseHFileSizeSleepTime, tableName),
new SplitAllRegionOfTableAction(tableName),
new MoveMetaAction()
};
// Action to log more info for debugging

View File

@ -34,8 +34,7 @@ public class StressAssignmentManagerMonkeyFactory extends MonkeyFactory {
new CompactTableAction(tableName, 0.5f),
new CompactRandomRegionOfTableAction(tableName, 0.6f),
new FlushTableAction(tableName),
new FlushRandomRegionOfTableAction(tableName),
new MoveMetaAction()
new FlushRandomRegionOfTableAction(tableName)
};
Action[] actions2 = new Action[]{
@ -56,7 +55,6 @@ public class StressAssignmentManagerMonkeyFactory extends MonkeyFactory {
new SplitAllRegionOfTableAction(tableName),
new DecreaseMaxHFileSizeAction(MonkeyConstants.DEFAULT_DECREASE_HFILE_SIZE_SLEEP_TIME,
tableName),
new MoveMetaAction()
};
// Action to log more info for debugging

View File

@ -1858,26 +1858,6 @@ public class HRegionServer extends HasThread implements
return wal;
}
/**
 * Releases the WAL that was handed out for the given region.
 *
 * <p>Only the default replica of a meta-table region is handled: the meta WAL is closed
 * through the WAL factory and the given WAL is then removed from the meta WAL roller, if one
 * is currently set. For any other region, or a {@code null} region info, this method does
 * nothing.
 *
 * @param regionInfo region whose WAL is being released; may be {@code null}
 * @param wal the WAL instance to remove from the meta WAL roller
 * @throws IOException if closing the meta WAL fails
 */
@Override
public void releaseWAL(HRegionInfo regionInfo, WAL wal) throws IOException {
  // Single guard: the original repeated this exact condition in a nested if.
  if (regionInfo == null || !regionInfo.isMetaTable()
      || regionInfo.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
    return;
  }

  walFactory.closeMetaWAL(regionInfo.getEncodedNameAsBytes());

  // only do this for meta WAL
  LogRoller roller = metawalRoller.get();
  if (roller != null) {
    roller.removeWAL(wal);
  }
  // TODO: meta wal roller is left running. Should be fine.
}
@Override
public ClusterConnection getConnection() {
return this.clusterConnection;

View File

@ -82,10 +82,6 @@ public class LogRoller extends HasThread {
}
}
/**
 * Removes the given WAL from the {@code walNeedsRoll} bookkeeping of this roller.
 *
 * @param wal the WAL whose entry should be dropped
 */
public void removeWAL(final WAL wal) {
  this.walNeedsRoll.remove(wal);
}
public void requestRollAll() {
for (WAL wal : walNeedsRoll.keySet()) {
walNeedsRoll.put(wal, Boolean.TRUE);

View File

@ -165,6 +165,7 @@ import org.apache.hadoop.hbase.regionserver.Region.Operation;
import org.apache.hadoop.hbase.regionserver.ScannerContext.LimitScope;
import org.apache.hadoop.hbase.regionserver.handler.OpenMetaHandler;
import org.apache.hadoop.hbase.regionserver.handler.OpenRegionHandler;
import org.apache.hadoop.hbase.wal.WAL;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.util.Bytes;
@ -174,7 +175,6 @@ import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
import org.apache.hadoop.hbase.util.Strings;
import org.apache.hadoop.hbase.wal.WAL;
import org.apache.hadoop.hbase.wal.WALKey;
import org.apache.hadoop.hbase.wal.WALSplitter;
import org.apache.hadoop.hbase.zookeeper.ZKSplitLog;
@ -1609,8 +1609,9 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
return response;
}
HRegion.warmupHRegion(region, htd, null,
HRegion.warmupHRegion(region, htd, regionServer.getWAL(region),
regionServer.getConfiguration(), regionServer, null);
} catch (IOException ie) {
LOG.error("Failed warming up region " + region.getRegionNameAsString(), ie);
throw new ServiceException(ie);

View File

@ -54,12 +54,6 @@ public interface RegionServerServices extends OnlineRegions, FavoredNodesForRegi
* default (common) WAL */
WAL getWAL(HRegionInfo regionInfo) throws IOException;
/**
 * Releases the dependency of the given region on the WAL previously obtained from
 * {@link #getWAL(HRegionInfo)}.
 *
 * @param regionInfo the region that no longer needs the WAL
 * @param wal the WAL that was obtained for {@code regionInfo}
 * @throws IOException if releasing the WAL fails
 */
void releaseWAL(HRegionInfo regionInfo, WAL wal) throws IOException;
/**
* @return Implementation of {@link CompactionRequestor} or null.
*/

View File

@ -19,9 +19,6 @@
package org.apache.hadoop.hbase.regionserver.handler;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import java.io.IOException;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.executor.EventType;
@ -43,9 +40,4 @@ public class CloseMetaHandler extends CloseRegionHandler {
super(server, rsServices, regionInfo, abort, closeRegionCoordination,
crd, EventType.M_RS_CLOSE_META);
}
/**
 * Hands the WAL of the region being closed back to the region server services.
 *
 * @throws IOException if the region server services fail to release the WAL
 */
@Override
protected void releaseWALIfNeeded() throws IOException {
  this.rsServices.releaseWAL(this.regionInfo, this.region.getWAL());
}
}

View File

@ -46,9 +46,8 @@ public class CloseRegionHandler extends EventHandler {
// have a running queue of user regions to close?
private static final Log LOG = LogFactory.getLog(CloseRegionHandler.class);
protected final RegionServerServices rsServices;
protected final HRegionInfo regionInfo;
protected HRegion region;
private final RegionServerServices rsServices;
private final HRegionInfo regionInfo;
// If true, the hosting server is aborting. Region close process is different
// when we are aborting.
@ -120,7 +119,7 @@ public class CloseRegionHandler extends EventHandler {
LOG.debug("Processing close of " + name);
String encodedRegionName = regionInfo.getEncodedName();
// Check that this region is being served here
region = (HRegion)rsServices.getFromOnlineRegions(encodedRegionName);
HRegion region = (HRegion)rsServices.getFromOnlineRegions(encodedRegionName);
if (region == null) {
LOG.warn("Received CLOSE for region " + name + " but currently not serving - ignoring");
// TODO: do better than a simple warning
@ -144,10 +143,6 @@ public class CloseRegionHandler extends EventHandler {
regionInfo.getRegionNameAsString());
return;
}
if (!abort) {
releaseWALIfNeeded();
}
} catch (IOException ioe) {
// An IOException here indicates that we couldn't successfully flush the
// memstore before closing. So, we need to abort the server and allow
@ -172,8 +167,4 @@ public class CloseRegionHandler extends EventHandler {
remove(this.regionInfo.getEncodedNameAsBytes());
}
}
/**
 * Hook for releasing the WAL of the region being closed, if needed.
 *
 * <p>This base implementation is intentionally empty. Only meta does this for now.
 *
 * @throws IOException declared so that overriding implementations may fail while releasing
 */
protected void releaseWALIfNeeded() throws IOException {
  // Nothing to release here.
}
}

View File

@ -251,21 +251,6 @@ public class WALFactory {
return metaProvider.getWAL(identifier, null);
}
/**
 * Closes the meta WAL and meta WALProvider.
 *
 * <p>NOTE: this assumes single META region. The close of WAL does not do ref-counting for the
 * number of regions depending on the meta WAL.
 *
 * @param identifier identifier of the meta region; not read by this method
 * @throws IOException if closing the provider fails
 */
public void closeMetaWAL(final byte[] identifier) throws IOException {
  final WALProvider current = this.metaProvider.get();
  if (current == null) {
    // No meta provider is set, so there is nothing to close.
    return;
  }
  if (!this.metaProvider.compareAndSet(current, null)) {
    // The reference changed since it was read above; leave it alone.
    return;
  }
  // close the metaProvider
  current.close();
}
public Reader createReader(final FileSystem fs, final Path path) throws IOException {
return createReader(fs, path, (CancelableProgressable)null);
}

View File

@ -102,7 +102,7 @@ public class MockRegionServerServices implements RegionServerServices {
public List<Region> getOnlineRegions(TableName tableName) throws IOException {
return null;
}
@Override
public Set<TableName> getOnlineTables() {
return null;
@ -181,7 +181,7 @@ public class MockRegionServerServices implements RegionServerServices {
public TableLockManager getTableLockManager() {
return new NullTableLockManager();
}
@Override
public RegionServerQuotaManager getRegionServerQuotaManager() {
return null;
@ -304,8 +304,4 @@ public class MockRegionServerServices implements RegionServerServices {
public double getCompactionPressure() {
return 0;
}
/** Mock implementation: releasing a WAL is a no-op. */
@Override
public void releaseWAL(HRegionInfo regionInfo, WAL wal) throws IOException {
  // Intentionally empty.
}
}

View File

@ -66,6 +66,8 @@ import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.MergeRegionsReques
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.MergeRegionsResponse;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.OpenRegionRequest;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.OpenRegionResponse;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.WarmupRegionRequest;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.WarmupRegionResponse;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.ReplicateWALEntryRequest;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.ReplicateWALEntryResponse;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.RollWALWriterRequest;
@ -78,8 +80,6 @@ import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.UpdateConfiguratio
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.UpdateConfigurationResponse;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.UpdateFavoredNodesRequest;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.UpdateFavoredNodesResponse;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.WarmupRegionRequest;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.WarmupRegionResponse;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.BulkLoadHFileRequest;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.BulkLoadHFileResponse;
@ -541,7 +541,7 @@ ClientProtos.ClientService.BlockingInterface, RegionServerServices {
// TODO Auto-generated method stub
return null;
}
@Override
public Set<TableName> getOnlineTables() {
return null;
@ -649,8 +649,4 @@ ClientProtos.ClientService.BlockingInterface, RegionServerServices {
public double getCompactionPressure() {
return 0;
}
/** Mock implementation: releasing a WAL is a no-op. */
@Override
public void releaseWAL(HRegionInfo regionInfo, WAL wal) throws IOException {
  // Intentionally empty.
}
}

View File

@ -1,166 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.wal;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.Waiter.Predicate;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;
/**
 * Tests the case where a meta region is opened in one regionserver and then closed: there
 * should not be any meta WALs left over on that regionserver.
 */
@Category({MediumTests.class})
public class TestMetaWALsAreClosed {
  protected static final Log LOG = LogFactory.getLog(TestMetaWALsAreClosed.class);

  protected static final int NUM_RS = 2;

  protected static final HBaseTestingUtility TEST_UTIL =
      new HBaseTestingUtility();

  protected final Configuration conf = TEST_UTIL.getConfiguration();

  /** Starts a mini cluster with one master and {@link #NUM_RS} region servers. */
  @Before
  public void setUp() throws Exception {
    TEST_UTIL.startMiniCluster(1, NUM_RS);
  }

  /** Shuts the mini cluster down again after each test. */
  @After
  public void tearDown() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }

  /**
   * Builds the WAL directory path of the given server under the cluster root directory.
   *
   * @param serverName server whose WAL directory is wanted
   * @return path of that server's WAL directory
   * @throws IOException if the root directory cannot be resolved
   */
  private Path walDirOf(ServerName serverName) throws IOException {
    return new Path(FSUtils.getRootDir(TEST_UTIL.getConfiguration()),
        DefaultWALProvider.getWALDirectoryName(serverName.toString()));
  }

  /**
   * Checks whether the given WAL directory contains a meta WAL file. Every listed path is
   * logged along the way.
   *
   * @param fs file system to list the directory on
   * @param wals WAL directory of a region server
   * @return {@code true} as soon as one meta WAL file is found, {@code false} otherwise
   * @throws IOException if listing the directory fails
   */
  private boolean isHostingMeta(FileSystem fs, Path wals) throws IOException {
    for (FileStatus status : fs.listStatus(wals)) {
      LOG.info(status.getPath());
      if (DefaultWALProvider.isMetaFile(status.getPath())) {
        return true; // only 1 meta region for now
      }
    }
    return false;
  }

  /**
   * Disables the balancer, asks the master to move the first meta region to {@code target},
   * and waits up to 30 seconds until the region locator reports it there.
   *
   * @param target server the meta region should end up on
   * @throws Exception if the move cannot be requested or the wait fails
   */
  private void moveMetaRegionAndWait(final ServerName target) throws Exception {
    try (final Connection conn = ConnectionFactory.createConnection(TEST_UTIL.getConfiguration());
        final Admin admin = conn.getAdmin();
        final RegionLocator rl = conn.getRegionLocator(TableName.META_TABLE_NAME)) {

      LOG.info("Disabling balancer");
      admin.setBalancerRunning(false, true);

      LOG.info("Moving meta region");
      admin.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
          Bytes.toBytes(target.toString()));

      LOG.info("Waiting for meta region to move");
      // wait for the move of meta region
      TEST_UTIL.waitFor(30000, new Predicate<Exception>() {
        @Override
        public boolean evaluate() throws Exception {
          return target.equals(
              rl.getRegionLocation(HConstants.EMPTY_START_ROW, true).getServerName());
        }
      });
    }
  }

  /**
   * Moves meta away from its current server, verifies that server has no meta WAL files left,
   * moves meta back, and finally runs a small create/load/verify cycle as a sanity check.
   */
  @Test (timeout = 60000)
  public void testMetaWALsAreClosed() throws Exception {
    MiniHBaseCluster cluster = TEST_UTIL.getMiniHBaseCluster();
    FileSystem fs = TEST_UTIL.getTestFileSystem();

    // find the region server hosting the meta table now.
    ServerName metaServerName = null;
    ServerName otherServerName = null;
    for (RegionServerThread rs : cluster.getRegionServerThreads()) {
      ServerName serverName = rs.getRegionServer().getServerName();
      if (isHostingMeta(fs, walDirOf(serverName))) {
        metaServerName = serverName; // only 1 meta region for now
      } else {
        otherServerName = serverName;
      }
    }

    LOG.info(metaServerName);
    LOG.info(otherServerName);
    assertNotNull(metaServerName);
    assertNotNull(otherServerName);

    moveMetaRegionAndWait(otherServerName);

    LOG.info("Checking that old meta server does not have WALs for meta");
    // the server that used to host meta now should not have any WAL files for the meta region now
    for (FileStatus status : fs.listStatus(walDirOf(metaServerName))) {
      LOG.info(status.getPath());
      assertFalse("Meta WAL left behind on old meta server: " + status.getPath(),
          DefaultWALProvider.isMetaFile(status.getPath()));
    }

    // assign the meta server back
    moveMetaRegionAndWait(metaServerName);

    // do some basic operations to ensure that nothing is failing
    HTableDescriptor htd = TEST_UTIL.createTableDescriptor("foo");
    TEST_UTIL.getHBaseAdmin().createTable(htd);
    try (Connection conn = ConnectionFactory.createConnection(TEST_UTIL.getConfiguration());
        Table table = conn.getTable(htd.getTableName())) {
      TEST_UTIL.loadNumericRows(table, TEST_UTIL.fam1, 0, 100);
      TEST_UTIL.verifyNumericRows(table, TEST_UTIL.fam1, 0, 100, 0);
    }
  }
}