HBASE-14678 Experiment: Temporarily disable balancer and a few others to see if root of crashed/timedout JVMs; ADD TestDistributedLogSplitting to the mix; ADDENDUM added TestSnapshotCloneIndependence to removed set of dodgy tests
parent 1050908ff0
commit 91bca7323a
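Before it was removed here, the test class below already skipped its flakiest methods with JUnit 4's @Ignore annotation. As a reference for that pattern, here is a minimal sketch (the class and method names are hypothetical, not part of this change):

import org.junit.Ignore;
import org.junit.Test;

public class ExampleFlakyTest {
  // Skipped until the flakiness is diagnosed; keeping the method in the source
  // tree makes it easy to re-enable later.
  @Ignore("Flakey. Fix")
  @Test(timeout = 300000)
  public void testSomethingIntermittent() throws Exception {
    // test body elided
  }
}

In this commit the whole class is deleted from the run instead of being ignored method by method.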
@@ -1,481 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.client;

import java.io.IOException;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy;
import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
 * Test to verify that the cloned table is independent of the table from which it was cloned
 */
@Category(LargeTests.class)
public class TestSnapshotCloneIndependence {
  private static final Log LOG = LogFactory.getLog(TestSnapshotCloneIndependence.class);

  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();

  private static final int NUM_RS = 2;
  private static final String STRING_TABLE_NAME = "test";
  private static final String TEST_FAM_STR = "fam";
  protected static final byte[] TEST_FAM = Bytes.toBytes(TEST_FAM_STR);
  protected static final TableName TABLE_NAME = TableName.valueOf(STRING_TABLE_NAME);
  private static final int CLEANER_INTERVAL = 100;

  /**
   * Set up the config for the cluster and start it
   * @throws Exception on failure
   */
  @BeforeClass
  public static void setupCluster() throws Exception {
    setupConf(UTIL.getConfiguration());
    UTIL.startMiniCluster(NUM_RS);
  }

  private static void setupConf(Configuration conf) {
    // Up the handlers; this test needs more than usual.
    conf.setInt(HConstants.REGION_SERVER_HIGH_PRIORITY_HANDLER_COUNT, 15);
    // enable snapshot support
    conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true);
    // disable the ui
    conf.setInt("hbase.regionserver.info.port", -1);
    conf.setInt("hbase.master.info.port", -1);
    // change the flush size to a small amount, regulating number of store files
    conf.setInt("hbase.hregion.memstore.flush.size", 25000);
    // so make sure we get a compaction when doing a load, but keep around
    // some files in the store
    conf.setInt("hbase.hstore.compaction.min", 10);
    conf.setInt("hbase.hstore.compactionThreshold", 10);
    // block writes if we get to 12 store files
    conf.setInt("hbase.hstore.blockingStoreFiles", 12);
    conf.setInt("hbase.regionserver.msginterval", 100);
    conf.setBoolean("hbase.master.enabletable.roundrobin", true);
    // Avoid potentially aggressive splitting which would cause snapshot to fail
    conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY,
        ConstantSizeRegionSplitPolicy.class.getName());
    // Execute cleaner frequently to induce failures
    conf.setInt("hbase.master.cleaner.interval", CLEANER_INTERVAL);
    conf.setInt("hbase.master.hfilecleaner.plugins.snapshot.period", CLEANER_INTERVAL);
    // Effectively disable TimeToLiveHFileCleaner. Don't want to fully disable it because that
    // will even trigger races between creating the directory containing back references and
    // the back reference itself.
    conf.setInt("hbase.master.hfilecleaner.ttl", CLEANER_INTERVAL);
  }

  @Before
  public void setup() throws Exception {
    UTIL.createTable(TABLE_NAME, TEST_FAM);
  }

  @After
  public void tearDown() throws Exception {
    UTIL.deleteTable(TABLE_NAME);
    SnapshotTestingUtils.deleteAllSnapshots(UTIL.getHBaseAdmin());
    SnapshotTestingUtils.deleteArchiveDirectory(UTIL);
  }

  @AfterClass
  public static void cleanupTest() throws Exception {
    try {
      UTIL.shutdownMiniCluster();
    } catch (Exception e) {
      LOG.warn("failure shutting down cluster", e);
    }
  }

  /**
   * Verify that adding data to the cloned table will not affect the original, and vice-versa when
   * it is taken as an online snapshot.
   */
  @Ignore ("Flakey. Fix") @Test (timeout=300000)
  public void testOnlineSnapshotAppendIndependent() throws Exception {
    runTestSnapshotAppendIndependent(true);
  }

  /**
   * Verify that adding data to the cloned table will not affect the original, and vice-versa when
   * it is taken as an offline snapshot.
   */
  @Test (timeout=300000)
  @Ignore
  public void testOfflineSnapshotAppendIndependent() throws Exception {
    runTestSnapshotAppendIndependent(false);
  }

  /**
   * Verify that adding metadata to the cloned table will not affect the original, and vice-versa
   * when it is taken as an online snapshot.
   */
  @Test (timeout=300000)
  public void testOnlineSnapshotMetadataChangesIndependent() throws Exception {
    runTestSnapshotMetadataChangesIndependent(true);
  }

  /**
   * Verify that adding metadata to the cloned table will not affect the original, and vice-versa
   * when it is taken as an offline snapshot.
   */
  @Test (timeout=300000)
  @Ignore
  public void testOfflineSnapshotMetadataChangesIndependent() throws Exception {
    runTestSnapshotMetadataChangesIndependent(false);
  }

  /**
   * Verify that region operations, in this case splitting a region, are independent between the
   * cloned table and the original.
   */
  @Test (timeout=300000)
  @Ignore
  public void testOfflineSnapshotRegionOperationsIndependent() throws Exception {
    runTestRegionOperationsIndependent(false);
  }

  /**
   * Verify that region operations, in this case splitting a region, are independent between the
   * cloned table and the original.
   */
  @Test (timeout=300000)
  public void testOnlineSnapshotRegionOperationsIndependent() throws Exception {
    runTestRegionOperationsIndependent(true);
  }

  @Test (timeout=300000)
  @Ignore
  public void testOfflineSnapshotDeleteIndependent() throws Exception {
    runTestSnapshotDeleteIndependent(false);
  }

  @Ignore ("Flakey test") @Test (timeout=300000)
  public void testOnlineSnapshotDeleteIndependent() throws Exception {
    runTestSnapshotDeleteIndependent(true);
  }

  private static void waitOnSplit(final HTable t, int originalCount) throws Exception {
    for (int i = 0; i < 200; i++) {
      try {
        Thread.sleep(500);
      } catch (InterruptedException e) {
        // Restore the interrupted status
        Thread.currentThread().interrupt();
      }
      if (t.getAllRegionLocations().size() > originalCount) {
        return;
      }
    }
    throw new Exception("Split did not increase the number of regions");
  }

  /*
   * Take a snapshot of a table, add data, and verify that this only affects one table
   * @param online - Whether the table is online or not during the snapshot
   */
  private void runTestSnapshotAppendIndependent(boolean online) throws Exception {
    FileSystem fs = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getFileSystem();
    Path rootDir = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();

    Admin admin = UTIL.getHBaseAdmin();
    final long startTime = System.currentTimeMillis();
    final TableName localTableName =
        TableName.valueOf(STRING_TABLE_NAME + startTime);

    try (Table original = UTIL.createTable(localTableName, TEST_FAM)) {
      UTIL.loadTable(original, TEST_FAM);
      final int origTableRowCount = UTIL.countRows(original);

      // Take a snapshot
      final String snapshotNameAsString = "snapshot_" + localTableName;
      byte[] snapshotName = Bytes.toBytes(snapshotNameAsString);

      SnapshotTestingUtils.createSnapshotAndValidate(admin, localTableName, TEST_FAM_STR,
          snapshotNameAsString, rootDir, fs, online);

      if (!online) {
        tryDisable(admin, localTableName);
      }
      TableName cloneTableName = TableName.valueOf("test-clone-" + localTableName);
      admin.cloneSnapshot(snapshotName, cloneTableName);

      try (Table clonedTable = UTIL.getConnection().getTable(cloneTableName)) {

        // Make sure that all the regions are available before starting
        UTIL.waitUntilAllRegionsAssigned(cloneTableName);

        final int clonedTableRowCount = countRows(clonedTable);

        Assert.assertEquals(
            "The row counts of the original and cloned tables do not match after clone. ",
            origTableRowCount, clonedTableRowCount);

        // Attempt to add data to the original table
        final String rowKey = "new-row-" + System.currentTimeMillis();

        Put p = new Put(Bytes.toBytes(rowKey));
        p.add(TEST_FAM, Bytes.toBytes("someQualifier"), Bytes.toBytes("someString"));
        original.put(p);

        // Verify that the put modified the original table but not the clone
        Assert.assertEquals("The row count of the original table was not modified by the put",
            origTableRowCount + 1, UTIL.countRows(original));
        Assert.assertEquals(
            "The row count of the cloned table changed as a result of addition to the original",
            clonedTableRowCount, UTIL.countRows(clonedTable));

        p = new Put(Bytes.toBytes(rowKey));
        p.add(TEST_FAM, Bytes.toBytes("someQualifier"), Bytes.toBytes("someString"));
        clonedTable.put(p);

        // Verify that the put modified the clone but not the original table
        Assert.assertEquals(
            "The row count of the original table was modified by the put to the clone",
            origTableRowCount + 1, UTIL.countRows(original));
        Assert.assertEquals("The row count of the cloned table was not modified by the put",
            clonedTableRowCount + 1, UTIL.countRows(clonedTable));
      }
    }
  }

  /*
   * Take a snapshot of a table, do a split, and verify that this only affects one table
   * @param online - Whether the table is online or not during the snapshot
   */
  private void runTestRegionOperationsIndependent(boolean online) throws Exception {
    FileSystem fs = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getFileSystem();
    Path rootDir = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();

    // Create a table
    Admin admin = UTIL.getHBaseAdmin();
    final long startTime = System.currentTimeMillis();
    final TableName localTableName =
        TableName.valueOf(STRING_TABLE_NAME + startTime);
    HTable original = UTIL.createTable(localTableName, TEST_FAM);
    UTIL.loadTable(original, TEST_FAM);
    final int loadedTableCount = UTIL.countRows(original);
    System.out.println("Original table has: " + loadedTableCount + " rows");

    final String snapshotNameAsString = "snapshot_" + localTableName;

    // Create a snapshot
    SnapshotTestingUtils.createSnapshotAndValidate(admin, localTableName, TEST_FAM_STR,
        snapshotNameAsString, rootDir, fs, online);

    if (!online) {
      tryDisable(admin, localTableName);
    }

    TableName cloneTableName = TableName.valueOf("test-clone-" + localTableName);

    // Clone the snapshot
    byte[] snapshotName = Bytes.toBytes(snapshotNameAsString);
    admin.cloneSnapshot(snapshotName, cloneTableName);

    // Verify that region information is the same pre-split
    original.clearRegionCache();
    List<HRegionInfo> originalTableHRegions = admin.getTableRegions(localTableName);

    final int originalRegionCount = originalTableHRegions.size();
    final int cloneTableRegionCount = admin.getTableRegions(cloneTableName).size();
    Assert.assertEquals(
        "The number of regions in the cloned table is different than in the original table.",
        originalRegionCount, cloneTableRegionCount);

    // Split a region on the parent table
    admin.splitRegion(originalTableHRegions.get(0).getRegionName());
    waitOnSplit(original, originalRegionCount);

    // Verify that the cloned table region is not split
    final int cloneTableRegionCount2 = admin.getTableRegions(cloneTableName).size();
    Assert.assertEquals(
        "The number of regions in the cloned table changed though none of its regions were split.",
        cloneTableRegionCount, cloneTableRegionCount2);
  }

  /*
   * Take a snapshot of a table, add metadata, and verify that this only affects one table
   * @param online - Whether the table is online or not during the snapshot
   */
  private void runTestSnapshotMetadataChangesIndependent(boolean online) throws Exception {
    FileSystem fs = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getFileSystem();
    Path rootDir = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();

    // Create a table
    Admin admin = UTIL.getHBaseAdmin();
    final long startTime = System.currentTimeMillis();
    final TableName localTableName =
        TableName.valueOf(STRING_TABLE_NAME + startTime);
    HTable original = UTIL.createTable(localTableName, TEST_FAM);
    UTIL.loadTable(original, TEST_FAM);

    final String snapshotNameAsString = "snapshot_" + localTableName;

    // Create a snapshot
    SnapshotTestingUtils.createSnapshotAndValidate(admin, localTableName, TEST_FAM_STR,
        snapshotNameAsString, rootDir, fs, online);

    if (!online) {
      tryDisable(admin, localTableName);
    }

    TableName cloneTableName = TableName.valueOf("test-clone-" + localTableName);

    // Clone the snapshot
    byte[] snapshotName = Bytes.toBytes(snapshotNameAsString);
    admin.cloneSnapshot(snapshotName, cloneTableName);

    // Add a new column family to the original table
    byte[] TEST_FAM_2 = Bytes.toBytes("fam2");
    HColumnDescriptor hcd = new HColumnDescriptor(TEST_FAM_2);

    tryDisable(admin, localTableName);
    admin.addColumn(localTableName, hcd);

    // Verify that the new family shows up only in the original, not in the clone
    admin.enableTable(localTableName);
    UTIL.waitTableAvailable(localTableName);

    // Get the descriptors of both tables and check which families each one has
    HTableDescriptor originalTableDescriptor = original.getTableDescriptor();
    HTableDescriptor clonedTableDescriptor = admin.getTableDescriptor(cloneTableName);

    Assert.assertTrue("The original family was not found. There is something wrong. ",
        originalTableDescriptor.hasFamily(TEST_FAM));
    Assert.assertTrue("The original family was not found in the clone. There is something wrong. ",
        clonedTableDescriptor.hasFamily(TEST_FAM));

    Assert.assertTrue("The new family was not found in the original. ",
        originalTableDescriptor.hasFamily(TEST_FAM_2));
    Assert.assertTrue("The new family should not be present in the clone. ",
        !clonedTableDescriptor.hasFamily(TEST_FAM_2));
  }

  private void tryDisable(Admin admin, TableName localTableName) throws IOException {
    int offlineRetry = 0;
    while (offlineRetry < 5 && admin.isTableEnabled(localTableName)) {
      try {
        admin.disableTable(localTableName);
      } catch (IOException ioe) {
        LOG.warn("Error disabling the table", ioe);
      }
      offlineRetry++;
    }
  }

  /*
   * Take a snapshot of a table, add data, and verify that deleting the snapshot does not affect
   * either table.
   * @param online - Whether the table is online or not during the snapshot
   */
  private void runTestSnapshotDeleteIndependent(boolean online) throws Exception {
    FileSystem fs = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getFileSystem();
    Path rootDir = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();

    final Admin admin = UTIL.getHBaseAdmin();
    final long startTime = System.currentTimeMillis();
    final TableName localTableName =
        TableName.valueOf(STRING_TABLE_NAME + startTime);

    try (Table original = UTIL.createTable(localTableName, TEST_FAM)) {
      UTIL.loadTable(original, TEST_FAM);
    }

    // Take a snapshot
    final String snapshotNameAsString = "snapshot_" + localTableName;
    byte[] snapshotName = Bytes.toBytes(snapshotNameAsString);

    SnapshotTestingUtils.createSnapshotAndValidate(admin, localTableName, TEST_FAM_STR,
        snapshotNameAsString, rootDir, fs, online);

    if (!online) {
      tryDisable(admin, localTableName);
    }

    TableName cloneTableName = TableName.valueOf("test-clone-" + localTableName);
    admin.cloneSnapshot(snapshotName, cloneTableName);

    UTIL.waitUntilAllRegionsAssigned(cloneTableName);

    // Ensure the original table does not reference the HFiles anymore
    admin.majorCompact(localTableName);

    // Deleting the snapshot used to break the cloned table by deleting in-use HFiles
    admin.deleteSnapshot(snapshotName);

    // Wait for cleaner run and DFS heartbeats so that anything that is deletable is fully deleted
    do {
      Thread.sleep(5000);
    } while (!admin.listSnapshots(snapshotNameAsString).isEmpty());

    try (Table original = UTIL.getConnection().getTable(localTableName)) {
      try (Table clonedTable = UTIL.getConnection().getTable(cloneTableName)) {
        // Verify that all regions of both tables are readable
        final int origTableRowCount = UTIL.countRows(original);
        final int clonedTableRowCount = UTIL.countRows(clonedTable);
        Assert.assertEquals(origTableRowCount, clonedTableRowCount);
      }
    }
  }

  protected Table createTable(final TableName table, byte[] family) throws Exception {
    Table t = UTIL.createTable(table, family);
    // Wait for everything to be ready with the table
    UTIL.waitUntilAllRegionsAssigned(table);

    // At this point the table should be good to go.
    return t;
  }

  protected void loadData(final Table table, byte[]... families) throws Exception {
    UTIL.loadTable(table, families);
  }

  protected int countRows(final Table table, final byte[]... families) throws Exception {
    return UTIL.countRows(table, families);
  }
}