HBASE-20008 [backport] NullPointerException when restoring a snapshot after splitting a region

Signed-off-by: tedyu <yuzhihong@gmail.com>
Author: Toshihiro Suzuki
Date: 2018-02-15 18:03:02 +09:00
Committer: tedyu
Parent: 21f6830fe1
Commit: c0bca137ff
3 changed files with 85 additions and 24 deletions
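
At its core, the bug is a missed map lookup: restoring a snapshot that still carries reference files from a region split consulted regionsMap, which only the region-cloning step populates, so the lookup returned null and the null regionName was later dereferenced. A minimal, hypothetical sketch of that pattern (simplified names, not the real RestoreSnapshotHelper API):

```java
// A minimal, hypothetical sketch of the null-propagation pattern behind the
// NPE; the names are simplified stand-ins for RestoreSnapshotHelper fields,
// not the actual HBase code.
import java.util.HashMap;
import java.util.Map;

public class RegionsMapNpeSketch {
  public static void main(String[] args) {
    // Stand-in for regionsMap, which only cloneHdfsRegions() populates. On the
    // old restore-after-split path it was consulted before being filled.
    Map<String, String> regionsMap = new HashMap<String, String>();

    String encodedName = "daughter-region-encoded-name"; // hypothetical key
    String regionName = regionsMap.get(encodedName);     // lookup misses -> null

    // Old behavior: regionName was used directly, so the first dereference
    // (e.g. regionName.equals(...)) threw a NullPointerException. This commit
    // adds a guard like the one below and, more importantly, reorders the
    // restore so the clone step fills the map first.
    if (regionName == null) {
      regionName = encodedName; // fall back, like regionInfo.getEncodedName()
    }
    System.out.println("Restore reference " + regionName);
  }
}
```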

RestoreSnapshotHandler.java

@@ -183,8 +183,9 @@ public class RestoreSnapshotHandler extends TableEventHandler implements Snapsho
       String msg = "restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot)
         + " failed. Try re-running the restore command.";
       LOG.error(msg, e);
-      monitor.receive(new ForeignException(masterServices.getServerName().toString(), e));
-      throw new RestoreSnapshotException(msg, e);
+      IOException rse = new RestoreSnapshotException(msg, e);
+      monitor.receive(new ForeignException(masterServices.getServerName().toString(), rse));
+      throw rse;
     }
   }

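A side effect of the handler change above: the ForeignException delivered to the monitor now wraps the RestoreSnapshotException itself rather than the raw cause, so the monitor and the caller of the handler observe the same typed failure.
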
RestoreSnapshotHelper.java

@@ -209,38 +209,39 @@ public class RestoreSnapshotHelper {
           metaChanges.addRegionToRemove(regionInfo);
         }
       }
-
-      // Restore regions using the snapshot data
-      monitor.rethrowException();
-      status.setStatus("Restoring table regions...");
-      restoreHdfsRegions(exec, regionManifests, metaChanges.getRegionsToRestore());
-      status.setStatus("Finished restoring all table regions.");
-
-      // Remove regions from the current table
-      monitor.rethrowException();
-      status.setStatus("Starting to delete excess regions from table");
-      removeHdfsRegions(exec, metaChanges.getRegionsToRemove());
-      status.setStatus("Finished deleting excess regions from table.");
     }
 
     // Regions to Add: present in the snapshot but not in the current table
+    List<HRegionInfo> regionsToAdd = new ArrayList<HRegionInfo>(regionNames.size());
     if (regionNames.size() > 0) {
-      List<HRegionInfo> regionsToAdd = new ArrayList<HRegionInfo>(regionNames.size());
-
       monitor.rethrowException();
       for (String regionName: regionNames) {
         LOG.info("region to add: " + regionName);
         regionsToAdd.add(HRegionInfo.convert(regionManifests.get(regionName).getRegionInfo()));
       }
-
-      // Create new regions cloning from the snapshot
-      monitor.rethrowException();
-      status.setStatus("Cloning regions...");
-      HRegionInfo[] clonedRegions = cloneHdfsRegions(exec, regionManifests, regionsToAdd);
-      metaChanges.setNewRegions(clonedRegions);
-      status.setStatus("Finished cloning regions.");
     }
 
+    // Create new regions cloning from the snapshot
+    // HBASE-20008: We need to call cloneHdfsRegions() before restoreHdfsRegions() because
+    // regionsMap is constructed in cloneHdfsRegions() and it can be used in restoreHdfsRegions().
+    monitor.rethrowException();
+    status.setStatus("Cloning regions...");
+    HRegionInfo[] clonedRegions = cloneHdfsRegions(exec, regionManifests, regionsToAdd);
+    metaChanges.setNewRegions(clonedRegions);
+    status.setStatus("Finished cloning regions.");
+
+    // Restore regions using the snapshot data
+    monitor.rethrowException();
+    status.setStatus("Restoring table regions...");
+    restoreHdfsRegions(exec, regionManifests, metaChanges.getRegionsToRestore());
+    status.setStatus("Finished restoring all table regions.");
+
+    // Remove regions from the current table
+    monitor.rethrowException();
+    status.setStatus("Starting to delete excess regions from table");
+    removeHdfsRegions(exec, metaChanges.getRegionsToRemove());
+    status.setStatus("Finished deleting excess regions from table.");
+
     return metaChanges;
   }
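
The HBASE-20008 comment in the hunk above states the core of the fix: cloneHdfsRegions() is what builds regionsMap, and restoreHdfsRegions() consults that map when restoring reference files left behind by a split, so the clone step must now run first and unconditionally. Hoisting the regionsToAdd declaration out of the if block simply lets the unconditional clone call receive an empty list when the snapshot contributes no new regions.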
@@ -655,11 +656,16 @@ public class RestoreSnapshotHelper {
     // Add the daughter region to the map
     String regionName = Bytes.toString(regionsMap.get(regionInfo.getEncodedNameAsBytes()));
+    if (regionName == null) {
+      regionName = regionInfo.getEncodedName();
+    }
     LOG.debug("Restore reference " + regionName + " to " + clonedRegionName);
     synchronized (parentsMap) {
       Pair<String, String> daughters = parentsMap.get(clonedRegionName);
       if (daughters == null) {
-        daughters = new Pair<String, String>(regionName, null);
+        // In case one side of the split is already compacted, regionName is put as both first and
+        // second of Pair
+        daughters = new Pair<String, String>(regionName, regionName);
         parentsMap.put(clonedRegionName, daughters);
       } else if (!regionName.equals(daughters.getFirst())) {
         daughters.setSecond(regionName);

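Two defensive changes land in the reference-restore path itself: a regionsMap miss no longer yields a null regionName (it falls back to the region's own encoded name), and when only one daughter of a split remains, because the other side was already compacted away, regionName is recorded as both halves of the Pair rather than leaving the second daughter null.
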
TestRestoreSnapshotFromClient.java

@@ -22,8 +22,11 @@ import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.fail;
 
 import java.io.IOException;
+import java.io.InterruptedIOException;
 import java.util.HashSet;
+import java.util.List;
 import java.util.Set;
+import java.util.concurrent.TimeUnit;
 
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.CategoryBasedTimeout;
@@ -32,6 +35,12 @@ import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
+import org.apache.hadoop.hbase.coprocessor.ObserverContext;
+import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
+import org.apache.hadoop.hbase.regionserver.InternalScanner;
+import org.apache.hadoop.hbase.regionserver.ScanType;
+import org.apache.hadoop.hbase.regionserver.Store;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.master.MasterFileSystem;
@@ -282,6 +291,51 @@ public class TestRestoreSnapshotFromClient {
     }
   }
 
+  @Test
+  public void testRestoreSnapshotAfterSplittingRegions() throws IOException, InterruptedException {
+    // HBASE-20008: Add a coprocessor to delay compactions of the daughter regions. To reproduce
+    // the NullPointerException, we need to delay compactions of the daughter regions after
+    // splitting region.
+    HTableDescriptor tableDescriptor = admin.getTableDescriptor(tableName);
+    tableDescriptor.addCoprocessor(DelayCompactionObserver.class.getName());
+    admin.disableTable(tableName);
+    admin.modifyTable(tableName, tableDescriptor);
+    admin.enableTable(tableName);
+
+    List<HRegionInfo> regionInfos = admin.getTableRegions(tableName);
+    RegionReplicaUtil.removeNonDefaultRegions(regionInfos);
+
+    // Split the first region
+    splitRegion(regionInfos.get(0));
+
+    // Take a snapshot
+    admin.snapshot(snapshotName1, tableName);
+
+    // Restore the snapshot
+    admin.disableTable(tableName);
+    admin.restoreSnapshot(snapshotName1);
+    admin.enableTable(tableName);
+
+    SnapshotTestingUtils.verifyRowCount(TEST_UTIL, tableName, snapshot1Rows);
+  }
+
+  public static class DelayCompactionObserver extends BaseRegionObserver {
+    @Override
+    public InternalScanner preCompact(ObserverContext<RegionCoprocessorEnvironment> e,
+        final Store store, final InternalScanner scanner, final ScanType scanType)
+        throws IOException {
+
+      try {
+        // Delay 5 seconds.
+        TimeUnit.SECONDS.sleep(5);
+      } catch (InterruptedException ex) {
+        throw new InterruptedIOException(ex.getMessage());
+      }
+
+      return scanner;
+    }
+  }
+
   // ==========================================================================
   //  Helpers
   // ==========================================================================
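
The new test reproduces the failure end to end: DelayCompactionObserver holds every compaction back for five seconds, so the daughter regions still contain reference files when the snapshot is taken right after splitRegion(), and the subsequent restoreSnapshot() has to walk the reference-restore path that previously threw the NullPointerException.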