HBASE-20226: Parallelize region manifest deletes (#2159)

We observed this delete call to be a bottleneck for tables with lots of
regions. This patch parallelizes the region manifest deletes.

Signed-off-by: Andrew Purtell <apurtell@apache.org>
(cherry picked from commit f07f30ae24)
This commit is contained in:
Bharath Vissapragada 2020-07-29 10:59:23 -07:00
parent bba70f08ea
commit 06236dbfcc
No known key found for this signature in database
GPG Key ID: 18AE42A0B5A93FA7
1 changed file with 59 additions and 35 deletions

View File

@ -20,11 +20,15 @@ package org.apache.hadoop.hbase.snapshot;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.conf.Configuration;
@ -513,44 +517,64 @@ public final class SnapshotManifest {
workingDir, desc);
v2Regions = SnapshotManifestV2.loadRegionManifests(conf, tpool, workingDirFs,
workingDir, desc, manifestSizeLimit);
SnapshotDataManifest.Builder dataManifestBuilder = SnapshotDataManifest.newBuilder();
dataManifestBuilder.setTableSchema(ProtobufUtil.toTableSchema(htd));
if (v1Regions != null && v1Regions.size() > 0) {
dataManifestBuilder.addAllRegionManifests(v1Regions);
}
if (v2Regions != null && v2Regions.size() > 0) {
dataManifestBuilder.addAllRegionManifests(v2Regions);
}
// Write the v2 Data Manifest.
// Once the data-manifest is written, the snapshot can be considered complete.
// Currently snapshots are written in a "temporary" directory and later
// moved to the "completed" snapshot directory.
setStatusMsg("Writing data manifest for " + this.desc.getName());
SnapshotDataManifest dataManifest = dataManifestBuilder.build();
writeDataManifest(dataManifest);
this.regionManifests = dataManifest.getRegionManifestsList();
// Remove the region manifests. Everything is now in the data-manifest.
// The delete operation is "relaxed": unless we get an exception, we keep going.
// The extra files in the snapshot directory will not give any problem,
// since they have the same content as the data manifest, and even by re-reading
// them we will get the same information.
int totalDeletes = 0;
ExecutorCompletionService<Void> completionService = new ExecutorCompletionService<>(tpool);
if (v1Regions != null) {
for (SnapshotRegionManifest regionManifest: v1Regions) {
++totalDeletes;
completionService.submit(() -> {
SnapshotManifestV1.deleteRegionManifest(workingDirFs, workingDir, regionManifest);
return null;
});
}
}
if (v2Regions != null) {
for (SnapshotRegionManifest regionManifest: v2Regions) {
++totalDeletes;
completionService.submit(() -> {
SnapshotManifestV2.deleteRegionManifest(workingDirFs, workingDir, regionManifest);
return null;
});
}
}
// Wait for the deletes to finish.
for (int i = 0; i < totalDeletes; i++) {
try {
completionService.take().get();
} catch (InterruptedException ie) {
throw new InterruptedIOException(ie.getMessage());
} catch (ExecutionException e) {
throw new IOException("Error deleting region manifests", e.getCause());
}
}
} finally {
tpool.shutdown();
}
SnapshotDataManifest.Builder dataManifestBuilder = SnapshotDataManifest.newBuilder();
dataManifestBuilder.setTableSchema(ProtobufUtil.toTableSchema(htd));
if (v1Regions != null && v1Regions.size() > 0) {
dataManifestBuilder.addAllRegionManifests(v1Regions);
}
if (v2Regions != null && v2Regions.size() > 0) {
dataManifestBuilder.addAllRegionManifests(v2Regions);
}
// Write the v2 Data Manifest.
// Once the data-manifest is written, the snapshot can be considered complete.
// Currently snapshots are written in a "temporary" directory and later
// moved to the "completed" snapshot directory.
setStatusMsg("Writing data manifest for " + this.desc.getName());
SnapshotDataManifest dataManifest = dataManifestBuilder.build();
writeDataManifest(dataManifest);
this.regionManifests = dataManifest.getRegionManifestsList();
// Remove the region manifests. Everything is now in the data-manifest.
// The delete operation is "relaxed": unless we get an exception, we keep going.
// The extra files in the snapshot directory will not give any problem,
// since they have the same content as the data manifest, and even by re-reading
// them we will get the same information.
if (v1Regions != null && v1Regions.size() > 0) {
for (SnapshotRegionManifest regionManifest: v1Regions) {
SnapshotManifestV1.deleteRegionManifest(workingDirFs, workingDir, regionManifest);
}
}
if (v2Regions != null && v2Regions.size() > 0) {
for (SnapshotRegionManifest regionManifest: v2Regions) {
SnapshotManifestV2.deleteRegionManifest(workingDirFs, workingDir, regionManifest);
}
}
}
/*