HBASE-21782 LoadIncrementalHFiles should not be IA.Public
Signed-off-by: Michael Stack <stack@apache.org>
This commit is contained in:
parent
5f25985b98
commit
2d4819dbed
|
@ -19,12 +19,12 @@
|
|||
package org.apache.hadoop.hbase.mapreduce;
|
||||
|
||||
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
|
||||
import org.apache.yetus.audience.InterfaceAudience;
|
||||
import org.apache.yetus.audience.InterfaceStability;
|
||||
import org.apache.hadoop.hbase.mapreduce.replication.VerifyReplication;
|
||||
import org.apache.hadoop.hbase.snapshot.ExportSnapshot;
|
||||
import org.apache.hadoop.hbase.tool.LoadIncrementalHFiles;
|
||||
import org.apache.hadoop.hbase.tool.BulkLoadHFilesTool;
|
||||
import org.apache.hadoop.util.ProgramDriver;
|
||||
import org.apache.yetus.audience.InterfaceAudience;
|
||||
import org.apache.yetus.audience.InterfaceStability;
|
||||
|
||||
/**
|
||||
* Driver for hbase mapreduce jobs. Select which to run by passing
|
||||
|
@ -33,10 +33,7 @@ import org.apache.hadoop.util.ProgramDriver;
|
|||
@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
|
||||
@InterfaceStability.Stable
|
||||
public class Driver {
|
||||
/**
|
||||
* @param args
|
||||
* @throws Throwable
|
||||
*/
|
||||
|
||||
public static void main(String[] args) throws Throwable {
|
||||
ProgramDriver pgd = new ProgramDriver();
|
||||
|
||||
|
@ -47,7 +44,7 @@ public class Driver {
|
|||
pgd.addClass(Export.NAME, Export.class, "Write table data to HDFS.");
|
||||
pgd.addClass(Import.NAME, Import.class, "Import data written by Export.");
|
||||
pgd.addClass(ImportTsv.NAME, ImportTsv.class, "Import data in TSV format.");
|
||||
pgd.addClass(LoadIncrementalHFiles.NAME, LoadIncrementalHFiles.class,
|
||||
pgd.addClass(BulkLoadHFilesTool.NAME, BulkLoadHFilesTool.class,
|
||||
"Complete a bulk data load.");
|
||||
pgd.addClass(CopyTable.NAME, CopyTable.class,
|
||||
"Export a table from local cluster to peer cluster.");
|
||||
|
|
|
@ -0,0 +1,100 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.tool;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hbase.TableName;
|
||||
import org.apache.hadoop.hbase.TableNotFoundException;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.yetus.audience.InterfaceAudience;
|
||||
|
||||
/**
|
||||
* The tool to let you load the output of {@code HFileOutputFormat} into an existing table
|
||||
* programmatically. Not thread safe.
|
||||
*/
|
||||
@InterfaceAudience.Public
|
||||
public interface BulkLoadHFiles {
|
||||
|
||||
static final String RETRY_ON_IO_EXCEPTION = "hbase.bulkload.retries.retryOnIOException";
|
||||
static final String MAX_FILES_PER_REGION_PER_FAMILY =
|
||||
"hbase.mapreduce.bulkload.max.hfiles.perRegion.perFamily";
|
||||
static final String ASSIGN_SEQ_IDS = "hbase.mapreduce.bulkload.assign.sequenceNumbers";
|
||||
static final String CREATE_TABLE_CONF_KEY = "create.table";
|
||||
static final String IGNORE_UNMATCHED_CF_CONF_KEY = "ignore.unmatched.families";
|
||||
static final String ALWAYS_COPY_FILES = "always.copy.files";
|
||||
|
||||
/**
|
||||
* Represents an HFile waiting to be loaded. An queue is used in this class in order to support
|
||||
* the case where a region has split during the process of the load. When this happens, the HFile
|
||||
* is split into two physical parts across the new region boundary, and each part is added back
|
||||
* into the queue. The import process finishes when the queue is empty.
|
||||
*/
|
||||
@InterfaceAudience.Public
|
||||
public static class LoadQueueItem {
|
||||
|
||||
private final byte[] family;
|
||||
|
||||
private final Path hfilePath;
|
||||
|
||||
public LoadQueueItem(byte[] family, Path hfilePath) {
|
||||
this.family = family;
|
||||
this.hfilePath = hfilePath;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "family:" + Bytes.toString(family) + " path:" + hfilePath.toString();
|
||||
}
|
||||
|
||||
public byte[] getFamily() {
|
||||
return family;
|
||||
}
|
||||
|
||||
public Path getFilePath() {
|
||||
return hfilePath;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform a bulk load of the given directory into the given pre-existing table.
|
||||
* @param tableName the table to load into
|
||||
* @param family2Files map of family to List of hfiles
|
||||
* @throws TableNotFoundException if table does not yet exist
|
||||
*/
|
||||
Map<LoadQueueItem, ByteBuffer> bulkLoad(TableName tableName, Map<byte[], List<Path>> family2Files)
|
||||
throws TableNotFoundException, IOException;
|
||||
|
||||
/**
|
||||
* Perform a bulk load of the given directory into the given pre-existing table.
|
||||
* @param tableName the table to load into
|
||||
* @param dir the directory that was provided as the output path of a job using
|
||||
* {@code HFileOutputFormat}
|
||||
* @throws TableNotFoundException if table does not yet exist
|
||||
*/
|
||||
Map<LoadQueueItem, ByteBuffer> bulkLoad(TableName tableName, Path dir)
|
||||
throws TableNotFoundException, IOException;
|
||||
|
||||
static BulkLoadHFiles create(Configuration conf) {
|
||||
return new BulkLoadHFilesTool(conf);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,70 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.tool;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hbase.HBaseConfiguration;
|
||||
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
|
||||
import org.apache.hadoop.hbase.TableName;
|
||||
import org.apache.hadoop.hbase.TableNotFoundException;
|
||||
import org.apache.hadoop.util.ToolRunner;
|
||||
import org.apache.yetus.audience.InterfaceAudience;
|
||||
|
||||
/**
|
||||
* The implementation for {@link BulkLoadHFiles}, and also can be executed from command line as a
|
||||
* tool.
|
||||
*/
|
||||
@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
|
||||
public class BulkLoadHFilesTool extends LoadIncrementalHFiles implements BulkLoadHFiles {
|
||||
|
||||
public static final String NAME = "completebulkload";
|
||||
|
||||
public BulkLoadHFilesTool(Configuration conf) {
|
||||
super(conf);
|
||||
}
|
||||
|
||||
private Map<BulkLoadHFiles.LoadQueueItem, ByteBuffer> convert(
|
||||
Map<LoadIncrementalHFiles.LoadQueueItem, ByteBuffer> map) {
|
||||
return map.entrySet().stream().collect(Collectors.toMap(e -> e.getKey(), e -> e.getValue()));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<BulkLoadHFiles.LoadQueueItem, ByteBuffer> bulkLoad(TableName tableName,
|
||||
Map<byte[], List<Path>> family2Files) throws TableNotFoundException, IOException {
|
||||
return convert(run(family2Files, tableName));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<BulkLoadHFiles.LoadQueueItem, ByteBuffer> bulkLoad(TableName tableName, Path dir)
|
||||
throws TableNotFoundException, IOException {
|
||||
return convert(run(dir, tableName));
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
Configuration conf = HBaseConfiguration.create();
|
||||
int ret = ToolRunner.run(conf, new BulkLoadHFilesTool(conf), args);
|
||||
System.exit(ret);
|
||||
}
|
||||
|
||||
}
|
|
@ -17,6 +17,8 @@
|
|||
*/
|
||||
package org.apache.hadoop.hbase.tool;
|
||||
|
||||
import static java.lang.String.format;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.InterruptedIOException;
|
||||
|
@ -46,7 +48,6 @@ import java.util.concurrent.ThreadPoolExecutor;
|
|||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.stream.Collectors;
|
||||
import static java.lang.String.format;
|
||||
import org.apache.commons.lang3.mutable.MutableInt;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.conf.Configured;
|
||||
|
@ -87,13 +88,6 @@ import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
|
|||
import org.apache.hadoop.hbase.regionserver.StoreFileWriter;
|
||||
import org.apache.hadoop.hbase.security.UserProvider;
|
||||
import org.apache.hadoop.hbase.security.token.FsDelegationToken;
|
||||
import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
|
||||
import org.apache.hbase.thirdparty.com.google.common.collect.HashMultimap;
|
||||
import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
|
||||
import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
|
||||
import org.apache.hbase.thirdparty.com.google.common.collect.Multimap;
|
||||
import org.apache.hbase.thirdparty.com.google.common.collect.Multimaps;
|
||||
import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.util.FSHDFSUtils;
|
||||
import org.apache.hadoop.hbase.util.FSVisitor;
|
||||
|
@ -104,22 +98,41 @@ import org.apache.yetus.audience.InterfaceAudience;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
|
||||
import org.apache.hbase.thirdparty.com.google.common.collect.HashMultimap;
|
||||
import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
|
||||
import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
|
||||
import org.apache.hbase.thirdparty.com.google.common.collect.Multimap;
|
||||
import org.apache.hbase.thirdparty.com.google.common.collect.Multimaps;
|
||||
import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;
|
||||
|
||||
/**
|
||||
* Tool to load the output of HFileOutputFormat into an existing table.
|
||||
* @deprecated since 2.2.0, will be removed in 3.0.0. Use {@link BulkLoadHFiles} instead. Please
|
||||
* rewrite your code if you rely on methods other than the {@link #run(Map, TableName)}
|
||||
* and {@link #run(String, TableName)}, as all the methods other than them will be
|
||||
* removed with no replacement.
|
||||
*/
|
||||
@Deprecated
|
||||
@InterfaceAudience.Public
|
||||
public class LoadIncrementalHFiles extends Configured implements Tool {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(LoadIncrementalHFiles.class);
|
||||
|
||||
public static final String NAME = "completebulkload";
|
||||
static final String RETRY_ON_IO_EXCEPTION = "hbase.bulkload.retries.retryOnIOException";
|
||||
/**
|
||||
* @deprecated since 2.2.0, will be removed in 3.0.0, with no replacement. End user should not
|
||||
* depend on this value.
|
||||
*/
|
||||
@Deprecated
|
||||
public static final String NAME = BulkLoadHFilesTool.NAME;
|
||||
static final String RETRY_ON_IO_EXCEPTION = BulkLoadHFiles.RETRY_ON_IO_EXCEPTION;
|
||||
public static final String MAX_FILES_PER_REGION_PER_FAMILY =
|
||||
"hbase.mapreduce.bulkload.max.hfiles.perRegion.perFamily";
|
||||
private static final String ASSIGN_SEQ_IDS = "hbase.mapreduce.bulkload.assign.sequenceNumbers";
|
||||
public final static String CREATE_TABLE_CONF_KEY = "create.table";
|
||||
public final static String IGNORE_UNMATCHED_CF_CONF_KEY = "ignore.unmatched.families";
|
||||
public final static String ALWAYS_COPY_FILES = "always.copy.files";
|
||||
BulkLoadHFiles.MAX_FILES_PER_REGION_PER_FAMILY;
|
||||
private static final String ASSIGN_SEQ_IDS = BulkLoadHFiles.ASSIGN_SEQ_IDS;
|
||||
public final static String CREATE_TABLE_CONF_KEY = BulkLoadHFiles.CREATE_TABLE_CONF_KEY;
|
||||
public final static String IGNORE_UNMATCHED_CF_CONF_KEY =
|
||||
BulkLoadHFiles.IGNORE_UNMATCHED_CF_CONF_KEY;
|
||||
public final static String ALWAYS_COPY_FILES = BulkLoadHFiles.ALWAYS_COPY_FILES;
|
||||
|
||||
// We use a '.' prefix which is ignored when walking directory trees
|
||||
// above. It is invalid family name.
|
||||
|
@ -142,28 +155,14 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
|
|||
* the case where a region has split during the process of the load. When this happens, the HFile
|
||||
* is split into two physical parts across the new region boundary, and each part is added back
|
||||
* into the queue. The import process finishes when the queue is empty.
|
||||
* @deprecated Use {@link BulkLoadHFiles} instead.
|
||||
*/
|
||||
@InterfaceAudience.Public
|
||||
public static class LoadQueueItem {
|
||||
private final byte[] family;
|
||||
private final Path hfilePath;
|
||||
@Deprecated
|
||||
public static class LoadQueueItem extends BulkLoadHFiles.LoadQueueItem {
|
||||
|
||||
public LoadQueueItem(byte[] family, Path hfilePath) {
|
||||
this.family = family;
|
||||
this.hfilePath = hfilePath;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "family:" + Bytes.toString(family) + " path:" + hfilePath.toString();
|
||||
}
|
||||
|
||||
public byte[] getFamily() {
|
||||
return family;
|
||||
}
|
||||
|
||||
public Path getFilePath() {
|
||||
return hfilePath;
|
||||
super(family, hfilePath);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -825,8 +824,7 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
|
|||
* If the table is created for the first time, then "completebulkload" reads the files twice. More
|
||||
* modifications necessary if we want to avoid doing it.
|
||||
*/
|
||||
private void createTable(TableName tableName, String dirPath, Admin admin) throws IOException {
|
||||
final Path hfofDir = new Path(dirPath);
|
||||
private void createTable(TableName tableName, Path hfofDir, Admin admin) throws IOException {
|
||||
final FileSystem fs = hfofDir.getFileSystem(getConf());
|
||||
|
||||
// Add column families
|
||||
|
@ -1148,13 +1146,7 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
|
|||
return getConf().getBoolean(ALWAYS_COPY_FILES, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform bulk load on the given table.
|
||||
* @param hfofDir the directory that was provided as the output path of a job using
|
||||
* HFileOutputFormat
|
||||
* @param tableName the table to load into
|
||||
*/
|
||||
public Map<LoadQueueItem, ByteBuffer> run(String hfofDir, TableName tableName)
|
||||
protected final Map<LoadQueueItem, ByteBuffer> run(Path hfofDir, TableName tableName)
|
||||
throws IOException {
|
||||
try (Connection connection = ConnectionFactory.createConnection(getConf());
|
||||
Admin admin = connection.getAdmin()) {
|
||||
|
@ -1169,11 +1161,21 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
|
|||
}
|
||||
try (Table table = connection.getTable(tableName);
|
||||
RegionLocator locator = connection.getRegionLocator(tableName)) {
|
||||
return doBulkLoad(new Path(hfofDir), admin, table, locator, isSilence(),
|
||||
return doBulkLoad(hfofDir, admin, table, locator, isSilence(),
|
||||
isAlwaysCopyFiles());
|
||||
}
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Perform bulk load on the given table.
|
||||
* @param hfofDir the directory that was provided as the output path of a job using
|
||||
* HFileOutputFormat
|
||||
* @param tableName the table to load into
|
||||
*/
|
||||
public Map<LoadQueueItem, ByteBuffer> run(String hfofDir, TableName tableName)
|
||||
throws IOException {
|
||||
return run(new Path(hfofDir), tableName);
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform bulk load on the given table.
|
||||
|
|
|
@ -54,7 +54,6 @@ import org.apache.hadoop.hbase.io.hfile.HFileScanner;
|
|||
import org.apache.hadoop.hbase.regionserver.BloomType;
|
||||
import org.apache.hadoop.hbase.testclassification.LargeTests;
|
||||
import org.apache.hadoop.hbase.testclassification.MiscTests;
|
||||
import org.apache.hadoop.hbase.tool.LoadIncrementalHFiles.LoadQueueItem;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.util.FSUtils;
|
||||
import org.apache.hadoop.hbase.util.HFileTestUtil;
|
||||
|
@ -346,7 +345,7 @@ public class TestLoadIncrementalHFiles {
|
|||
if (copyFiles) {
|
||||
conf.setBoolean(LoadIncrementalHFiles.ALWAYS_COPY_FILES, true);
|
||||
}
|
||||
LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
|
||||
BulkLoadHFilesTool loader = new BulkLoadHFilesTool(conf);
|
||||
List<String> args = Lists.newArrayList(baseDirectory.toString(), tableName.toString());
|
||||
if (depth == 3) {
|
||||
args.add("-loadTable");
|
||||
|
@ -356,10 +355,10 @@ public class TestLoadIncrementalHFiles {
|
|||
if (deleteFile) {
|
||||
fs.delete(last, true);
|
||||
}
|
||||
Map<LoadQueueItem, ByteBuffer> loaded = loader.run(map, tableName);
|
||||
Map<BulkLoadHFiles.LoadQueueItem, ByteBuffer> loaded = loader.bulkLoad(tableName, map);
|
||||
if (deleteFile) {
|
||||
expectedRows -= 1000;
|
||||
for (LoadQueueItem item : loaded.keySet()) {
|
||||
for (BulkLoadHFiles.LoadQueueItem item : loaded.keySet()) {
|
||||
if (item.getFilePath().getName().equals(last.getName())) {
|
||||
fail(last + " should be missing");
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue