HBASE-3440 Clean out load_table.rb and make sure all roads lead to completebulkload tool
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1082247 13f79535-47bb-0310-9956-ffa450edef68
parent 60c70403f8
commit 692552597a
132 bin/loadtable.rb
@@ -17,134 +17,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Script that takes over from org.apache.hadoop.hbase.mapreduce.HFileOutputFormat.
# Pass it output directory of HFileOutputFormat. It will read the passed files,
# move them into place and update the catalog table appropriately. Warning:
# it will overwrite anything that exists already for passed table.
# It expects hbase to be up and running so it can insert table info.
#
# To see usage for this script, run:
#
#  ${HBASE_HOME}/bin/hbase org.jruby.Main loadtable.rb
#
include Java
import java.util.TreeMap
import org.apache.hadoop.hbase.client.HTable
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.util.FSUtils
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.util.Writables
import org.apache.hadoop.hbase.HConstants
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.HRegionInfo
import org.apache.hadoop.hbase.HTableDescriptor
import org.apache.hadoop.hbase.HColumnDescriptor
import org.apache.hadoop.hbase.HRegionInfo
import org.apache.hadoop.hbase.io.hfile.HFile
import org.apache.hadoop.fs.Path
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.mapred.OutputLogFilter
import org.apache.commons.logging.Log
import org.apache.commons.logging.LogFactory

# Name of this script
NAME = "loadtable"

# Print usage for this script
def usage
  puts 'Usage: %s.rb TABLENAME HFILEOUTPUTFORMAT_OUTPUT_DIR' % NAME
  exit!
end

# Passed 'dir' exists and is a directory else exception
def isDirExists(fs, dir)
  raise IOError.new("Does not exist: " + dir.toString()) unless fs.exists(dir)
  raise IOError.new("Not a directory: " + dir.toString()) unless fs.isDirectory(dir)
end

# Check arguments
if ARGV.size != 2
  usage
end

# Check good table names were passed.
tableName = HTableDescriptor.isLegalTableName(ARGV[0].to_java_bytes)
outputdir = Path.new(ARGV[1])

# Get configuration to use.
c = HBaseConfiguration.new()
# Get a logger and a metautils instance.
LOG = LogFactory.getLog(NAME)

# Set hadoop filesystem configuration using the hbase.rootdir.
# Otherwise, we'll always use localhost though the hbase.rootdir
# might be pointing at hdfs location.
c.set("fs.defaultFS", c.get(HConstants::HBASE_DIR))
fs = FileSystem.get(c)

# If hfiles directory does not exist, exit.
isDirExists(fs, outputdir)
# Create table dir if it doesn't exist.
rootdir = FSUtils.getRootDir(c)
tableDir = Path.new(rootdir, Path.new(Bytes.toString(tableName)))
fs.mkdirs(tableDir) unless fs.exists(tableDir)

# Start. Per hfile, move it, and insert an entry in catalog table.
families = fs.listStatus(outputdir, OutputLogFilter.new())
throw IOError.new("Can do one family only") if families.length > 1
# Read meta on all files. Put in map keyed by start key.
map = TreeMap.new(Bytes::ByteArrayComparator.new())
family = families[0]
# Make sure this subdir exists under table
hfiles = fs.listStatus(family.getPath())
LOG.info("Found " + hfiles.length.to_s + " hfiles");
count = 0
for hfile in hfiles
  reader = HFile::Reader.new(fs, hfile.getPath(), nil, false)
  begin
    fileinfo = reader.loadFileInfo()
    firstkey = reader.getFirstKey()
    # First key is row/column/ts. We just want the row part.
    rowlen = Bytes.toShort(firstkey)
    firstkeyrow = firstkey[2, rowlen]
    LOG.info(count.to_s + " read firstkey of " +
      Bytes.toString(firstkeyrow) + " from " + hfile.getPath().toString())
    map.put(firstkeyrow, [hfile, fileinfo])
    count = count + 1
  ensure
    reader.close()
  end
end
# Now I have sorted list of fileinfo+paths. Start insert.
# Get a client on catalog table.
meta = HTable.new(c, HConstants::META_TABLE_NAME)
# I can't find out from hfile how its compressed.
# Using all defaults. Change manually after loading if
# something else wanted in column or table attributes.
familyName = family.getPath().getName()
hcd = HColumnDescriptor.new(familyName)
htd = HTableDescriptor.new(tableName)
htd.addFamily(hcd)
previouslastkey = HConstants::EMPTY_START_ROW
count = map.size()
for i in map.descendingKeySet().iterator()
  tuple = map.get(i)
  startkey = i
  count = count - 1
  # If last time through loop, set start row as EMPTY_START_ROW
  startkey = HConstants::EMPTY_START_ROW unless count > 0
  # Next time around, lastkey is this startkey
  hri = HRegionInfo.new(htd, startkey, previouslastkey)
  previouslastkey = startkey
  LOG.info(hri.toString())
  hfile = tuple[0].getPath()
  rdir = Path.new(Path.new(tableDir, hri.getEncodedName().to_s), familyName)
  fs.mkdirs(rdir)
  tgt = Path.new(rdir, hfile.getName())
  fs.rename(hfile, tgt)
  LOG.info("Moved " + hfile.toString() + " to " + tgt.toString())
  p = Put.new(hri.getRegionName())
  p.add(HConstants::CATALOG_FAMILY, HConstants::REGIONINFO_QUALIFIER, Writables.getBytes(hri))
  meta.put(p)
  LOG.info("Inserted " + hri.toString())
end
puts 'DISABLED!!!! Use completebulkload instead. See tail of http://hbase.apache.org/bulk-loads.html'
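For reference, a minimal sketch of how the replacement tool is invoked, assuming the org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles driver class behind completebulkload; the output directory and table name below are placeholders, not values taken from this commit:

  $ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles <hfileoutputformat-output-dir> <tablename>

Like the removed script, the tool is pointed at the HFileOutputFormat output directory (one subdirectory per column family) and the target table, and it loads the hfiles into the running table; see the bulk-loads page linked above for the authoritative usage.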