HBASE-1962 Bulk loading script makes regions incorrectly (loadtable.rb)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@834120 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2009-11-09 15:56:18 +00:00
parent 54ee56f8e4
commit 9ea1470180
2 changed files with 18 additions and 18 deletions

View File

@@ -100,6 +100,7 @@ Release 0.21.0 - Unreleased
HBASE-1928 ROOT and META tables stay in transition state (making the system
not usable) if the designated regionServer dies before the
assignment is complete (Yannis Pavlidis via Stack)
HBASE-1962 Bulk loading script makes regions incorrectly (loadtable.rb)
IMPROVEMENTS
HBASE-1760 Cleanup TODOs in HTable

View File

@@ -73,7 +73,7 @@ fs.mkdirs(tableDir) unless fs.exists(tableDir)
# Start. Per hfile, move it, and insert an entry in catalog table. # Start. Per hfile, move it, and insert an entry in catalog table.
families = fs.listStatus(outputdir, OutputLogFilter.new()) families = fs.listStatus(outputdir, OutputLogFilter.new())
throw IOError.new("Can do one family only") if families.length > 1 throw IOError.new("Can do one family only") if families.length > 1
# Read meta on all files. Put in map keyed by end key. # Read meta on all files. Put in map keyed by start key.
map = TreeMap.new(Bytes::ByteArrayComparator.new()) map = TreeMap.new(Bytes::ByteArrayComparator.new())
family = families[0] family = families[0]
# Make sure this subdir exists under table # Make sure this subdir exists under table
@@ -84,12 +84,13 @@ for hfile in hfiles
reader = HFile::Reader.new(fs, hfile.getPath(), nil, false) reader = HFile::Reader.new(fs, hfile.getPath(), nil, false)
begin begin
fileinfo = reader.loadFileInfo() fileinfo = reader.loadFileInfo()
lastkey = reader.getLastKey() firstkey = reader.getFirstKey()
# Last key is row/column/ts. We just want the row part. # First key is row/column/ts. We just want the row part.
rowlen = Bytes.toShort(lastkey) rowlen = Bytes.toShort(firstkey)
LOG.info(count.to_s + " read lastrow of " + firstkeyrow = firstkey[2, rowlen]
Bytes.toString(lastkey[2, rowlen]) + " from " + hfile.getPath().toString()) LOG.info(count.to_s + " read firstkey of " +
map.put(lastkey[2, rowlen], [hfile, fileinfo]) Bytes.toString(firstkeyrow) + " from " + hfile.getPath().toString())
map.put(firstkeyrow, [hfile, fileinfo])
count = count + 1 count = count + 1
ensure ensure
reader.close() reader.close()
@@ -106,18 +107,16 @@ hcd = HColumnDescriptor.new(familyName)
htd = HTableDescriptor.new(tableName) htd = HTableDescriptor.new(tableName)
htd.addFamily(hcd) htd.addFamily(hcd)
previouslastkey = HConstants::EMPTY_START_ROW previouslastkey = HConstants::EMPTY_START_ROW
count = 0 count = map.size()
for i in map.keySet() for i in map.descendingIterator()
tuple = map.get(i) tuple = map.get(i)
startkey = previouslastkey startkey = i
count = 1 + count count = count - 1
lastkey = i # If last time through loop, set start row as EMPTY_START_ROW
if count == map.size() startkey = HConstants::EMPTY_START_ROW unless count > 0
# Then we are at last key. Set it to special indicator # Next time around, lastkey is this startkey
lastkey = HConstants::EMPTY_START_ROW hri = HRegionInfo.new(htd, startkey, previouslastkey)
end previouslastkey = startkey
previouslastkey = lastkey
hri = HRegionInfo.new(htd, startkey, lastkey)
LOG.info(hri.toString()) LOG.info(hri.toString())
hfile = tuple[0].getPath() hfile = tuple[0].getPath()
rdir = Path.new(Path.new(tableDir, hri.getEncodedName().to_s), familyName) rdir = Path.new(Path.new(tableDir, hri.getEncodedName().to_s), familyName)