HADOOP-2234 TableInputFormat erroneously aggregates map values

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@599643 13f79535-47bb-0310-9956-ffa450edef68
Michael Stack 2007-11-30 00:01:25 +00:00
parent b8291d673e
commit bf176b63f3
2 changed files with 21 additions and 32 deletions

CHANGES.txt

@@ -40,6 +40,7 @@ Trunk (unreleased changes)
    HADOOP-2253 getRow can return HBASE::DELETEVAL cells
                (Bryan Duxbury via Stack)
    HADOOP-2295 Fix assigning a region to multiple servers
+   HADOOP-2234 TableInputFormat erroneously aggregates map values

  IMPROVEMENTS
    HADOOP-2401 Add convenience put method that takes writable

TableInputFormat.java

@@ -49,7 +49,6 @@ import org.apache.log4j.Logger;
  */
 public class TableInputFormat
 implements InputFormat<HStoreKey, MapWritable>, JobConfigurable {
-
   static final Logger LOG = Logger.getLogger(TableInputFormat.class.getName());

   /**
@@ -67,9 +66,9 @@ implements InputFormat<HStoreKey, MapWritable>, JobConfigurable {
    * return (HStoreKey, MapWritable<Text, ImmutableBytesWritable>) pairs
    */
   class TableRecordReader implements RecordReader<HStoreKey, MapWritable> {
-    private HScannerInterface m_scanner;
-    private SortedMap<Text, byte[]> m_row; // current buffer
-    private Text m_endRow;
+    private final HScannerInterface m_scanner;
+    // current buffer
+    private final SortedMap<Text, byte[]> m_row = new TreeMap<Text, byte[]>();

     /**
      * Constructor
@@ -78,14 +77,15 @@ implements InputFormat<HStoreKey, MapWritable>, JobConfigurable {
      * @throws IOException
      */
     public TableRecordReader(Text startRow, Text endRow) throws IOException {
-      m_row = new TreeMap<Text, byte[]>();
-      m_scanner = m_table.obtainScanner(m_cols, startRow);
-      m_endRow = endRow;
+      if (endRow != null && endRow.getLength() > 0) {
+        this.m_scanner = m_table.obtainScanner(m_cols, startRow, endRow);
+      } else {
+        this.m_scanner = m_table.obtainScanner(m_cols, startRow);
+      }
     }

+    /** {@inheritDoc} */
     public void close() throws IOException {
-      m_scanner.close();
+      this.m_scanner.close();
     }

     /**
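The constructor change above moves end-of-range handling out of the record reader: instead of remembering m_endRow and checking it on every call to next() (the compareTo check removed in the next hunk), the reader now asks the table for a scanner that is already bounded by endRow. A toy sketch of that design choice, using a plain SortedMap in place of the HBase scanner (class, row names, and values are invented):

import java.util.SortedMap;
import java.util.TreeMap;

public class EndRowDemo {
  public static void main(String[] args) {
    SortedMap<String, String> table = new TreeMap<String, String>();
    table.put("row1", "a");
    table.put("row2", "b");
    table.put("row3", "c");

    // Old approach: iterate everything and stop by hand at the end row.
    for (String row : table.keySet()) {
      if (row.compareTo("row3") >= 0) break; // the check next() used to do
      System.out.println("old: " + row);
    }

    // New approach: ask for an iteration that is bounded up front,
    // as obtainScanner(m_cols, startRow, endRow) now does for the reader.
    for (String row : table.headMap("row3").keySet()) {
      System.out.println("new: " + row);
    }
  }
}

Both loops print row1 and row2; the difference is that the bounded form leaves no per-row end check, and no m_endRow field, in the reader.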
@@ -132,33 +132,26 @@ implements InputFormat<HStoreKey, MapWritable>, JobConfigurable {
      */
     @SuppressWarnings("unchecked")
     public boolean next(HStoreKey key, MapWritable value) throws IOException {
-      m_row.clear();
+      this.m_row.clear();
       HStoreKey tKey = key;
-      boolean hasMore = m_scanner.next(tKey, m_row);
+      boolean hasMore = this.m_scanner.next(tKey, this.m_row);
       if (hasMore) {
-        if(m_endRow.getLength() > 0 &&
-            (tKey.getRow().compareTo(m_endRow) >= 0)) {
-          hasMore = false;
-        } else {
-          for(Map.Entry<Text, byte[]> e: m_row.entrySet()) {
-            value.put(e.getKey(), new ImmutableBytesWritable(e.getValue()));
-          }
-        }
+        // Clear value to remove content added by previous call to next.
+        value.clear();
+        for (Map.Entry<Text, byte[]> e: this.m_row.entrySet()) {
+          value.put(e.getKey(), new ImmutableBytesWritable(e.getValue()));
+        }
       }
       return hasMore;
     }
   }

+  /** {@inheritDoc} */
   public RecordReader<HStoreKey, MapWritable> getRecordReader(
       InputSplit split,
       @SuppressWarnings("unused") JobConf job,
-      @SuppressWarnings("unused") Reporter reporter) throws IOException {
+      @SuppressWarnings("unused") Reporter reporter)
+  throws IOException {
     TableSplit tSplit = (TableSplit)split;
     return new TableRecordReader(tSplit.getStartRow(), tSplit.getEndRow());
   }
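The next() hunk above is the actual HADOOP-2234 fix: MapReduce hands the record reader the same value object on every call, so without value.clear() the columns of every previous row accumulate in the map. A minimal standalone sketch of the symptom (not part of the commit; class, row, and column names are invented, and Text stands in for the ImmutableBytesWritable values the real reader emits):

import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Text;

public class StaleValueDemo {
  public static void main(String[] args) {
    // The framework reuses one value map across calls to next().
    MapWritable value = new MapWritable();

    // next() for row 1: the row has two columns.
    value.put(new Text("info:a"), new Text("1"));
    value.put(new Text("info:b"), new Text("2"));
    System.out.println("row 1: " + value.size() + " columns"); // 2

    // next() for row 2: the row has only info:a, but without a clear()
    // the stale info:b from row 1 is still in the map -- the
    // "erroneous aggregation" the commit title describes.
    value.put(new Text("info:a"), new Text("3"));
    System.out.println("row 2, no clear(): " + value.size() + " columns"); // still 2

    // With the fix, the map is emptied before being refilled.
    value.clear();
    value.put(new Text("info:a"), new Text("3"));
    System.out.println("row 2, clear() first: " + value.size() + " columns"); // 1
  }
}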
@@ -185,7 +178,6 @@ implements InputFormat<HStoreKey, MapWritable>, JobConfigurable {
     return splits;
   }

+  /** {@inheritDoc} */
   public void configure(JobConf job) {
     Path[] tableNames = job.getInputPaths();
     m_tableName = new Text(tableNames[0].getName());
@@ -202,18 +194,14 @@ implements InputFormat<HStoreKey, MapWritable>, JobConfigurable {
     }
   }

+  /** {@inheritDoc} */
   public void validateInput(JobConf job) throws IOException {
     // expecting exactly one path
     Path[] tableNames = job.getInputPaths();
     if(tableNames == null || tableNames.length > 1) {
       throw new IOException("expecting one table name");
     }

     // expecting at least one column
     String colArg = job.get(COLUMN_LIST);
     if(colArg == null || colArg.length() == 0) {
       throw new IOException("expecting at least one column");
     }
   }
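Taken together, configure() and validateInput() define the job contract: exactly one input path, whose last component names the table, and a non-empty column list under the COLUMN_LIST key. A hedged setup sketch against the 2007-era mapred API (table name and column family are invented; it also assumes COLUMN_LIST is accessible to callers, otherwise its literal key string would be used):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

public class TableJobSetup {
  /** Configures a job so that TableInputFormat.validateInput() passes. */
  public static JobConf setup(JobConf job) {
    job.setInputFormat(TableInputFormat.class);
    // Exactly one input path; its name is read back as the table name.
    job.setInputPath(new Path("my_table"));          // hypothetical table
    // At least one column must be listed.
    job.set(TableInputFormat.COLUMN_LIST, "info:");  // hypothetical column family
    return job;
  }
}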