HADOOP-2234 TableInputFormat erroneously aggregates map values
git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@599643 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b8291d673e
commit
bf176b63f3
|
@ -40,6 +40,7 @@ Trunk (unreleased changes)
|
||||||
HADOOP-2253 getRow can return HBASE::DELETEVAL cells
|
HADOOP-2253 getRow can return HBASE::DELETEVAL cells
|
||||||
(Bryan Duxbury via Stack)
|
(Bryan Duxbury via Stack)
|
||||||
HADOOP-2295 Fix assigning a region to multiple servers
|
HADOOP-2295 Fix assigning a region to multiple servers
|
||||||
|
HADOOP-2234 TableInputFormat erroneously aggregates map values
|
||||||
|
|
||||||
IMPROVEMENTS
|
IMPROVEMENTS
|
||||||
HADOOP-2401 Add convenience put method that takes writable
|
HADOOP-2401 Add convenience put method that takes writable
|
||||||
|
|
|
@ -49,7 +49,6 @@ import org.apache.log4j.Logger;
|
||||||
*/
|
*/
|
||||||
public class TableInputFormat
|
public class TableInputFormat
|
||||||
implements InputFormat<HStoreKey, MapWritable>, JobConfigurable {
|
implements InputFormat<HStoreKey, MapWritable>, JobConfigurable {
|
||||||
|
|
||||||
static final Logger LOG = Logger.getLogger(TableInputFormat.class.getName());
|
static final Logger LOG = Logger.getLogger(TableInputFormat.class.getName());
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -67,9 +66,9 @@ implements InputFormat<HStoreKey, MapWritable>, JobConfigurable {
|
||||||
* return (HStoreKey, MapWritable<Text, ImmutableBytesWritable>) pairs
|
* return (HStoreKey, MapWritable<Text, ImmutableBytesWritable>) pairs
|
||||||
*/
|
*/
|
||||||
class TableRecordReader implements RecordReader<HStoreKey, MapWritable> {
|
class TableRecordReader implements RecordReader<HStoreKey, MapWritable> {
|
||||||
private HScannerInterface m_scanner;
|
private final HScannerInterface m_scanner;
|
||||||
private SortedMap<Text, byte[]> m_row; // current buffer
|
// current buffer
|
||||||
private Text m_endRow;
|
private final SortedMap<Text, byte[]> m_row = new TreeMap<Text, byte[]>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructor
|
* Constructor
|
||||||
|
@ -78,14 +77,15 @@ implements InputFormat<HStoreKey, MapWritable>, JobConfigurable {
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
public TableRecordReader(Text startRow, Text endRow) throws IOException {
|
public TableRecordReader(Text startRow, Text endRow) throws IOException {
|
||||||
m_row = new TreeMap<Text, byte[]>();
|
if (endRow != null && endRow.getLength() > 0) {
|
||||||
m_scanner = m_table.obtainScanner(m_cols, startRow);
|
this.m_scanner = m_table.obtainScanner(m_cols, startRow, endRow);
|
||||||
m_endRow = endRow;
|
} else {
|
||||||
|
this.m_scanner = m_table.obtainScanner(m_cols, startRow);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
m_scanner.close();
|
this.m_scanner.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -132,33 +132,26 @@ implements InputFormat<HStoreKey, MapWritable>, JobConfigurable {
|
||||||
*/
|
*/
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
public boolean next(HStoreKey key, MapWritable value) throws IOException {
|
public boolean next(HStoreKey key, MapWritable value) throws IOException {
|
||||||
m_row.clear();
|
this.m_row.clear();
|
||||||
HStoreKey tKey = key;
|
HStoreKey tKey = key;
|
||||||
boolean hasMore = m_scanner.next(tKey, m_row);
|
boolean hasMore = this.m_scanner.next(tKey, this.m_row);
|
||||||
|
|
||||||
if (hasMore) {
|
if (hasMore) {
|
||||||
if(m_endRow.getLength() > 0 &&
|
// Clear value to remove content added by previous call to next.
|
||||||
(tKey.getRow().compareTo(m_endRow) >= 0)) {
|
value.clear();
|
||||||
|
for (Map.Entry<Text, byte[]> e: this.m_row.entrySet()) {
|
||||||
hasMore = false;
|
|
||||||
|
|
||||||
} else {
|
|
||||||
for(Map.Entry<Text, byte[]> e: m_row.entrySet()) {
|
|
||||||
value.put(e.getKey(), new ImmutableBytesWritable(e.getValue()));
|
value.put(e.getKey(), new ImmutableBytesWritable(e.getValue()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
return hasMore;
|
return hasMore;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
|
||||||
public RecordReader<HStoreKey, MapWritable> getRecordReader(
|
public RecordReader<HStoreKey, MapWritable> getRecordReader(
|
||||||
InputSplit split,
|
InputSplit split,
|
||||||
@SuppressWarnings("unused") JobConf job,
|
@SuppressWarnings("unused") JobConf job,
|
||||||
@SuppressWarnings("unused") Reporter reporter) throws IOException {
|
@SuppressWarnings("unused") Reporter reporter)
|
||||||
|
throws IOException {
|
||||||
TableSplit tSplit = (TableSplit)split;
|
TableSplit tSplit = (TableSplit)split;
|
||||||
return new TableRecordReader(tSplit.getStartRow(), tSplit.getEndRow());
|
return new TableRecordReader(tSplit.getStartRow(), tSplit.getEndRow());
|
||||||
}
|
}
|
||||||
|
@ -185,7 +178,6 @@ implements InputFormat<HStoreKey, MapWritable>, JobConfigurable {
|
||||||
return splits;
|
return splits;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
|
||||||
public void configure(JobConf job) {
|
public void configure(JobConf job) {
|
||||||
Path[] tableNames = job.getInputPaths();
|
Path[] tableNames = job.getInputPaths();
|
||||||
m_tableName = new Text(tableNames[0].getName());
|
m_tableName = new Text(tableNames[0].getName());
|
||||||
|
@ -202,18 +194,14 @@ implements InputFormat<HStoreKey, MapWritable>, JobConfigurable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
|
||||||
public void validateInput(JobConf job) throws IOException {
|
public void validateInput(JobConf job) throws IOException {
|
||||||
|
|
||||||
// expecting exactly one path
|
// expecting exactly one path
|
||||||
|
|
||||||
Path[] tableNames = job.getInputPaths();
|
Path[] tableNames = job.getInputPaths();
|
||||||
if(tableNames == null || tableNames.length > 1) {
|
if(tableNames == null || tableNames.length > 1) {
|
||||||
throw new IOException("expecting one table name");
|
throw new IOException("expecting one table name");
|
||||||
}
|
}
|
||||||
|
|
||||||
// expecting at least one column
|
// expecting at least one column
|
||||||
|
|
||||||
String colArg = job.get(COLUMN_LIST);
|
String colArg = job.get(COLUMN_LIST);
|
||||||
if(colArg == null || colArg.length() == 0) {
|
if(colArg == null || colArg.length() == 0) {
|
||||||
throw new IOException("expecting at least one column");
|
throw new IOException("expecting at least one column");
|
||||||
|
|
Loading…
Reference in New Issue