HADOOP-2234 TableInputFormat erroneously aggregates map values

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@599643 13f79535-47bb-0310-9956-ffa450edef68
Michael Stack 2007-11-30 00:01:25 +00:00
parent b8291d673e
commit bf176b63f3
2 changed files with 21 additions and 32 deletions

CHANGES.txt

@@ -40,6 +40,7 @@ Trunk (unreleased changes)
    HADOOP-2253 getRow can return HBASE::DELETEVAL cells
                (Bryan Duxbury via Stack)
    HADOOP-2295 Fix assigning a region to multiple servers
+   HADOOP-2234 TableInputFormat erroneously aggregates map values

  IMPROVEMENTS
    HADOOP-2401 Add convenience put method that takes writable

TableInputFormat.java

@@ -49,7 +49,6 @@ import org.apache.log4j.Logger;
  */
 public class TableInputFormat
 implements InputFormat<HStoreKey, MapWritable>, JobConfigurable {
-
   static final Logger LOG = Logger.getLogger(TableInputFormat.class.getName());

   /**
@@ -67,9 +66,9 @@ implements InputFormat<HStoreKey, MapWritable>, JobConfigurable {
    * return (HStoreKey, MapWritable<Text, ImmutableBytesWritable>) pairs
    */
   class TableRecordReader implements RecordReader<HStoreKey, MapWritable> {
-    private HScannerInterface m_scanner;
-    private SortedMap<Text, byte[]> m_row; // current buffer
-    private Text m_endRow;
+    private final HScannerInterface m_scanner;
+    // current buffer
+    private final SortedMap<Text, byte[]> m_row = new TreeMap<Text, byte[]>();

     /**
      * Constructor
@@ -78,14 +77,15 @@ implements InputFormat<HStoreKey, MapWritable>, JobConfigurable {
      * @throws IOException
      */
     public TableRecordReader(Text startRow, Text endRow) throws IOException {
-      m_row = new TreeMap<Text, byte[]>();
-      m_scanner = m_table.obtainScanner(m_cols, startRow);
-      m_endRow = endRow;
+      if (endRow != null && endRow.getLength() > 0) {
+        this.m_scanner = m_table.obtainScanner(m_cols, startRow, endRow);
+      } else {
+        this.m_scanner = m_table.obtainScanner(m_cols, startRow);
+      }
     }

+    /** {@inheritDoc} */
     public void close() throws IOException {
-      m_scanner.close();
+      this.m_scanner.close();
     }

     /**
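The constructor change above moves end-of-range handling out of the record reader: instead of remembering m_endRow and checking it on every call to next() (the compareTo check removed in the next hunk), the reader now asks the table for a scanner that is already bounded by endRow. A toy sketch of that design choice, using a plain SortedMap in place of the HBase scanner (class, row names, and values are invented):

import java.util.SortedMap;
import java.util.TreeMap;

public class EndRowDemo {
  public static void main(String[] args) {
    SortedMap<String, String> table = new TreeMap<String, String>();
    table.put("row1", "a");
    table.put("row2", "b");
    table.put("row3", "c");

    // Old approach: iterate everything and stop by hand at the end row.
    for (String row : table.keySet()) {
      if (row.compareTo("row3") >= 0) break; // the check next() used to do
      System.out.println("old: " + row);
    }

    // New approach: ask for an iteration that is bounded up front,
    // as obtainScanner(m_cols, startRow, endRow) now does for the reader.
    for (String row : table.headMap("row3").keySet()) {
      System.out.println("new: " + row);
    }
  }
}

Both loops print row1 and row2; the difference is that the bounded form leaves no per-row end check, and no m_endRow field, in the reader.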
@@ -132,33 +132,26 @@ implements InputFormat<HStoreKey, MapWritable>, JobConfigurable {
      */
     @SuppressWarnings("unchecked")
     public boolean next(HStoreKey key, MapWritable value) throws IOException {
-      m_row.clear();
+      this.m_row.clear();
       HStoreKey tKey = key;
-      boolean hasMore = m_scanner.next(tKey, m_row);
+      boolean hasMore = this.m_scanner.next(tKey, this.m_row);
       if (hasMore) {
-        if(m_endRow.getLength() > 0 &&
-            (tKey.getRow().compareTo(m_endRow) >= 0)) {
-          hasMore = false;
-        } else {
-          for(Map.Entry<Text, byte[]> e: m_row.entrySet()) {
-            value.put(e.getKey(), new ImmutableBytesWritable(e.getValue()));
-          }
-        }
+        // Clear value to remove content added by previous call to next.
+        value.clear();
+        for (Map.Entry<Text, byte[]> e: this.m_row.entrySet()) {
+          value.put(e.getKey(), new ImmutableBytesWritable(e.getValue()));
+        }
       }
       return hasMore;
     }
   }

+  /** {@inheritDoc} */
   public RecordReader<HStoreKey, MapWritable> getRecordReader(
       InputSplit split,
       @SuppressWarnings("unused") JobConf job,
-      @SuppressWarnings("unused") Reporter reporter) throws IOException {
+      @SuppressWarnings("unused") Reporter reporter)
+  throws IOException {
     TableSplit tSplit = (TableSplit)split;
     return new TableRecordReader(tSplit.getStartRow(), tSplit.getEndRow());
   }
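The next() hunk above is the actual HADOOP-2234 fix: MapReduce hands the record reader the same value object on every call, so without value.clear() the columns of every previous row accumulate in the map. A minimal standalone sketch of the symptom (not part of the commit; class, row, and column names are invented, and Text stands in for the ImmutableBytesWritable values the real reader emits):

import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Text;

public class StaleValueDemo {
  public static void main(String[] args) {
    // The framework reuses one value map across calls to next().
    MapWritable value = new MapWritable();

    // next() for row 1: the row has two columns.
    value.put(new Text("info:a"), new Text("1"));
    value.put(new Text("info:b"), new Text("2"));
    System.out.println("row 1: " + value.size() + " columns"); // 2

    // next() for row 2: the row has only info:a, but without a clear()
    // the stale info:b from row 1 is still in the map -- the
    // "erroneous aggregation" the commit title describes.
    value.put(new Text("info:a"), new Text("3"));
    System.out.println("row 2, no clear(): " + value.size() + " columns"); // still 2

    // With the fix, the map is emptied before being refilled.
    value.clear();
    value.put(new Text("info:a"), new Text("3"));
    System.out.println("row 2, clear() first: " + value.size() + " columns"); // 1
  }
}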
@@ -185,7 +178,6 @@ implements InputFormat<HStoreKey, MapWritable>, JobConfigurable {
     return splits;
   }

+  /** {@inheritDoc} */
   public void configure(JobConf job) {
     Path[] tableNames = job.getInputPaths();
     m_tableName = new Text(tableNames[0].getName());
@@ -202,18 +194,14 @@ implements InputFormat<HStoreKey, MapWritable>, JobConfigurable {
     }
   }

+  /** {@inheritDoc} */
   public void validateInput(JobConf job) throws IOException {
     // expecting exactly one path
     Path[] tableNames = job.getInputPaths();
     if(tableNames == null || tableNames.length > 1) {
       throw new IOException("expecting one table name");
     }

     // expecting at least one column
     String colArg = job.get(COLUMN_LIST);
     if(colArg == null || colArg.length() == 0) {
       throw new IOException("expecting at least one column");
     }
   }
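Taken together, configure() and validateInput() define the job contract: exactly one input path, whose last component names the table, and a non-empty column list under the COLUMN_LIST key. A hedged setup sketch against the 2007-era mapred API (table name and column family are invented; it also assumes COLUMN_LIST is accessible to callers, otherwise its literal key string would be used):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

public class TableJobSetup {
  /** Configures a job so that TableInputFormat.validateInput() passes. */
  public static JobConf setup(JobConf job) {
    job.setInputFormat(TableInputFormat.class);
    // Exactly one input path; its name is read back as the table name.
    job.setInputPath(new Path("my_table"));          // hypothetical table
    // At least one column must be listed.
    job.set(TableInputFormat.COLUMN_LIST, "info:");  // hypothetical column family
    return job;
  }
}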