HBASE-13027 Ensure extension of TableInputFormatBase works.

* move the mapreduce version of the TableInputFormat tests out of mapred
* add the ability to get a runnable job via the MR test shims
* correct the javadoc example for the current APIs
* add tests that run a job based on extending TableInputFormatBase (as given in the javadocs)
* add tests that run jobs based on the javadocs from 0.98
* fall back to our own Connection if users of the deprecated table configuration have a managed connection
Sean Busbey 2015-02-11 17:02:57 -06:00 committed by Sean Busbey
parent e8d17da337
commit 9114edece1
7 changed files with 686 additions and 134 deletions
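
For reference, the regression this fixes bites subclasses written against the 0.98-era javadoc example: constructing an HTable directly hands TableInputFormatBase an HBase-managed Connection, and the base class's request for an Admin then failed with a bare IOException. A minimal sketch of that legacy pattern (hypothetical class name; it mirrors the ExampleDeprecatedTIF test added below):

import java.io.IOException;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.mapred.TableInputFormatBase;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobConfigurable;

public class LegacyStyleTIF extends TableInputFormatBase implements JobConfigurable {
  @Override
  public void configure(JobConf job) {
    try {
      // A directly-constructed HTable rides on an HBase-managed Connection;
      // before this commit, setHTable() failed when it asked that connection
      // for an Admin. Now it detects the case and falls back to its own
      // unmanaged Connection.
      setHTable(new HTable(HBaseConfiguration.create(job), Bytes.toBytes("exampleTable")));
      setInputColumns(new byte[][] { Bytes.toBytes("columnA") });
    } catch (IOException exception) {
      throw new RuntimeException("Failed to configure for job.", exception);
    }
  }
}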

hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionManager.java

@@ -709,7 +709,7 @@ class ConnectionManager {
   @Override
   public HTableInterface getTable(TableName tableName, ExecutorService pool) throws IOException {
     if (managed) {
-      throw new IOException("The connection has to be unmanaged.");
+      throw new NeedUnmanagedConnectionException();
     }
     return new HTable(tableName, this, tableConfig, rpcCallerFactory, rpcControllerFactory, pool);
   }
@@ -744,7 +744,7 @@ class ConnectionManager {
   @Override
   public Admin getAdmin() throws IOException {
     if (managed) {
-      throw new IOException("The connection has to be unmanaged.");
+      throw new NeedUnmanagedConnectionException();
     }
     return new HBaseAdmin(this);
   }
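
For context, "managed" means the connection's lifecycle is owned and reference-counted by HBase itself, which is what the deprecated HConnectionManager hands out and what a directly-constructed HTable uses internally; ConnectionFactory returns an unmanaged connection the caller owns and must close. A rough sketch of the two acquisition paths, assuming the 1.0-era client API:

static void connectionSketch(org.apache.hadoop.conf.Configuration conf) throws IOException {
  // Unmanaged: the caller owns it; getTable()/getAdmin() are permitted.
  try (Connection connection = ConnectionFactory.createConnection(conf);
       Admin admin = connection.getAdmin()) {
    // administrative calls ...
  }

  // Managed (deprecated): shared and reference-counted inside HBase. Per the
  // change above, asking it for a Table or Admin now raises the more specific
  // NeedUnmanagedConnectionException instead of a generic IOException.
  HConnection managed = HConnectionManager.getConnection(conf);
}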

hbase-client/src/main/java/org/apache/hadoop/hbase/client/NeedUnmanagedConnectionException.java (new file)

@@ -0,0 +1,32 @@
/**
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.client;

import org.apache.hadoop.hbase.classification.InterfaceAudience;

import java.io.IOException;

/**
 * Used for internal signalling that a Connection implementation needs to be
 * user-managed to be used for particular request types.
 */
@InterfaceAudience.Private
public class NeedUnmanagedConnectionException extends IOException {
  private static final long serialVersionUID = 1876775844L;

  public NeedUnmanagedConnectionException() {
    super("The connection has to be unmanaged.");
  }
}
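
Since the new type extends IOException and carries the old message, existing callers that caught the generic exception keep working; code that can recover now catches the narrow type instead, which is exactly what the setHTable() change later in this commit does. A condensed sketch of that recovery (hypothetical helper name):

static Admin getAdminWithFallback(Connection connection) throws IOException {
  try {
    return connection.getAdmin();
  } catch (NeedUnmanagedConnectionException exception) {
    // The caller handed us a managed connection: stand up our own unmanaged
    // one from the same Configuration and retry. (The real setHTable() also
    // stores the new connection so it can be closed with the table later.)
    Connection fallback = ConnectionFactory.createConnection(connection.getConfiguration());
    return fallback.getAdmin();
  }
}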

hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/TableInputFormatBase.java

@@ -44,23 +44,25 @@ import org.apache.hadoop.mapred.Reporter;
  * <pre>
  * class ExampleTIF extends TableInputFormatBase implements JobConfigurable {
  *
+ *   @Override
  *   public void configure(JobConf job) {
- *     HTable exampleTable = new HTable(HBaseConfiguration.create(job),
- *       Bytes.toBytes("exampleTable"));
- *     // mandatory
- *     setHTable(exampleTable);
- *     Text[] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
- *       Bytes.toBytes("columnB") };
- *     // mandatory
- *     setInputColumns(inputColumns);
- *     RowFilterInterface exampleFilter = new RegExpRowFilter("keyPrefix.*");
- *     // optional
- *     setRowFilter(exampleFilter);
+ *     try {
+ *       HTable exampleTable = new HTable(HBaseConfiguration.create(job),
+ *         Bytes.toBytes("exampleTable"));
+ *       // mandatory
+ *       setHTable(exampleTable);
+ *       byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
+ *         Bytes.toBytes("columnB") };
+ *       // mandatory
+ *       setInputColumns(inputColumns);
+ *       // optional
+ *       Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
+ *       setRowFilter(exampleFilter);
+ *     } catch (IOException exception) {
+ *       throw new RuntimeException("Failed to configure for job.", exception);
+ *     }
  *   }
- *
- *   public void validateInput(JobConf job) throws IOException {
- *   }
  * }
  * </pre>
  */

hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java

@@ -38,7 +38,9 @@ import org.apache.hadoop.hbase.HRegionLocation;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.Admin;
 import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
 import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.NeedUnmanagedConnectionException;
 import org.apache.hadoop.hbase.client.RegionLocator;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.Scan;
@@ -68,17 +70,24 @@ import org.apache.hadoop.util.StringUtils;
  *
  *   private JobConf job;
  *
+ *   @Override
  *   public void configure(JobConf job) {
- *     this.job = job;
- *     Text[] inputColumns = new byte [][] { Bytes.toBytes("cf1:columnA"),
- *       Bytes.toBytes("cf2") };
- *     // mandatory
- *     setInputColumns(inputColumns);
- *     RowFilterInterface exampleFilter = new RegExpRowFilter("keyPrefix.*");
- *     // optional
- *     setRowFilter(exampleFilter);
+ *     try {
+ *       this.job = job;
+ *       byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
+ *         Bytes.toBytes("columnB") };
+ *       // optional
+ *       Scan scan = new Scan();
+ *       for (byte[] family : inputColumns) {
+ *         scan.addFamily(family);
+ *       }
+ *       Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
+ *       scan.setFilter(exampleFilter);
+ *       setScan(scan);
+ *     } catch (IOException exception) {
+ *       throw new RuntimeException("Failed to configure for job.", exception);
+ *     }
  *   }
  *
  *   protected void initialize() {
  *     Connection connection =
  *        ConnectionFactory.createConnection(HBaseConfiguration.create(job));
@@ -86,10 +95,7 @@ import org.apache.hadoop.util.StringUtils;
  *     // mandatory
  *     initializeTable(connection, tableName);
  *   }
- *
- *   public void validateInput(JobConf job) throws IOException {
- *   }
  * }
  * </pre>
  */
@InterfaceAudience.Public
@@ -382,15 +388,31 @@ extends InputFormat<ImmutableBytesWritable, Result> {
   @Deprecated
   protected void setHTable(HTable table) throws IOException {
     this.table = table;
-    this.regionLocator = table.getRegionLocator();
     this.connection = table.getConnection();
-    this.admin = this.connection.getAdmin();
+    try {
+      this.regionLocator = table.getRegionLocator();
+      this.admin = this.connection.getAdmin();
+    } catch (NeedUnmanagedConnectionException exception) {
+      LOG.warn("You are using an HTable instance that relies on an HBase-managed Connection. " +
+          "This is usually due to directly creating an HTable, which is deprecated. Instead, you " +
+          "should create a Connection object and then request a Table instance from it. If you " +
+          "don't need the Table instance for your own use, you should instead use the " +
+          "TableInputFormatBase.initializeTable method directly.");
+      LOG.info("Creating an additional unmanaged connection because user provided one can't be " +
+          "used for administrative actions. We'll close it when we close out the table.");
+      LOG.debug("Details about our failure to request an administrative interface.", exception);
+      // Do we need a "copy the settings from this Connection" method? Are things like the User
+      // properly maintained by just looking again at the Configuration?
+      this.connection = ConnectionFactory.createConnection(this.connection.getConfiguration());
+      this.regionLocator = this.connection.getRegionLocator(table.getName());
+      this.admin = this.connection.getAdmin();
+    }
   }

   /**
    * Allows subclasses to initialize the table information.
    *
-   * @param connection  The {@link Connection} to the HBase cluster.
+   * @param connection  The {@link Connection} to the HBase cluster. MUST be unmanaged. We will close.
    * @param tableName  The {@link TableName} of the table to process.
    * @throws IOException
    */
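
Worth underlining in the revised @param contract: ownership of the Connection passes to the input format, which will close it, so a subclass's initialize() should create a connection dedicated to this purpose and must not close it itself. A hedged sketch matching the updated javadoc example (assumes the JobConf was captured in configure() as the example's job field):

@Override
protected void initialize() {
  try {
    // Dedicated, unmanaged connection; TableInputFormatBase takes ownership
    // and closes it, so we must not close it here.
    Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create(job));
    // mandatory
    initializeTable(connection, TableName.valueOf("exampleTable"));
  } catch (IOException exception) {
    throw new RuntimeException("Failed to initialize.", exception);
  }
}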

hbase-server/src/test/java/org/apache/hadoop/hbase/mapred/TestTableInputFormat.java

@@ -35,14 +35,30 @@ import java.util.Map;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hbase.*;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.ResultScanner;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.filter.RegexStringComparator;
+import org.apache.hadoop.hbase.filter.RowFilter;
+import org.apache.hadoop.hbase.mapreduce.MapreduceTestingShim;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.JobConfigurable;
+import org.apache.hadoop.mapred.MiniMRCluster;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.RunningJob;
+import org.apache.hadoop.mapred.lib.NullOutputFormat;
 import org.junit.AfterClass;
 import org.junit.Before;
 import org.junit.BeforeClass;
@@ -61,6 +77,7 @@ public class TestTableInputFormat {

   private static final Log LOG = LogFactory.getLog(TestTableInputFormat.class);
   private final static HBaseTestingUtility UTIL = new HBaseTestingUtility();
+  private static MiniMRCluster mrCluster;
   static final byte[] FAMILY = Bytes.toBytes("family");

   private static final byte[][] columns = new byte[][] { FAMILY };
@@ -68,10 +85,12 @@ public class TestTableInputFormat {
   @BeforeClass
   public static void beforeClass() throws Exception {
     UTIL.startMiniCluster();
+    mrCluster = UTIL.startMiniMapReduceCluster();
   }

   @AfterClass
   public static void afterClass() throws Exception {
+    UTIL.shutdownMiniMapReduceCluster();
     UTIL.shutdownMiniCluster();
   }
@@ -90,12 +109,27 @@ public class TestTableInputFormat {
    * @throws IOException
    */
   public static Table createTable(byte[] tableName) throws IOException {
-    Table table = UTIL.createTable(TableName.valueOf(tableName), new byte[][]{FAMILY});
+    return createTable(tableName, new byte[][] { FAMILY });
+  }
+
+  /**
+   * Setup a table with two rows and values per column family.
+   *
+   * @param tableName
+   * @return
+   * @throws IOException
+   */
+  public static Table createTable(byte[] tableName, byte[][] families) throws IOException {
+    Table table = UTIL.createTable(TableName.valueOf(tableName), families);
     Put p = new Put("aaa".getBytes());
-    p.add(FAMILY, null, "value aaa".getBytes());
+    for (byte[] family : families) {
+      p.add(family, null, "value aaa".getBytes());
+    }
     table.put(p);
     p = new Put("bbb".getBytes());
-    p.add(FAMILY, null, "value bbb".getBytes());
+    for (byte[] family : families) {
+      p.add(family, null, "value bbb".getBytes());
+    }
     table.put(p);
     return table;
   }
@@ -150,46 +184,6 @@ public class TestTableInputFormat {
     assertFalse(more);
   }

-  /**
-   * Create table data and run tests on specified htable using the
-   * o.a.h.hbase.mapreduce API.
-   *
-   * @param table
-   * @throws IOException
-   * @throws InterruptedException
-   */
-  static void runTestMapreduce(Table table) throws IOException,
-      InterruptedException {
-    org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl trr =
-        new org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl();
-    Scan s = new Scan();
-    s.setStartRow("aaa".getBytes());
-    s.setStopRow("zzz".getBytes());
-    s.addFamily(FAMILY);
-    trr.setScan(s);
-    trr.setHTable(table);
-
-    trr.initialize(null, null);
-    Result r = new Result();
-    ImmutableBytesWritable key = new ImmutableBytesWritable();
-
-    boolean more = trr.nextKeyValue();
-    assertTrue(more);
-    key = trr.getCurrentKey();
-    r = trr.getCurrentValue();
-    checkResult(r, key, "aaa".getBytes(), "value aaa".getBytes());
-
-    more = trr.nextKeyValue();
-    assertTrue(more);
-    key = trr.getCurrentKey();
-    r = trr.getCurrentValue();
-    checkResult(r, key, "bbb".getBytes(), "value bbb".getBytes());
-
-    // no more data
-    more = trr.nextKeyValue();
-    assertFalse(more);
-  }
-
   /**
    * Create a table that IOE's on first scanner next call
    *
@@ -320,70 +314,85 @@ public class TestTableInputFormat {
   }

   /**
-   * Run test assuming no errors using newer mapreduce api
-   *
-   * @throws IOException
-   * @throws InterruptedException
+   * Verify the example we present in javadocs on TableInputFormatBase
    */
   @Test
-  public void testTableRecordReaderMapreduce() throws IOException,
-      InterruptedException {
-    Table table = createTable("table1-mr".getBytes());
-    runTestMapreduce(table);
-  }
-
-  /**
-   * Run test assuming Scanner IOException failure using newer mapreduce api
-   *
-   * @throws IOException
-   * @throws InterruptedException
-   */
-  @Test
-  public void testTableRecordReaderScannerFailMapreduce() throws IOException,
-      InterruptedException {
-    Table htable = createIOEScannerTable("table2-mr".getBytes(), 1);
-    runTestMapreduce(htable);
-  }
-
-  /**
-   * Run test assuming Scanner IOException failure using newer mapreduce api
-   *
-   * @throws IOException
-   * @throws InterruptedException
-   */
-  @Test(expected = IOException.class)
-  public void testTableRecordReaderScannerFailMapreduceTwice() throws IOException,
-      InterruptedException {
-    Table htable = createIOEScannerTable("table3-mr".getBytes(), 2);
-    runTestMapreduce(htable);
-  }
-
-  /**
-   * Run test assuming UnknownScannerException (which is a type of
-   * DoNotRetryIOException) using newer mapreduce api
-   *
-   * @throws InterruptedException
-   * @throws org.apache.hadoop.hbase.DoNotRetryIOException
-   */
-  @Test
-  public void testTableRecordReaderScannerTimeoutMapreduce()
-      throws IOException, InterruptedException {
-    Table htable = createDNRIOEScannerTable("table4-mr".getBytes(), 1);
-    runTestMapreduce(htable);
-  }
-
-  /**
-   * Run test assuming UnknownScannerException (which is a type of
-   * DoNotRetryIOException) using newer mapreduce api
-   *
-   * @throws InterruptedException
-   * @throws org.apache.hadoop.hbase.DoNotRetryIOException
-   */
-  @Test(expected = org.apache.hadoop.hbase.DoNotRetryIOException.class)
-  public void testTableRecordReaderScannerTimeoutMapreduceTwice()
-      throws IOException, InterruptedException {
-    Table htable = createDNRIOEScannerTable("table5-mr".getBytes(), 2);
-    runTestMapreduce(htable);
+  public void testExtensionOfTableInputFormatBase() throws IOException {
+    LOG.info("testing use of an InputFormat that extends InputFormatBase");
+    final Table table = createTable(Bytes.toBytes("exampleTable"),
+      new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
+    final JobConf job = MapreduceTestingShim.getJobConf(mrCluster);
+    job.setInputFormat(ExampleTIF.class);
+    job.setOutputFormat(NullOutputFormat.class);
+    job.setMapperClass(ExampleVerifier.class);
+    job.setNumReduceTasks(0);
+    LOG.debug("submitting job.");
+    final RunningJob run = JobClient.runJob(job);
+    assertTrue("job failed!", run.isSuccessful());
+    assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, run.getCounters()
+        .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getCounter());
+    assertEquals("Saw any instances of the filtered out row.", 0, run.getCounters()
+        .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getCounter());
+    assertEquals("Saw the wrong number of instances of columnA.", 1, run.getCounters()
+        .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getCounter());
+    assertEquals("Saw the wrong number of instances of columnB.", 1, run.getCounters()
+        .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getCounter());
+    assertEquals("Saw the wrong count of values for the filtered-for row.", 2, run.getCounters()
+        .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getCounter());
+    assertEquals("Saw the wrong count of values for the filtered-out row.", 0, run.getCounters()
+        .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getCounter());
+  }
+
+  public static class ExampleVerifier implements TableMap<NullWritable, NullWritable> {
+
+    @Override
+    public void configure(JobConf conf) {
+    }
+
+    @Override
+    public void map(ImmutableBytesWritable key, Result value,
+        OutputCollector<NullWritable,NullWritable> output,
+        Reporter reporter) throws IOException {
+      for (Cell cell : value.listCells()) {
+        reporter.getCounter(TestTableInputFormat.class.getName() + ":row",
+            Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()))
+            .increment(1l);
+        reporter.getCounter(TestTableInputFormat.class.getName() + ":family",
+            Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()))
+            .increment(1l);
+        reporter.getCounter(TestTableInputFormat.class.getName() + ":value",
+            Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()))
+            .increment(1l);
+      }
+    }
+
+    @Override
+    public void close() {
+    }
+  }
+
+  public static class ExampleTIF extends TableInputFormatBase implements JobConfigurable {
+
+    @Override
+    public void configure(JobConf job) {
+      try {
+        HTable exampleTable = new HTable(HBaseConfiguration.create(job),
+          Bytes.toBytes("exampleTable"));
+        // mandatory
+        setHTable(exampleTable);
+        byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
+          Bytes.toBytes("columnB") };
+        // mandatory
+        setInputColumns(inputColumns);
+        Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
+        // optional
+        setRowFilter(exampleFilter);
+      } catch (IOException exception) {
+        throw new RuntimeException("Failed to configure for job.", exception);
+      }
+    }
+
+  }
 }

hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/MapreduceTestingShim.java

@@ -52,6 +52,8 @@ abstract public class MapreduceTestingShim {
   abstract public JobContext newJobContext(Configuration jobConf)
       throws IOException;

+  abstract public Job newJob(Configuration conf) throws IOException;
+
   abstract public JobConf obtainJobConf(MiniMRCluster cluster);
@@ -66,6 +68,10 @@ abstract public class MapreduceTestingShim {
     return instance.obtainJobConf(cluster);
   }

+  public static Job createJob(Configuration conf) throws IOException {
+    return instance.newJob(conf);
+  }
+
   public static String getMROutputDirProp() {
     return instance.obtainMROutputDirProp();
   }
@@ -84,6 +90,20 @@ abstract public class MapreduceTestingShim {
             "Failed to instantiate new JobContext(jobConf, new JobID())", e);
       }
     }

+    @Override
+    public Job newJob(Configuration conf) throws IOException {
+      // Implementing:
+      // return new Job(conf);
+      Constructor<Job> c;
+      try {
+        c = Job.class.getConstructor(Configuration.class);
+        return c.newInstance(conf);
+      } catch (Exception e) {
+        throw new IllegalStateException(
+            "Failed to instantiate new Job(conf)", e);
+      }
+    }
+
     public JobConf obtainJobConf(MiniMRCluster cluster) {
       if (cluster == null) return null;
@@ -110,11 +130,16 @@ abstract public class MapreduceTestingShim {
   private static class MapreduceV2Shim extends MapreduceTestingShim {
     public JobContext newJobContext(Configuration jobConf) {
+      return newJob(jobConf);
+    }
+
+    @Override
+    public Job newJob(Configuration jobConf) {
       // Implementing:
       // return Job.getInstance(jobConf);
       try {
         Method m = Job.class.getMethod("getInstance", Configuration.class);
-        return (JobContext) m.invoke(null, jobConf); // static method, then arg
+        return (Job) m.invoke(null, jobConf); // static method, then arg
       } catch (Exception e) {
         e.printStackTrace();
         throw new IllegalStateException(
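
The reflection in both shims exists because this test code has to compile and run against MR1, where new Job(conf) is the way to construct a Job, and MR2, where the constructor is deprecated in favor of Job.getInstance(conf). Callers just ask the shim for a runnable Job, as the new mapreduce test below does; roughly:

// The shim returns a Job built via new Job(conf) on MR1 or
// Job.getInstance(conf) on MR2, whichever the classpath provides.
Job job = MapreduceTestingShim.createJob(UTIL.getConfiguration());
job.setInputFormatClass(ExampleTIF.class);
job.setOutputFormatClass(NullOutputFormat.class);
job.setMapperClass(ExampleVerifier.class);
job.setNumReduceTasks(0);
assertTrue("job failed!", job.waitForCompletion(true));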

hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableInputFormat.java (new file)

@ -0,0 +1,462 @@
/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.mapreduce;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.mockito.Matchers.anyObject;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.spy;
import java.io.IOException;
import java.util.Arrays;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.RegexStringComparator;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobConfigurable;
import org.apache.hadoop.mapred.MiniMRCluster;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;
/**
* This tests the TableInputFormat and its recovery semantics
*
*/
@Category(LargeTests.class)
public class TestTableInputFormat {
private static final Log LOG = LogFactory.getLog(TestTableInputFormat.class);
private final static HBaseTestingUtility UTIL = new HBaseTestingUtility();
private static MiniMRCluster mrCluster;
static final byte[] FAMILY = Bytes.toBytes("family");
private static final byte[][] columns = new byte[][] { FAMILY };
@BeforeClass
public static void beforeClass() throws Exception {
UTIL.startMiniCluster();
mrCluster = UTIL.startMiniMapReduceCluster();
}
@AfterClass
public static void afterClass() throws Exception {
UTIL.shutdownMiniMapReduceCluster();
UTIL.shutdownMiniCluster();
}
@Before
public void before() throws IOException {
LOG.info("before");
UTIL.ensureSomeRegionServersAvailable(1);
LOG.info("before done");
}
/**
* Setup a table with two rows and values.
*
* @param tableName
* @return
* @throws IOException
*/
public static Table createTable(byte[] tableName) throws IOException {
return createTable(tableName, new byte[][] { FAMILY });
}
/**
* Setup a table with two rows and values per column family.
*
* @param tableName
* @return
* @throws IOException
*/
public static Table createTable(byte[] tableName, byte[][] families) throws IOException {
Table table = UTIL.createTable(TableName.valueOf(tableName), families);
Put p = new Put("aaa".getBytes());
for (byte[] family : families) {
p.add(family, null, "value aaa".getBytes());
}
table.put(p);
p = new Put("bbb".getBytes());
for (byte[] family : families) {
p.add(family, null, "value bbb".getBytes());
}
table.put(p);
return table;
}
/**
* Verify that the result and key have expected values.
*
* @param r
* @param key
* @param expectedKey
* @param expectedValue
* @return
*/
static boolean checkResult(Result r, ImmutableBytesWritable key,
byte[] expectedKey, byte[] expectedValue) {
assertEquals(0, key.compareTo(expectedKey));
Map<byte[], byte[]> vals = r.getFamilyMap(FAMILY);
byte[] value = vals.values().iterator().next();
assertTrue(Arrays.equals(value, expectedValue));
return true; // if succeed
}
/**
* Create table data and run tests on specified htable using the
* o.a.h.hbase.mapreduce API.
*
* @param table
* @throws IOException
* @throws InterruptedException
*/
static void runTestMapreduce(Table table) throws IOException,
InterruptedException {
org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl trr =
new org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl();
Scan s = new Scan();
s.setStartRow("aaa".getBytes());
s.setStopRow("zzz".getBytes());
s.addFamily(FAMILY);
trr.setScan(s);
trr.setHTable(table);
trr.initialize(null, null);
Result r = new Result();
ImmutableBytesWritable key = new ImmutableBytesWritable();
boolean more = trr.nextKeyValue();
assertTrue(more);
key = trr.getCurrentKey();
r = trr.getCurrentValue();
checkResult(r, key, "aaa".getBytes(), "value aaa".getBytes());
more = trr.nextKeyValue();
assertTrue(more);
key = trr.getCurrentKey();
r = trr.getCurrentValue();
checkResult(r, key, "bbb".getBytes(), "value bbb".getBytes());
// no more data
more = trr.nextKeyValue();
assertFalse(more);
}
/**
* Create a table that IOE's on first scanner next call
*
* @throws IOException
*/
static Table createIOEScannerTable(byte[] name, final int failCnt)
throws IOException {
// build up a mock scanner stuff to fail the first time
Answer<ResultScanner> a = new Answer<ResultScanner>() {
int cnt = 0;
@Override
public ResultScanner answer(InvocationOnMock invocation) throws Throwable {
// first invocation return the busted mock scanner
if (cnt++ < failCnt) {
// create mock ResultScanner that always fails.
Scan scan = mock(Scan.class);
doReturn("bogus".getBytes()).when(scan).getStartRow(); // avoid npe
ResultScanner scanner = mock(ResultScanner.class);
// simulate TimeoutException / IOException
doThrow(new IOException("Injected exception")).when(scanner).next();
return scanner;
}
// otherwise return the real scanner.
return (ResultScanner) invocation.callRealMethod();
}
};
Table htable = spy(createTable(name));
doAnswer(a).when(htable).getScanner((Scan) anyObject());
return htable;
}
/**
* Create a table that throws a DoNotRetryIOException on first scanner next
* call
*
* @throws IOException
*/
static Table createDNRIOEScannerTable(byte[] name, final int failCnt)
throws IOException {
// build up a mock scanner stuff to fail the first time
Answer<ResultScanner> a = new Answer<ResultScanner>() {
int cnt = 0;
@Override
public ResultScanner answer(InvocationOnMock invocation) throws Throwable {
// first invocation return the busted mock scanner
if (cnt++ < failCnt) {
// create mock ResultScanner that always fails.
Scan scan = mock(Scan.class);
doReturn("bogus".getBytes()).when(scan).getStartRow(); // avoid npe
ResultScanner scanner = mock(ResultScanner.class);
invocation.callRealMethod(); // simulate UnknownScannerException
doThrow(
new UnknownScannerException("Injected simulated TimeoutException"))
.when(scanner).next();
return scanner;
}
// otherwise return the real scanner.
return (ResultScanner) invocation.callRealMethod();
}
};
Table htable = spy(createTable(name));
doAnswer(a).when(htable).getScanner((Scan) anyObject());
return htable;
}
/**
* Run test assuming no errors using newer mapreduce api
*
* @throws IOException
* @throws InterruptedException
*/
@Test
public void testTableRecordReaderMapreduce() throws IOException,
InterruptedException {
Table table = createTable("table1-mr".getBytes());
runTestMapreduce(table);
}
/**
* Run test assuming Scanner IOException failure using newer mapreduce api
*
* @throws IOException
* @throws InterruptedException
*/
@Test
public void testTableRecordReaderScannerFailMapreduce() throws IOException,
InterruptedException {
Table htable = createIOEScannerTable("table2-mr".getBytes(), 1);
runTestMapreduce(htable);
}
/**
* Run test assuming Scanner IOException failure using newer mapreduce api
*
* @throws IOException
* @throws InterruptedException
*/
@Test(expected = IOException.class)
public void testTableRecordReaderScannerFailMapreduceTwice() throws IOException,
InterruptedException {
Table htable = createIOEScannerTable("table3-mr".getBytes(), 2);
runTestMapreduce(htable);
}
/**
* Run test assuming UnknownScannerException (which is a type of
* DoNotRetryIOException) using newer mapreduce api
*
* @throws InterruptedException
* @throws org.apache.hadoop.hbase.DoNotRetryIOException
*/
@Test
public void testTableRecordReaderScannerTimeoutMapreduce()
throws IOException, InterruptedException {
Table htable = createDNRIOEScannerTable("table4-mr".getBytes(), 1);
runTestMapreduce(htable);
}
/**
* Run test assuming UnknownScannerException (which is a type of
* DoNotRetryIOException) using newer mapreduce api
*
* @throws InterruptedException
* @throws org.apache.hadoop.hbase.DoNotRetryIOException
*/
@Test(expected = org.apache.hadoop.hbase.DoNotRetryIOException.class)
public void testTableRecordReaderScannerTimeoutMapreduceTwice()
throws IOException, InterruptedException {
Table htable = createDNRIOEScannerTable("table5-mr".getBytes(), 2);
runTestMapreduce(htable);
}
/**
* Verify the example we present in javadocs on TableInputFormatBase
*/
@Test
public void testExtensionOfTableInputFormatBase()
throws IOException, InterruptedException, ClassNotFoundException {
LOG.info("testing use of an InputFormat that extends InputFormatBase");
final Table htable = createTable(Bytes.toBytes("exampleTable"),
new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
testInputFormat(ExampleTIF.class);
}
@Test
public void testDeprecatedExtensionOfTableInputFormatBase()
throws IOException, InterruptedException, ClassNotFoundException {
LOG.info("testing use of an InputFormat that extends InputFormatBase, " +
"using the approach documented in 0.98.");
final Table htable = createTable(Bytes.toBytes("exampleDeprecatedTable"),
new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
testInputFormat(ExampleDeprecatedTIF.class);
}
void testInputFormat(Class<? extends InputFormat> clazz)
throws IOException, InterruptedException, ClassNotFoundException {
final Job job = MapreduceTestingShim.createJob(UTIL.getConfiguration());
job.setInputFormatClass(clazz);
job.setOutputFormatClass(NullOutputFormat.class);
job.setMapperClass(ExampleVerifier.class);
job.setNumReduceTasks(0);
LOG.debug("submitting job.");
assertTrue("job failed!", job.waitForCompletion(true));
assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, job.getCounters()
.findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getValue());
assertEquals("Saw any instances of the filtered out row.", 0, job.getCounters()
.findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getValue());
assertEquals("Saw the wrong number of instances of columnA.", 1, job.getCounters()
.findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getValue());
assertEquals("Saw the wrong number of instances of columnB.", 1, job.getCounters()
.findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getValue());
assertEquals("Saw the wrong count of values for the filtered-for row.", 2, job.getCounters()
.findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getValue());
assertEquals("Saw the wrong count of values for the filtered-out row.", 0, job.getCounters()
.findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getValue());
}
public static class ExampleVerifier extends TableMapper<NullWritable, NullWritable> {
@Override
public void map(ImmutableBytesWritable key, Result value, Context context)
throws IOException {
for (Cell cell : value.listCells()) {
context.getCounter(TestTableInputFormat.class.getName() + ":row",
Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()))
.increment(1l);
context.getCounter(TestTableInputFormat.class.getName() + ":family",
Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()))
.increment(1l);
context.getCounter(TestTableInputFormat.class.getName() + ":value",
Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()))
.increment(1l);
}
}
}
public static class ExampleDeprecatedTIF extends TableInputFormatBase implements JobConfigurable {
@Override
public void configure(JobConf job) {
try {
HTable exampleTable = new HTable(HBaseConfiguration.create(job),
Bytes.toBytes("exampleDeprecatedTable"));
// mandatory
setHTable(exampleTable);
byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
Bytes.toBytes("columnB") };
// optional
Scan scan = new Scan();
for (byte[] family : inputColumns) {
scan.addFamily(family);
}
Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
scan.setFilter(exampleFilter);
setScan(scan);
} catch (IOException exception) {
throw new RuntimeException("Failed to configure for job.", exception);
}
}
}
public static class ExampleTIF extends TableInputFormatBase implements JobConfigurable {
private JobConf job;
@Override
public void configure(JobConf job) {
this.job = job;
byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
Bytes.toBytes("columnB") };
//optional
Scan scan = new Scan();
for (byte[] family : inputColumns) {
scan.addFamily(family);
}
Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
scan.setFilter(exampleFilter);
setScan(scan);
}
@Override
protected void initialize() {
if (job == null) {
throw new IllegalStateException("must have already gotten the JobConf before initialize " +
"is called.");
}
try {
Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create(job));
TableName tableName = TableName.valueOf("exampleTable");
// mandatory
initializeTable(connection, tableName);
} catch (IOException exception) {
throw new RuntimeException("Failed to initialize.", exception);
}
}
}
}