diff --git a/src/docbkx/book.xml b/src/docbkx/book.xml
index a7e0861998f..1a62f667c4a 100644
--- a/src/docbkx/book.xml
+++ b/src/docbkx/book.xml
@@ -1017,7 +1017,7 @@ TableMapReduceUtil.initTableMapperJob(
job);
TableMapReduceUtil.initTableReducerJob(
targetTable, // output table
- MyReducer.class, // reducer class
+ MyTableReducer.class, // reducer class
job);
job.setNumReduceTasks(1); // at least one, adjust as required
@@ -1044,7 +1044,7 @@ public static class MyMapper extends TableMapper<Text, IntWritable> {
In the reducer, the "ones" are counted (just as in any other MR example that does this), and then a Put is emitted.
-public static class MyReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {
+public static class MyTableReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {
public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
int i = 0;
@@ -1058,10 +1058,55 @@ public static class MyReducer extends TableReducer<Text, IntWritable, Immutab
}
}
-
+
+ HBase MapReduce Summary to File Example
+ This is very similar to the summary example above, with the exception that it uses HBase as the MapReduce source
+ but HDFS as the sink. The differences are in the job setup and in the reducer; the mapper remains the same.
+
+
+Configuration config = HBaseConfiguration.create();
+Job job = new Job(config,"ExampleSummaryToFile");
+job.setJarByClass(MySummaryFileJob.class); // class that contains mapper and reducer
+
+Scan scan = new Scan();
+scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
+scan.setCacheBlocks(false); // don't set to true for MR jobs
+// set other scan attrs
+
+TableMapReduceUtil.initTableMapperJob(
+ sourceTable, // input table
+ scan, // Scan instance to control CF and attribute selection
+ MyMapper.class, // mapper class
+ Text.class, // mapper output key
+ IntWritable.class, // mapper output value
+ job);
+job.setReducerClass(MyReducer.class); // reducer class
+job.setNumReduceTasks(1); // at least one, adjust as required
+FileOutputFormat.setOutputPath(job, new Path("/tmp/mr/mySummaryFile")); // adjust directories as required
+
+boolean b = job.waitForCompletion(true);
+if (!b) {
+ throw new IOException("error with job!");
+}
+
+ As stated above, the previous Mapper can run unchanged with this example.
+ As for the Reducer, it is a "generic" Reducer instead of extending TableReducer and emitting Puts.
+
+ public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
+
+ public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
+ int i = 0;
+ for (IntWritable val : values) {
+ i += val.get();
+ }
+ context.write(key, new IntWritable(i));
+ }
+}
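+
+ For reference, here is a sketch of the imports this file-based summary example relies on.
+ It assumes the standard Hadoop and HBase package locations; adjust for your versions.
+
+// Hadoop core and MapReduce
+import java.io.IOException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+// HBase client and MapReduce integration
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;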
+
+
Accessing Other HBase Tables in a MapReduce Job
Although the framework currently allows one HBase table as input to a
diff --git a/src/docbkx/troubleshooting.xml b/src/docbkx/troubleshooting.xml
index 6a568233e0e..9b1a53dcb4f 100644
--- a/src/docbkx/troubleshooting.xml
+++ b/src/docbkx/troubleshooting.xml
@@ -535,6 +535,8 @@ hadoop 17789 155 35.2 9067824 8604364 ? S<l Mar04 9855:48 /usr/java/j
hadoop fs -dus /hbase/ ...returns the summarized disk utilization for all HBase objects.
hadoop fs -dus /hbase/myTable ...returns the summarized disk utilization for the HBase table 'myTable'.
hadoop fs -du /hbase/myTable ...returns a list of the regions under the HBase table 'myTable' and their disk utilization.
+ For more information on HDFS shell commands, see the HDFS FileSystem Shell documentation.
+ Browsing HDFS for HBase Objects
@@ -557,6 +559,9 @@ hadoop 17789 155 35.2 9067824 8604364 ? S<l Mar04 9855:48 /usr/java/j
/<RegionServer> (RegionServers)
/<HLog> (WAL HLog files for the RegionServer)
+
+ See the HDFS User Guide for other non-shell diagnostic
+ utilities like fsck.
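+
+ As an illustration, checking the health of the files under the HBase root directory
+ might look like the following (a sketch; this assumes the default /hbase root
+ directory, and fsck options and output vary by Hadoop version):
+
+hadoop fsck /hbase -files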
Use Cases