diff --git a/src/docbkx/book.xml b/src/docbkx/book.xml index a7e0861998f..1a62f667c4a 100644 --- a/src/docbkx/book.xml +++ b/src/docbkx/book.xml @@ -1017,7 +1017,7 @@ TableMapReduceUtil.initTableMapperJob( job); TableMapReduceUtil.initTableReducerJob( targetTable, // output table - MyReducer.class, // reducer class + MyTableReducer.class, // reducer class job); job.setNumReduceTasks(1); // at least one, adjust as required @@ -1044,7 +1044,7 @@ public static class MyMapper extends TableMapper<Text, IntWritable> { In the reducer, the "ones" are counted (just like any other MR example that does this), and then emits a Put. -public static class MyReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable> { +public static class MyTableReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable> { public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException { int i = 0; @@ -1058,10 +1058,55 @@ public static class MyReducer extends TableReducer<Text, IntWritable, Immutab } } - +
+ HBase MapReduce Summary to File Example + This is very similar to the summary example above, with the exception that this is using HBase as a MapReduce source + but HDFS as the sink. The differences are in the job setup and in the reducer. The mapper remains the same. + + +Configuration config = HBaseConfiguration.create(); +Job job = new Job(config,"ExampleSummaryToFile"); +job.setJarByClass(MySummaryFileJob.class); // class that contains mapper and reducer + +Scan scan = new Scan(); +scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs +scan.setCacheBlocks(false); // don't set to true for MR jobs +// set other scan attrs + +TableMapReduceUtil.initTableMapperJob( + sourceTable, // input table + scan, // Scan instance to control CF and attribute selection + MyMapper.class, // mapper class + Text.class, // mapper output key + IntWritable.class, // mapper output value + job); +job.setReducerClass(MyReducer.class); // reducer class +job.setNumReduceTasks(1); // at least one, adjust as required +FileOutputFormat.setOutputPath(job, new Path("/tmp/mr/mySummaryFile")); // adjust directories as required + +boolean b = job.waitForCompletion(true); +if (!b) { + throw new IOException("error with job!"); +} + + As stated above, the previous Mapper can run unchanged with this example. + As for the Reducer, it is a "generic" Reducer instead of extending TableReducer and emitting Puts. + + public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> { + + public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException { + int i = 0; + for (IntWritable val : values) { + i += val.get(); + } + context.write(key, new IntWritable(i)); + } +} +
+
Accessing Other HBase Tables in a MapReduce Job Although the framework currently allows one HBase table as input to a diff --git a/src/docbkx/troubleshooting.xml b/src/docbkx/troubleshooting.xml index 6a568233e0e..9b1a53dcb4f 100644 --- a/src/docbkx/troubleshooting.xml +++ b/src/docbkx/troubleshooting.xml @@ -535,6 +535,8 @@ hadoop 17789 155 35.2 9067824 8604364 ? S<l Mar04 9855:48 /usr/java/j hadoop fs -dus /hbase/ ...returns the summarized disk utilization for all HBase objects. hadoop fs -dus /hbase/myTable ...returns the summarized disk utilization for the HBase table 'myTable'. hadoop fs -du /hbase/myTable ...returns a list of the regions under the HBase table 'myTable' and their disk utilization. + For more information on HDFS shell commands, see the HDFS FileSystem Shell documentation. +
Browsing HDFS for HBase Objects @@ -557,6 +559,9 @@ hadoop 17789 155 35.2 9067824 8604364 ? S<l Mar04 9855:48 /usr/java/j /<RegionServer> (RegionServers) /<HLog> (WAL HLog files for the RegionServer) + + See the HDFS User Guide for other non-shell diagnostic + utilities like fsck.
Use Cases