diff --git a/src/docbkx/book.xml b/src/docbkx/book.xml index 1b90bcab54c..b07c0a45446 100644 --- a/src/docbkx/book.xml +++ b/src/docbkx/book.xml @@ -1004,7 +1004,7 @@ public static class MyMapper extends TableMapper<ImmutableBytesWritable, Put&
- HBase MapReduce Summary Example + HBase MapReduce Summary to HBase Example The following example uses HBase as a MapReduce source and sink with a summarization step. This example will count the number of distinct instances of a value in a table and write those summarized counts in another table. @@ -1116,7 +1116,7 @@ if (!b) {
- HBase MapReduce Summary Without Reducer + HBase MapReduce Summary to HBase Without Reducer It is also possible to perform summaries without a reducer - if you use HBase as the reducer. An HBase target table would need to exist for the job summary. The HTable method incrementColumnValue @@ -1128,6 +1128,40 @@ if (!b) { In the end, the summary results are in HBase.
+
+ HBase MapReduce Summary to RDBMS + Sometimes it is more appropriate to generate summaries to an RDBMS. For these cases, it is possible + to generate summaries directly to an RDBMS via a custom reducer. The setup method + can connect to an RDBMS (the connection information can be passed via custom parameters in the context) and the + cleanup method can close the connection. + + It is critical to understand that the number of reducers for the job affects the summarization implementation, and + you'll have to design this into your reducer. Specifically, you must decide whether it is designed to run as a singleton (one reducer) + or as multiple reducers. Neither is right or wrong; it depends on your use-case. + + + public static class MyRdbmsReducer extends Reducer<Text, IntWritable, Text, IntWritable> { + + private Connection c = null; + + public void setup(Context context) { + // create DB connection... + } + + public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException { + // do summarization + // in this example the keys are Text, but this is just an example + } + + public void cleanup(Context context) { + // close db connection + } + +} + + In the end, the summary results are written to your RDBMS table(s). + +