From 7d92cf2b3b90aaac615bead93267f228d762d9b1 Mon Sep 17 00:00:00 2001 From: Deepak Date: Thu, 22 May 2014 15:08:12 +0530 Subject: [PATCH] Update IndexGeneratorJob.java Using CombineTextInputFormat instead of TextInputFormat combines multiple splits for a single mapper and reduces the strain on the Hadoop platform. It greatly improves job completion time, as there are fewer mappers to keep track of. --- .../src/main/java/io/druid/indexer/IndexGeneratorJob.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/IndexGeneratorJob.java b/indexing-hadoop/src/main/java/io/druid/indexer/IndexGeneratorJob.java index f8633604be0..9d8a5fb6c33 100644 --- a/indexing-hadoop/src/main/java/io/druid/indexer/IndexGeneratorJob.java +++ b/indexing-hadoop/src/main/java/io/druid/indexer/IndexGeneratorJob.java @@ -62,6 +62,7 @@ import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.Partitioner; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; +import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.joda.time.DateTime; @@ -146,7 +147,7 @@ public class IndexGeneratorJob implements Jobby JobHelper.injectSystemProperties(job); - job.setInputFormatClass(TextInputFormat.class); + job.setInputFormatClass(CombineTextInputFormat.class); job.setMapperClass(IndexGeneratorMapper.class); job.setMapOutputValueClass(Text.class);