Update IndexGeneratorJob.java

CombineTextInputFormat instead of TextInputFormat combines multiple splits for a single mapper and reduces the strain on hadoop platform. It greatly improves job completion time as there are fewer number of mappers to bookkeep.
This commit is contained in:
Deepak 2014-05-22 15:08:12 +05:30
parent de0a7b27e7
commit 7d92cf2b3b
1 changed files with 2 additions and 1 deletions

View File

@ -62,6 +62,7 @@ import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.joda.time.DateTime;
@ -146,7 +147,7 @@ public class IndexGeneratorJob implements Jobby
JobHelper.injectSystemProperties(job);
job.setInputFormatClass(TextInputFormat.class);
job.setInputFormatClass(CombineTextInputFormat.class);
job.setMapperClass(IndexGeneratorMapper.class);
job.setMapOutputValueClass(Text.class);