mirror of https://github.com/apache/druid.git
Update DetermineHashedPartitionsJob.java
Using CombineTextInputFormat instead of TextInputFormat combines multiple input splits into a single mapper, which reduces the strain on the Hadoop platform. It greatly improves job completion time because there are fewer mappers to keep track of.
This commit is contained in:
parent
5ce80068d2
commit
de0a7b27e7
|
@ -49,6 +49,7 @@ import org.apache.hadoop.mapreduce.Job;
|
||||||
import org.apache.hadoop.mapreduce.Partitioner;
|
import org.apache.hadoop.mapreduce.Partitioner;
|
||||||
import org.apache.hadoop.mapreduce.Reducer;
|
import org.apache.hadoop.mapreduce.Reducer;
|
||||||
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
|
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
|
||||||
|
import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat;
|
||||||
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
|
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
|
||||||
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
|
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
|
||||||
import org.joda.time.DateTime;
|
import org.joda.time.DateTime;
|
||||||
|
@ -91,7 +92,7 @@ public class DetermineHashedPartitionsJob implements Jobby
|
||||||
);
|
);
|
||||||
|
|
||||||
JobHelper.injectSystemProperties(groupByJob);
|
JobHelper.injectSystemProperties(groupByJob);
|
||||||
groupByJob.setInputFormatClass(TextInputFormat.class);
|
groupByJob.setInputFormatClass(CombineTextInputFormat.class);
|
||||||
groupByJob.setMapperClass(DetermineCardinalityMapper.class);
|
groupByJob.setMapperClass(DetermineCardinalityMapper.class);
|
||||||
groupByJob.setMapOutputKeyClass(LongWritable.class);
|
groupByJob.setMapOutputKeyClass(LongWritable.class);
|
||||||
groupByJob.setMapOutputValueClass(BytesWritable.class);
|
groupByJob.setMapOutputValueClass(BytesWritable.class);
|
||||||
|
|
Loading…
Reference in New Issue