From 5b8b923cd1700f808d5e958819c52e70c626fee4 Mon Sep 17 00:00:00 2001 From: Sanford Ryza Date: Fri, 23 Aug 2013 21:28:16 +0000 Subject: [PATCH] MAPREDUCE-5478. TeraInputFormat unnecessarily defines its own FileSplit subclass (Sandy Ryza) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1517055 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 ++ .../examples/terasort/TeraInputFormat.java | 49 +------------------ .../examples/terasort/TeraScheduler.java | 6 +-- 3 files changed, 7 insertions(+), 51 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 08b830c617e..ff9b51511a4 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -44,6 +44,9 @@ Release 2.1.1-beta - UNRELEASED IMPROVEMENTS + MAPREDUCE-5478. TeraInputFormat unnecessarily defines its own FileSplit + subclass (Sandy Ryza) + OPTIMIZATIONS MAPREDUCE-5446. TestJobHistoryEvents and TestJobHistoryParsing have race diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraInputFormat.java b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraInputFormat.java index f957ad9e4f6..88b12dd1ff4 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraInputFormat.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraInputFormat.java @@ -60,48 +60,6 @@ public class TeraInputFormat extends FileInputFormat { private static MRJobConfig lastContext = null; private static List lastResult = null; - static class TeraFileSplit extends FileSplit { - static private String[] ZERO_LOCATIONS = new String[0]; - - private String[] locations; - - public TeraFileSplit() { - locations = ZERO_LOCATIONS; - } - public TeraFileSplit(Path file, long start, long length, String[] hosts) { - super(file, start, length, hosts); - try { - locations = super.getLocations(); - } catch (IOException e) { - locations = ZERO_LOCATIONS; - } - } - - // XXXXXX should this also be null-protected? - protected void setLocations(String[] hosts) { - locations = hosts; - } - - @Override - public String[] getLocations() { - return locations; - } - - public String toString() { - StringBuffer result = new StringBuffer(); - result.append(getPath()); - result.append(" from "); - result.append(getStart()); - result.append(" length "); - result.append(getLength()); - for(String host: getLocations()) { - result.append(" "); - result.append(host); - } - return result.toString(); - } - } - static class TextSampler implements IndexedSortable { private ArrayList records = new ArrayList(); @@ -325,11 +283,6 @@ public class TeraInputFormat extends FileInputFormat { return new TeraRecordReader(); } - protected FileSplit makeSplit(Path file, long start, long length, - String[] hosts) { - return new TeraFileSplit(file, start, length, hosts); - } - @Override public List getSplits(JobContext job) throws IOException { if (job == lastContext) { @@ -343,7 +296,7 @@ public class TeraInputFormat extends FileInputFormat { System.out.println("Spent " + (t2 - t1) + "ms computing base-splits."); if (job.getConfiguration().getBoolean(TeraScheduler.USE, true)) { TeraScheduler scheduler = new TeraScheduler( - lastResult.toArray(new TeraFileSplit[0]), job.getConfiguration()); + lastResult.toArray(new FileSplit[0]), job.getConfiguration()); lastResult = scheduler.getNewFileSplits(); t3 = System.currentTimeMillis(); System.out.println("Spent " + (t3 - t2) + "ms computing TeraScheduler splits."); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraScheduler.java b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraScheduler.java index 82a451246c7..7095dd7d28e 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraScheduler.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraScheduler.java @@ -24,7 +24,6 @@ import java.util.*; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.examples.terasort.TeraInputFormat.TeraFileSplit; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.lib.input.FileSplit; import org.apache.hadoop.mapreduce.server.tasktracker.TTConfig; @@ -214,8 +213,9 @@ class TeraScheduler { for(int i=0; i < splits.length; ++i) { if (splits[i].isAssigned) { // copy the split and fix up the locations - ((TeraFileSplit) realSplits[i]).setLocations - (new String[]{splits[i].locations.get(0).hostname}); + String[] newLocations = {splits[i].locations.get(0).hostname}; + realSplits[i] = new FileSplit(realSplits[i].getPath(), + realSplits[i].getStart(), realSplits[i].getLength(), newLocations); result[left++] = realSplits[i]; } else { result[right--] = realSplits[i];