MAPREDUCE-5478. TeraInputFormat unnecessarily defines its own FileSplit subclass (Sandy Ryza)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1517046 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sanford Ryza 2013-08-23 21:23:10 +00:00
parent 15632cd76f
commit 9ee38f3a84
3 changed files with 7 additions and 51 deletions

View File

@@ -181,6 +181,9 @@ Release 2.1.1-beta - UNRELEASED
  IMPROVEMENTS
    MAPREDUCE-5478. TeraInputFormat unnecessarily defines its own FileSplit
    subclass (Sandy Ryza)
  OPTIMIZATIONS
    MAPREDUCE-5446. TestJobHistoryEvents and TestJobHistoryParsing have race

View File

@@ -60,48 +60,6 @@ public class TeraInputFormat extends FileInputFormat<Text,Text> {
  private static MRJobConfig lastContext = null;
  private static List<InputSplit> lastResult = null;
static class TeraFileSplit extends FileSplit {
static private String[] ZERO_LOCATIONS = new String[0];
private String[] locations;
public TeraFileSplit() {
locations = ZERO_LOCATIONS;
}
public TeraFileSplit(Path file, long start, long length, String[] hosts) {
super(file, start, length, hosts);
try {
locations = super.getLocations();
} catch (IOException e) {
locations = ZERO_LOCATIONS;
}
}
// XXXXXX should this also be null-protected?
protected void setLocations(String[] hosts) {
locations = hosts;
}
@Override
public String[] getLocations() {
return locations;
}
public String toString() {
StringBuffer result = new StringBuffer();
result.append(getPath());
result.append(" from ");
result.append(getStart());
result.append(" length ");
result.append(getLength());
for(String host: getLocations()) {
result.append(" ");
result.append(host);
}
return result.toString();
}
}
  static class TextSampler implements IndexedSortable {
    private ArrayList<Text> records = new ArrayList<Text>();
@@ -325,11 +283,6 @@ public boolean nextKeyValue() throws IOException {
    return new TeraRecordReader();
  }
protected FileSplit makeSplit(Path file, long start, long length,
String[] hosts) {
return new TeraFileSplit(file, start, length, hosts);
}
  @Override
  public List<InputSplit> getSplits(JobContext job) throws IOException {
    if (job == lastContext) {
@@ -343,7 +296,7 @@ public List<InputSplit> getSplits(JobContext job) throws IOException {
    System.out.println("Spent " + (t2 - t1) + "ms computing base-splits.");
    if (job.getConfiguration().getBoolean(TeraScheduler.USE, true)) {
      TeraScheduler scheduler = new TeraScheduler(
lastResult.toArray(new TeraFileSplit[0]), job.getConfiguration()); lastResult.toArray(new FileSplit[0]), job.getConfiguration());
      lastResult = scheduler.getNewFileSplits();
      t3 = System.currentTimeMillis();
      System.out.println("Spent " + (t3 - t2) + "ms computing TeraScheduler splits.");

View File

@@ -24,7 +24,6 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.examples.terasort.TeraInputFormat.TeraFileSplit;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.server.tasktracker.TTConfig;
@@ -214,8 +213,9 @@ public List<InputSplit> getNewFileSplits() throws IOException {
    for(int i=0; i < splits.length; ++i) {
      if (splits[i].isAssigned) {
        // copy the split and fix up the locations
((TeraFileSplit) realSplits[i]).setLocations String[] newLocations = {splits[i].locations.get(0).hostname};
(new String[]{splits[i].locations.get(0).hostname}); realSplits[i] = new FileSplit(realSplits[i].getPath(),
realSplits[i].getStart(), realSplits[i].getLength(), newLocations);
        result[left++] = realSplits[i];
      } else {
        result[right--] = realSplits[i];