From bea004eaeb7ba33bf324ef3e7065cfdd614d8198 Mon Sep 17 00:00:00 2001 From: Kai Zheng Date: Sun, 9 Oct 2016 15:33:26 +0600 Subject: [PATCH] MAPREDUCE-6780. Add support for HDFS directory with erasure code policy to TeraGen and TeraSort. Contributed by Sammi Chen --- .../hadoop/examples/terasort/TeraGen.java | 3 +++ .../examples/terasort/TeraOutputFormat.java | 20 ++++++++++++++++--- .../hadoop/examples/terasort/TeraSort.java | 3 +++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraGen.java b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraGen.java index 22fe3443275..7fbb22af636 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraGen.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraGen.java @@ -246,6 +246,9 @@ public class TeraGen extends Configured implements Tool { private static void usage() throws IOException { System.err.println("teragen "); + System.err.println("If you want to generate data and store them as " + + "erasure code striping file, just make sure that the parent dir " + + "of has erasure code policy set"); } /** diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraOutputFormat.java b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraOutputFormat.java index fd3ea78fdcd..73c446d7e7e 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraOutputFormat.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraOutputFormat.java @@ -20,6 +20,8 @@ package org.apache.hadoop.examples.terasort; import 
java.io.IOException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; @@ -40,6 +42,7 @@ import org.apache.hadoop.mapreduce.security.TokenCache; * An output format that writes the key and value appended together. */ public class TeraOutputFormat extends FileOutputFormat { + private static final Log LOG = LogFactory.getLog(TeraOutputFormat.class); private OutputCommitter committer = null; /** @@ -74,10 +77,22 @@ public class TeraOutputFormat extends FileOutputFormat { out.write(key.getBytes(), 0, key.getLength()); out.write(value.getBytes(), 0, value.getLength()); } - + public void close(TaskAttemptContext context) throws IOException { if (finalSync) { - out.hsync(); + try { + out.hsync(); + } catch (UnsupportedOperationException e) { + /* + * The hsync operation is not yet supported on striped files with an + * erasure code policy, so this is a workaround that lets teragen and + * terasort write to directories containing striped files. If hsync + * becomes supported on striped files in the future, this workaround + * should be removed. 
+ */ + LOG.info("Operation hsync is not supported so far on path with " + + "erasure code policy set"); + } } out.close(); } @@ -135,5 +150,4 @@ public class TeraOutputFormat extends FileOutputFormat { } return committer; } - } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraSort.java b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraSort.java index 9beff3e92c1..040d13ffb6f 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraSort.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraSort.java @@ -287,6 +287,9 @@ public class TeraSort extends Configured implements Tool { for (TeraSortConfigKeys teraSortConfigKeys : TeraSortConfigKeys.values()) { System.err.println(teraSortConfigKeys.toString()); } + System.err.println("If you want to store the output data as " + + "erasure code striping file, just make sure that the parent dir " + + "of has erasure code policy set"); } public int run(String[] args) throws Exception {