MAPREDUCE-6780. Add support for HDFS directory with erasure code policy to TeraGen and TeraSort. Contributed by Sammi Chen

This commit is contained in:
Kai Zheng 2016-10-09 15:33:26 +06:00
parent ec0b70716c
commit bea004eaeb
3 changed files with 23 additions and 3 deletions

View File

@ -246,6 +246,9 @@ public class TeraGen extends Configured implements Tool {
private static void usage() throws IOException { private static void usage() throws IOException {
System.err.println("teragen <num rows> <output dir>"); System.err.println("teragen <num rows> <output dir>");
System.err.println("If you want to generate data and store them as " +
"erasure code striping file, just make sure that the parent dir " +
"of <output dir> has erasure code policy set");
} }
/** /**

View File

@ -20,6 +20,8 @@ package org.apache.hadoop.examples.terasort;
import java.io.IOException; import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileStatus;
@ -40,6 +42,7 @@ import org.apache.hadoop.mapreduce.security.TokenCache;
* An output format that writes the key and value appended together. * An output format that writes the key and value appended together.
*/ */
public class TeraOutputFormat extends FileOutputFormat<Text,Text> { public class TeraOutputFormat extends FileOutputFormat<Text,Text> {
private static final Log LOG = LogFactory.getLog(TeraOutputFormat.class);
private OutputCommitter committer = null; private OutputCommitter committer = null;
/** /**
@ -74,10 +77,22 @@ public class TeraOutputFormat extends FileOutputFormat<Text,Text> {
out.write(key.getBytes(), 0, key.getLength()); out.write(key.getBytes(), 0, key.getLength());
out.write(value.getBytes(), 0, value.getLength()); out.write(value.getBytes(), 0, value.getLength());
} }
public void close(TaskAttemptContext context) throws IOException { public void close(TaskAttemptContext context) throws IOException {
if (finalSync) { if (finalSync) {
out.hsync(); try {
out.hsync();
} catch (UnsupportedOperationException e) {
/*
* The hsync operation is not yet supported on striped files, i.e.
* files on a path with an erasure code policy set. Tolerating the
* exception here is a workaround that lets teragen and terasort
* write to directories holding striped files. Once hsync is
* supported on striped files, this workaround should be removed.
*/
LOG.info("Operation hsync is not supported so far on path with " +
"erasure code policy set");
}
} }
out.close(); out.close();
} }
@ -135,5 +150,4 @@ public class TeraOutputFormat extends FileOutputFormat<Text,Text> {
} }
return committer; return committer;
} }
} }

View File

@ -287,6 +287,9 @@ public class TeraSort extends Configured implements Tool {
for (TeraSortConfigKeys teraSortConfigKeys : TeraSortConfigKeys.values()) { for (TeraSortConfigKeys teraSortConfigKeys : TeraSortConfigKeys.values()) {
System.err.println(teraSortConfigKeys.toString()); System.err.println(teraSortConfigKeys.toString());
} }
System.err.println("If you want to store the output data as " +
"erasure code striping file, just make sure that the parent dir " +
"of <out> has erasure code policy set");
} }
public int run(String[] args) throws Exception { public int run(String[] args) throws Exception {