MAPREDUCE-6780. Add support for HDFS directory with erasure code policy to TeraGen and TeraSort. Contributed by Sammi Chen
This commit is contained in:
parent
ec0b70716c
commit
bea004eaeb
|
@ -246,6 +246,9 @@ public class TeraGen extends Configured implements Tool {
|
||||||
|
|
||||||
private static void usage() throws IOException {
|
private static void usage() throws IOException {
|
||||||
System.err.println("teragen <num rows> <output dir>");
|
System.err.println("teragen <num rows> <output dir>");
|
||||||
|
System.err.println("If you want to generate data and store them as " +
|
||||||
|
"erasure code striping file, just make sure that the parent dir " +
|
||||||
|
"of <output dir> has erasure code policy set");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -20,6 +20,8 @@ package org.apache.hadoop.examples.terasort;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
|
@ -40,6 +42,7 @@ import org.apache.hadoop.mapreduce.security.TokenCache;
|
||||||
* An output format that writes the key and value appended together.
|
* An output format that writes the key and value appended together.
|
||||||
*/
|
*/
|
||||||
public class TeraOutputFormat extends FileOutputFormat<Text,Text> {
|
public class TeraOutputFormat extends FileOutputFormat<Text,Text> {
|
||||||
|
private static final Log LOG = LogFactory.getLog(TeraOutputFormat.class);
|
||||||
private OutputCommitter committer = null;
|
private OutputCommitter committer = null;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -77,7 +80,19 @@ public class TeraOutputFormat extends FileOutputFormat<Text,Text> {
|
||||||
|
|
||||||
public void close(TaskAttemptContext context) throws IOException {
|
public void close(TaskAttemptContext context) throws IOException {
|
||||||
if (finalSync) {
|
if (finalSync) {
|
||||||
out.hsync();
|
try {
|
||||||
|
out.hsync();
|
||||||
|
} catch (UnsupportedOperationException e) {
|
||||||
|
/*
|
||||||
|
* Currently, hsync operation on striping file with erasure code
|
||||||
|
* policy is not supported yet. So this is a workaround to make
|
||||||
|
* teragen and terasort support directories with striping files. In
|
||||||
|
* future, if the hsync operation is supported on striping files, this
|
||||||
|
* workaround should be removed.
|
||||||
|
*/
|
||||||
|
LOG.info("Operation hsync is not supported so far on path with " +
|
||||||
|
"erasure code policy set");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
out.close();
|
out.close();
|
||||||
}
|
}
|
||||||
|
@ -135,5 +150,4 @@ public class TeraOutputFormat extends FileOutputFormat<Text,Text> {
|
||||||
}
|
}
|
||||||
return committer;
|
return committer;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -287,6 +287,9 @@ public class TeraSort extends Configured implements Tool {
|
||||||
for (TeraSortConfigKeys teraSortConfigKeys : TeraSortConfigKeys.values()) {
|
for (TeraSortConfigKeys teraSortConfigKeys : TeraSortConfigKeys.values()) {
|
||||||
System.err.println(teraSortConfigKeys.toString());
|
System.err.println(teraSortConfigKeys.toString());
|
||||||
}
|
}
|
||||||
|
System.err.println("If you want to store the output data as " +
|
||||||
|
"erasure code striping file, just make sure that the parent dir " +
|
||||||
|
"of <out> has erasure code policy set");
|
||||||
}
|
}
|
||||||
|
|
||||||
public int run(String[] args) throws Exception {
|
public int run(String[] args) throws Exception {
|
||||||
|
|
Loading…
Reference in New Issue