MAPREDUCE-6730. Use StandardCharsets instead of String overload in TextOutputFormat. Contributed by Sahil Kang.
This closes #114
(cherry picked from commit 70c2781152)
This commit is contained in:
parent
19d894538d
commit
e54de94e5a
|
@@ -20,7 +20,7 @@ package org.apache.hadoop.mapred;
|
||||||
|
|
||||||
import java.io.DataOutputStream;
|
import java.io.DataOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.UnsupportedEncodingException;
|
import java.nio.charset.StandardCharsets;
|
||||||
|
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
import org.apache.hadoop.classification.InterfaceStability;
|
import org.apache.hadoop.classification.InterfaceStability;
|
||||||
|
@@ -43,26 +43,16 @@ public class TextOutputFormat<K, V> extends FileOutputFormat<K, V> {
|
||||||
|
|
||||||
protected static class LineRecordWriter<K, V>
|
protected static class LineRecordWriter<K, V>
|
||||||
implements RecordWriter<K, V> {
|
implements RecordWriter<K, V> {
|
||||||
private static final String utf8 = "UTF-8";
|
private static final byte[] NEWLINE =
|
||||||
private static final byte[] newline;
|
"\n".getBytes(StandardCharsets.UTF_8);
|
||||||
static {
|
|
||||||
try {
|
|
||||||
newline = "\n".getBytes(utf8);
|
|
||||||
} catch (UnsupportedEncodingException uee) {
|
|
||||||
throw new IllegalArgumentException("can't find " + utf8 + " encoding");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected DataOutputStream out;
|
protected DataOutputStream out;
|
||||||
private final byte[] keyValueSeparator;
|
private final byte[] keyValueSeparator;
|
||||||
|
|
||||||
public LineRecordWriter(DataOutputStream out, String keyValueSeparator) {
|
public LineRecordWriter(DataOutputStream out, String keyValueSeparator) {
|
||||||
this.out = out;
|
this.out = out;
|
||||||
try {
|
this.keyValueSeparator =
|
||||||
this.keyValueSeparator = keyValueSeparator.getBytes(utf8);
|
keyValueSeparator.getBytes(StandardCharsets.UTF_8);
|
||||||
} catch (UnsupportedEncodingException uee) {
|
|
||||||
throw new IllegalArgumentException("can't find " + utf8 + " encoding");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public LineRecordWriter(DataOutputStream out) {
|
public LineRecordWriter(DataOutputStream out) {
|
||||||
|
@@ -80,7 +70,7 @@ public class TextOutputFormat<K, V> extends FileOutputFormat<K, V> {
|
||||||
Text to = (Text) o;
|
Text to = (Text) o;
|
||||||
out.write(to.getBytes(), 0, to.getLength());
|
out.write(to.getBytes(), 0, to.getLength());
|
||||||
} else {
|
} else {
|
||||||
out.write(o.toString().getBytes(utf8));
|
out.write(o.toString().getBytes(StandardCharsets.UTF_8));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -101,7 +91,7 @@ public class TextOutputFormat<K, V> extends FileOutputFormat<K, V> {
|
||||||
if (!nullValue) {
|
if (!nullValue) {
|
||||||
writeObject(value);
|
writeObject(value);
|
||||||
}
|
}
|
||||||
out.write(newline);
|
out.write(NEWLINE);
|
||||||
}
|
}
|
||||||
|
|
||||||
public synchronized void close(Reporter reporter) throws IOException {
|
public synchronized void close(Reporter reporter) throws IOException {
|
||||||
|
|
|
@@ -20,7 +20,7 @@ package org.apache.hadoop.mapreduce.lib.output;
|
||||||
|
|
||||||
import java.io.DataOutputStream;
|
import java.io.DataOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.UnsupportedEncodingException;
|
import java.nio.charset.StandardCharsets;
|
||||||
|
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
import org.apache.hadoop.classification.InterfaceStability;
|
import org.apache.hadoop.classification.InterfaceStability;
|
||||||
|
@@ -45,26 +45,16 @@ public class TextOutputFormat<K, V> extends FileOutputFormat<K, V> {
|
||||||
public static String SEPERATOR = "mapreduce.output.textoutputformat.separator";
|
public static String SEPERATOR = "mapreduce.output.textoutputformat.separator";
|
||||||
protected static class LineRecordWriter<K, V>
|
protected static class LineRecordWriter<K, V>
|
||||||
extends RecordWriter<K, V> {
|
extends RecordWriter<K, V> {
|
||||||
private static final String utf8 = "UTF-8";
|
private static final byte[] NEWLINE =
|
||||||
private static final byte[] newline;
|
"\n".getBytes(StandardCharsets.UTF_8);
|
||||||
static {
|
|
||||||
try {
|
|
||||||
newline = "\n".getBytes(utf8);
|
|
||||||
} catch (UnsupportedEncodingException uee) {
|
|
||||||
throw new IllegalArgumentException("can't find " + utf8 + " encoding");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected DataOutputStream out;
|
protected DataOutputStream out;
|
||||||
private final byte[] keyValueSeparator;
|
private final byte[] keyValueSeparator;
|
||||||
|
|
||||||
public LineRecordWriter(DataOutputStream out, String keyValueSeparator) {
|
public LineRecordWriter(DataOutputStream out, String keyValueSeparator) {
|
||||||
this.out = out;
|
this.out = out;
|
||||||
try {
|
this.keyValueSeparator =
|
||||||
this.keyValueSeparator = keyValueSeparator.getBytes(utf8);
|
keyValueSeparator.getBytes(StandardCharsets.UTF_8);
|
||||||
} catch (UnsupportedEncodingException uee) {
|
|
||||||
throw new IllegalArgumentException("can't find " + utf8 + " encoding");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public LineRecordWriter(DataOutputStream out) {
|
public LineRecordWriter(DataOutputStream out) {
|
||||||
|
@@ -82,7 +72,7 @@ public class TextOutputFormat<K, V> extends FileOutputFormat<K, V> {
|
||||||
Text to = (Text) o;
|
Text to = (Text) o;
|
||||||
out.write(to.getBytes(), 0, to.getLength());
|
out.write(to.getBytes(), 0, to.getLength());
|
||||||
} else {
|
} else {
|
||||||
out.write(o.toString().getBytes(utf8));
|
out.write(o.toString().getBytes(StandardCharsets.UTF_8));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -103,7 +93,7 @@ public class TextOutputFormat<K, V> extends FileOutputFormat<K, V> {
|
||||||
if (!nullValue) {
|
if (!nullValue) {
|
||||||
writeObject(value);
|
writeObject(value);
|
||||||
}
|
}
|
||||||
out.write(newline);
|
out.write(NEWLINE);
|
||||||
}
|
}
|
||||||
|
|
||||||
public synchronized
|
public synchronized
|
||||||
|
|
Loading…
Reference in New Issue