HADOOP-8449. hadoop fs -text fails with compressed sequence files with the codec file extension (backported from trunk) (harsh)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1355637 13f79535-47bb-0310-9956-ffa450edef68
parent b51d32719d
commit 6f299e4e25
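Background for the patch below: `hadoop fs -text` used to pick a decompression codec purely from the file name, so a SequenceFile that happened to be named `*.gz` was handed to GzipCodec and failed on the container's magic bytes. A minimal sketch (class name is mine, not from the patch) of the extension-only lookup that caused this:

```java
// Illustrative only: CompressionCodecFactory.getCodec() inspects the
// path, not the contents, so a SequenceFile named "file.gz" still
// resolves to GzipCodec.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

public class ExtensionDetection {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    CompressionCodecFactory factory = new CompressionCodecFactory(conf);
    // Lookup is by file name suffix alone.
    CompressionCodec codec = factory.getCodec(new Path("/user/foo/file.gz"));
    System.out.println(codec == null ? "no codec" : codec.getClass().getName());
    // Prints org.apache.hadoop.io.compress.GzipCodec even when the file
    // is really a SequenceFile container.
  }
}
```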
@@ -47,6 +47,9 @@ Release 2.0.1-alpha - UNRELEASED
     HADOOP-8524. Allow users to get source of a Configuration
     parameter (harsh)
 
+    HADOOP-8449. hadoop fs -text fails with compressed sequence files
+    with the codec file extension (harsh)
+
   BUG FIXES
 
     HADOOP-8372. NetUtils.normalizeHostName() incorrectly handles hostname
@@ -109,26 +109,33 @@ class Display extends FsCommand {
     protected InputStream getInputStream(PathData item) throws IOException {
      FSDataInputStream i = (FSDataInputStream)super.getInputStream(item);
 
-      // check codecs
-      CompressionCodecFactory cf = new CompressionCodecFactory(getConf());
-      CompressionCodec codec = cf.getCodec(item.path);
-      if (codec != null) {
-        return codec.createInputStream(i);
-      }
-
-      switch(i.readShort()) {
-        case 0x1f8b: { // RFC 1952
-          i.seek(0);
-          return new GZIPInputStream(i);
-        }
-        case 0x5345: { // 'S' 'E'
-          if (i.readByte() == 'Q') {
-            i.close();
-            return new TextRecordInputStream(item.stat);
-          }
+      // Check type of stream first
+      switch(i.readShort()) {
+        case 0x1f8b: { // RFC 1952
+          // Must be gzip
+          i.seek(0);
+          return new GZIPInputStream(i);
+        }
+        case 0x5345: { // 'S' 'E'
+          // Might be a SequenceFile
+          if (i.readByte() == 'Q') {
+            i.close();
+            return new TextRecordInputStream(item.stat);
+          }
+        }
+        default: {
+          // Check the type of compression instead, depending on Codec class's
+          // own detection methods, based on the provided path.
+          CompressionCodecFactory cf = new CompressionCodecFactory(getConf());
+          CompressionCodec codec = cf.getCodec(item.path);
+          if (codec != null) {
+            return codec.createInputStream(i);
+          }
           break;
         }
       }
+
+      // File is non-compressed, or not a file container we know.
       i.seek(0);
       return i;
     }
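The reordered detection above can be exercised outside of `Display` too. A self-contained sketch, assuming a plain local file and a hypothetical `MagicSniff` helper, of the same magic-number test: the first two bytes are `0x1f8b` for a gzip member (RFC 1952), and `'S' 'E'` followed by `'Q'` marks a SequenceFile container; anything else falls back to codec-by-extension:

```java
import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.IOException;

public class MagicSniff {
  public static String sniff(String path) throws IOException {
    try (DataInputStream in = new DataInputStream(new FileInputStream(path))) {
      int magic = in.readUnsignedShort();   // first two bytes, big-endian
      if (magic == 0x1f8b) {                // gzip member header (RFC 1952)
        return "gzip";
      }
      if (magic == 0x5345) {                // 'S' 'E'
        if (in.readByte() == 'Q') {         // 'SEQ' => SequenceFile container
          return "sequencefile";
        }
      }
      return "unknown";                     // defer to codec-by-extension
    }
  }

  public static void main(String[] args) throws IOException {
    System.out.println(sniff(args[0]));
  }
}
```

Checking content before consulting the codec factory is what lets a SequenceFile with a misleading `.gz` name be dispatched to `TextRecordInputStream` instead of a gzip decompressor.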
@@ -48,6 +48,8 @@ import org.apache.hadoop.hdfs.server.datanode.DataNode;
 import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
 import org.apache.hadoop.hdfs.tools.DFSAdmin;
 import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.ToolRunner;
@@ -545,7 +547,7 @@ public class TestDFSShell extends TestCase {
       textTest(new Path("/texttest").makeQualified(dfs.getUri(),
             dfs.getWorkingDirectory()), conf);
 
-      conf.set("fs.default.name", dfs.getUri().toString());
+      conf.set("fs.defaultFS", dfs.getUri().toString());
       final FileSystem lfs = FileSystem.getLocal(conf);
       textTest(new Path(TEST_ROOT_DIR, "texttest").makeQualified(lfs.getUri(),
             lfs.getWorkingDirectory()), conf);
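The hunk above also swaps the deprecated `fs.default.name` key for its current name, `fs.defaultFS`. A tiny illustration (hypothetical class name; the round trip relies on Configuration's built-in deprecation mapping between the two keys):

```java
import org.apache.hadoop.conf.Configuration;

public class DefaultFsKey {
  public static void main(String[] args) {
    Configuration conf = new Configuration(false);
    // Set the current key; new code and tests should use this name.
    conf.set("fs.defaultFS", "hdfs://localhost:8020");
    // The deprecation mapping should make the old key resolve to the
    // same value (and warn if code still sets the old one).
    System.out.println(conf.get("fs.default.name"));
  }
}
```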
@@ -564,6 +566,7 @@ public class TestDFSShell extends TestCase {
       OutputStream zout = new GZIPOutputStream(
           fs.create(new Path(root, "file.gz")));
       Random r = new Random();
+      bak = System.out;
       ByteArrayOutputStream file = new ByteArrayOutputStream();
       for (int i = 0; i < 1024; ++i) {
         char c = Character.forDigit(r.nextInt(26) + 10, 36);
@@ -572,7 +575,6 @@ public class TestDFSShell extends TestCase {
       }
       zout.close();
 
-      bak = System.out;
       ByteArrayOutputStream out = new ByteArrayOutputStream();
       System.setOut(new PrintStream(out));
 
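These two hunks move `bak = System.out;` ahead of the first redirect, so the `finally` block can always restore the real stdout no matter which part of the test fails. A generic sketch of that save/redirect/restore pattern (hypothetical helper, not from the patch):

```java
import java.io.ByteArrayOutputStream;
import java.io.PrintStream;

public class StdoutCapture {
  public static String capture(Runnable task) {
    PrintStream bak = System.out;               // save before any redirect
    ByteArrayOutputStream buf = new ByteArrayOutputStream();
    try {
      System.setOut(new PrintStream(buf));
      task.run();
    } finally {
      System.setOut(bak);                       // always restore
    }
    return buf.toString();
  }

  public static void main(String[] args) {
    System.out.print(capture(() -> System.out.println("hello")));
  }
}
```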
@@ -581,10 +583,28 @@ public class TestDFSShell extends TestCase {
       argv[1] = new Path(root, "file.gz").toString();
       int ret = ToolRunner.run(new FsShell(conf), argv);
       assertEquals("'-text " + argv[1] + " returned " + ret, 0, ret);
-      file.reset();
-      out.reset();
       assertTrue("Output doesn't match input",
           Arrays.equals(file.toByteArray(), out.toByteArray()));
+
+      // Create a sequence file with a gz extension, to test proper
+      // container detection
+      SequenceFile.Writer writer = SequenceFile.createWriter(
+          conf,
+          SequenceFile.Writer.file(new Path(root, "file.gz")),
+          SequenceFile.Writer.keyClass(Text.class),
+          SequenceFile.Writer.valueClass(Text.class));
+      writer.append(new Text("Foo"), new Text("Bar"));
+      writer.close();
+      out = new ByteArrayOutputStream();
+      System.setOut(new PrintStream(out));
+      argv = new String[2];
+      argv[0] = "-text";
+      argv[1] = new Path(root, "file.gz").toString();
+      ret = ToolRunner.run(new FsShell(conf), argv);
+      assertEquals("'-text " + argv[1] + " returned " + ret, 0, ret);
+      assertTrue("Output doesn't match input",
+          Arrays.equals("Foo\tBar\n".getBytes(), out.toByteArray()));
+      out.reset();
     } finally {
       if (null != bak) {
         System.setOut(bak);
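For reference, the new assertion expects `-text` to render each record as key, tab, value, newline. A round-trip sketch (hypothetical class name, local path assumed) that writes the same mislabeled `.gz` SequenceFile as the test and prints its records in that format via `SequenceFile.Reader`:

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SeqFileRoundTrip {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path path = new Path("/tmp/file.gz");   // extension lies about content

    SequenceFile.Writer writer = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(path),
        SequenceFile.Writer.keyClass(Text.class),
        SequenceFile.Writer.valueClass(Text.class));
    writer.append(new Text("Foo"), new Text("Bar"));
    writer.close();

    // Read back by container type, ignoring the .gz name entirely,
    // and print the way "hadoop fs -text" renders records.
    SequenceFile.Reader reader = new SequenceFile.Reader(conf,
        SequenceFile.Reader.file(path));
    Text key = new Text();
    Text value = new Text();
    while (reader.next(key, value)) {
      System.out.println(key + "\t" + value);  // Foo<TAB>Bar
    }
    reader.close();
  }
}
```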