HADOOP-6984. Combine the compress kind and the codec in the same option for SequenceFiles. (cdouglas via omalley)


git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1004900 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Owen O'Malley 2010-10-06 06:16:31 +00:00
parent aeafc8f3ef
commit 5001565459
6 changed files with 75 additions and 93 deletions

View File

@ -253,6 +253,9 @@ Trunk (unreleased changes)
HADOOP-6989. Correct the parameter for SetFile to set the value type HADOOP-6989. Correct the parameter for SetFile to set the value type
for SetFile to be NullWritable instead of the key. (cdouglas via omalley) for SetFile to be NullWritable instead of the key. (cdouglas via omalley)
HADOOP-6984. Combine the compress kind and the codec in the same option
for SequenceFiles. (cdouglas via omalley)
Release 0.21.1 - Unreleased Release 0.21.1 - Unreleased
IMPROVEMENTS IMPROVEMENTS

View File

@ -54,7 +54,7 @@ public class ArrayFile extends MapFile {
super(conf, new Path(file), super(conf, new Path(file),
keyClass(LongWritable.class), keyClass(LongWritable.class),
valueClass(valClass), valueClass(valClass),
compressionType(compress), compression(compress),
progressable(progress)); progressable(progress));
} }

View File

@ -89,8 +89,7 @@ public class BloomMapFile {
Class<? extends Writable> valClass, CompressionType compress, Class<? extends Writable> valClass, CompressionType compress,
CompressionCodec codec, Progressable progress) throws IOException { CompressionCodec codec, Progressable progress) throws IOException {
this(conf, new Path(dirName), keyClass(keyClass), valueClass(valClass), this(conf, new Path(dirName), keyClass(keyClass), valueClass(valClass),
compressionType(compress), compressionCodec(codec), compression(compress, codec), progressable(progress));
progressable(progress));
} }
@Deprecated @Deprecated
@ -99,7 +98,7 @@ public class BloomMapFile {
Class valClass, CompressionType compress, Class valClass, CompressionType compress,
Progressable progress) throws IOException { Progressable progress) throws IOException {
this(conf, new Path(dirName), keyClass(keyClass), valueClass(valClass), this(conf, new Path(dirName), keyClass(keyClass), valueClass(valClass),
compressionType(compress), progressable(progress)); compression(compress), progressable(progress));
} }
@Deprecated @Deprecated
@ -108,7 +107,7 @@ public class BloomMapFile {
Class valClass, CompressionType compress) Class valClass, CompressionType compress)
throws IOException { throws IOException {
this(conf, new Path(dirName), keyClass(keyClass), valueClass(valClass), this(conf, new Path(dirName), keyClass(keyClass), valueClass(valClass),
compressionType(compress)); compression(compress));
} }
@Deprecated @Deprecated
@ -117,8 +116,8 @@ public class BloomMapFile {
CompressionType compress, CompressionCodec codec, Progressable progress) CompressionType compress, CompressionCodec codec, Progressable progress)
throws IOException { throws IOException {
this(conf, new Path(dirName), comparator(comparator), this(conf, new Path(dirName), comparator(comparator),
valueClass(valClass), compressionType(compress), valueClass(valClass), compression(compress, codec),
compressionCodec(codec), progressable(progress)); progressable(progress));
} }
@Deprecated @Deprecated
@ -126,7 +125,7 @@ public class BloomMapFile {
WritableComparator comparator, Class valClass, WritableComparator comparator, Class valClass,
CompressionType compress, Progressable progress) throws IOException { CompressionType compress, Progressable progress) throws IOException {
this(conf, new Path(dirName), comparator(comparator), this(conf, new Path(dirName), comparator(comparator),
valueClass(valClass), compressionType(compress), valueClass(valClass), compression(compress),
progressable(progress)); progressable(progress));
} }
@ -135,7 +134,7 @@ public class BloomMapFile {
WritableComparator comparator, Class valClass, CompressionType compress) WritableComparator comparator, Class valClass, CompressionType compress)
throws IOException { throws IOException {
this(conf, new Path(dirName), comparator(comparator), this(conf, new Path(dirName), comparator(comparator),
valueClass(valClass), compressionType(compress)); valueClass(valClass), compression(compress));
} }
@Deprecated @Deprecated

View File

@ -113,7 +113,7 @@ public class MapFile {
CompressionType compress, CompressionType compress,
Progressable progress) throws IOException { Progressable progress) throws IOException {
this(conf, new Path(dirName), keyClass(keyClass), valueClass(valClass), this(conf, new Path(dirName), keyClass(keyClass), valueClass(valClass),
compressionType(compress), progressable(progress)); compression(compress), progressable(progress));
} }
/** Create the named map for keys of the named class. /** Create the named map for keys of the named class.
@ -125,8 +125,7 @@ public class MapFile {
CompressionType compress, CompressionCodec codec, CompressionType compress, CompressionCodec codec,
Progressable progress) throws IOException { Progressable progress) throws IOException {
this(conf, new Path(dirName), keyClass(keyClass), valueClass(valClass), this(conf, new Path(dirName), keyClass(keyClass), valueClass(valClass),
compressionType(compress), compressionCodec(codec), compression(compress, codec), progressable(progress));
progressable(progress));
} }
/** Create the named map for keys of the named class. /** Create the named map for keys of the named class.
@ -137,7 +136,7 @@ public class MapFile {
Class<? extends WritableComparable> keyClass, Class valClass, Class<? extends WritableComparable> keyClass, Class valClass,
CompressionType compress) throws IOException { CompressionType compress) throws IOException {
this(conf, new Path(dirName), keyClass(keyClass), this(conf, new Path(dirName), keyClass(keyClass),
valueClass(valClass), compressionType(compress)); valueClass(valClass), compression(compress));
} }
/** Create the named map using the named key comparator. /** Create the named map using the named key comparator.
@ -159,7 +158,7 @@ public class MapFile {
WritableComparator comparator, Class valClass, WritableComparator comparator, Class valClass,
SequenceFile.CompressionType compress) throws IOException { SequenceFile.CompressionType compress) throws IOException {
this(conf, new Path(dirName), comparator(comparator), this(conf, new Path(dirName), comparator(comparator),
valueClass(valClass), compressionType(compress)); valueClass(valClass), compression(compress));
} }
/** Create the named map using the named key comparator. /** Create the named map using the named key comparator.
@ -171,7 +170,7 @@ public class MapFile {
SequenceFile.CompressionType compress, SequenceFile.CompressionType compress,
Progressable progress) throws IOException { Progressable progress) throws IOException {
this(conf, new Path(dirName), comparator(comparator), this(conf, new Path(dirName), comparator(comparator),
valueClass(valClass), compressionType(compress), valueClass(valClass), compression(compress),
progressable(progress)); progressable(progress));
} }
@ -184,8 +183,8 @@ public class MapFile {
SequenceFile.CompressionType compress, CompressionCodec codec, SequenceFile.CompressionType compress, CompressionCodec codec,
Progressable progress) throws IOException { Progressable progress) throws IOException {
this(conf, new Path(dirName), comparator(comparator), this(conf, new Path(dirName), comparator(comparator),
valueClass(valClass), compressionType(compress), valueClass(valClass), compression(compress, codec),
compressionCodec(codec), progressable(progress)); progressable(progress));
} }
// our options are a superset of sequence file writer options // our options are a superset of sequence file writer options
@ -221,13 +220,14 @@ public class MapFile {
} }
public static public static
SequenceFile.Writer.Option compressionType(CompressionType value) { SequenceFile.Writer.Option compression(CompressionType type) {
return SequenceFile.Writer.compressionType(value); return SequenceFile.Writer.compression(type);
} }
public static public static
SequenceFile.Writer.Option compressionCodec(CompressionCodec value) { SequenceFile.Writer.Option compression(CompressionType type,
return SequenceFile.Writer.compressionCodec(value); CompressionCodec codec) {
return SequenceFile.Writer.compression(type, codec);
} }
public static SequenceFile.Writer.Option progressable(Progressable value) { public static SequenceFile.Writer.Option progressable(Progressable value) {
@ -274,11 +274,10 @@ public class MapFile {
this.data = SequenceFile.createWriter(conf, dataOptions); this.data = SequenceFile.createWriter(conf, dataOptions);
SequenceFile.Writer.Option[] indexOptions = SequenceFile.Writer.Option[] indexOptions =
Options.prependOptions(opts, Options.prependOptions(opts, SequenceFile.Writer.file(indexFile),
SequenceFile.Writer.file(indexFile), SequenceFile.Writer.keyClass(keyClass),
SequenceFile.Writer.keyClass(keyClass), SequenceFile.Writer.valueClass(LongWritable.class),
SequenceFile.Writer.valueClass(LongWritable.class), SequenceFile.Writer.compression(CompressionType.BLOCK));
SequenceFile.Writer.compressionType(CompressionType.BLOCK));
this.index = SequenceFile.createWriter(conf, indexOptions); this.index = SequenceFile.createWriter(conf, indexOptions);
} }

View File

@ -252,22 +252,23 @@ public class SequenceFile {
*/ */
public static Writer createWriter(Configuration conf, Writer.Option... opts public static Writer createWriter(Configuration conf, Writer.Option... opts
) throws IOException { ) throws IOException {
Writer.CompressionTypeOption compressionOption = Writer.CompressionOption compressionOption =
Options.getOption(Writer.CompressionTypeOption.class, opts); Options.getOption(Writer.CompressionOption.class, opts);
CompressionType kind; CompressionType kind;
if (compressionOption != null) { if (compressionOption != null) {
kind = compressionOption.getValue(); kind = compressionOption.getValue();
} else { } else {
kind = getDefaultCompressionType(conf); kind = getDefaultCompressionType(conf);
opts = Options.prependOptions(opts, Writer.compression(kind));
} }
switch (kind) { switch (kind) {
default: default:
case NONE: case NONE:
return new Writer(conf, kind, opts); return new Writer(conf, opts);
case RECORD: case RECORD:
return new RecordCompressWriter(conf, kind, opts); return new RecordCompressWriter(conf, opts);
case BLOCK: case BLOCK:
return new BlockCompressWriter(conf, kind, opts); return new BlockCompressWriter(conf, opts);
} }
} }
@ -311,7 +312,7 @@ public class SequenceFile {
CompressionType compressionType) throws IOException { CompressionType compressionType) throws IOException {
return createWriter(conf, Writer.file(name), Writer.keyClass(keyClass), return createWriter(conf, Writer.file(name), Writer.keyClass(keyClass),
Writer.valueClass(valClass), Writer.valueClass(valClass),
Writer.compressionType(compressionType)); Writer.compression(compressionType));
} }
/** /**
@ -335,7 +336,7 @@ public class SequenceFile {
Progressable progress) throws IOException { Progressable progress) throws IOException {
return createWriter(conf, Writer.file(name), Writer.keyClass(keyClass), return createWriter(conf, Writer.file(name), Writer.keyClass(keyClass),
Writer.valueClass(valClass), Writer.valueClass(valClass),
Writer.compressionType(compressionType), Writer.compression(compressionType),
Writer.progressable(progress)); Writer.progressable(progress));
} }
@ -360,8 +361,7 @@ public class SequenceFile {
CompressionCodec codec) throws IOException { CompressionCodec codec) throws IOException {
return createWriter(conf, Writer.file(name), Writer.keyClass(keyClass), return createWriter(conf, Writer.file(name), Writer.keyClass(keyClass),
Writer.valueClass(valClass), Writer.valueClass(valClass),
Writer.compressionType(compressionType), Writer.compression(compressionType, codec));
Writer.compressionCodec(codec));
} }
/** /**
@ -388,8 +388,7 @@ public class SequenceFile {
Progressable progress, Metadata metadata) throws IOException { Progressable progress, Metadata metadata) throws IOException {
return createWriter(conf, Writer.file(name), Writer.keyClass(keyClass), return createWriter(conf, Writer.file(name), Writer.keyClass(keyClass),
Writer.valueClass(valClass), Writer.valueClass(valClass),
Writer.compressionType(compressionType), Writer.compression(compressionType, codec),
Writer.compressionCodec(codec),
Writer.progressable(progress), Writer.progressable(progress),
Writer.metadata(metadata)); Writer.metadata(metadata));
} }
@ -425,8 +424,7 @@ public class SequenceFile {
Writer.bufferSize(bufferSize), Writer.bufferSize(bufferSize),
Writer.replication(replication), Writer.replication(replication),
Writer.blockSize(blockSize), Writer.blockSize(blockSize),
Writer.compressionType(compressionType), Writer.compression(compressionType, codec),
Writer.compressionCodec(codec),
Writer.progressable(progress), Writer.progressable(progress),
Writer.metadata(metadata)); Writer.metadata(metadata));
} }
@ -454,8 +452,7 @@ public class SequenceFile {
Progressable progress) throws IOException { Progressable progress) throws IOException {
return createWriter(conf, Writer.file(name), Writer.keyClass(keyClass), return createWriter(conf, Writer.file(name), Writer.keyClass(keyClass),
Writer.valueClass(valClass), Writer.valueClass(valClass),
Writer.compressionType(compressionType), Writer.compression(compressionType, codec),
Writer.compressionCodec(codec),
Writer.progressable(progress)); Writer.progressable(progress));
} }
@ -481,8 +478,7 @@ public class SequenceFile {
CompressionCodec codec, Metadata metadata) throws IOException { CompressionCodec codec, Metadata metadata) throws IOException {
return createWriter(conf, Writer.stream(out), Writer.keyClass(keyClass), return createWriter(conf, Writer.stream(out), Writer.keyClass(keyClass),
Writer.valueClass(valClass), Writer.valueClass(valClass),
Writer.compressionType(compressionType), Writer.compression(compressionType, codec),
Writer.compressionCodec(codec),
Writer.metadata(metadata)); Writer.metadata(metadata));
} }
@ -506,8 +502,7 @@ public class SequenceFile {
CompressionCodec codec) throws IOException { CompressionCodec codec) throws IOException {
return createWriter(conf, Writer.stream(out), Writer.keyClass(keyClass), return createWriter(conf, Writer.stream(out), Writer.keyClass(keyClass),
Writer.valueClass(valClass), Writer.valueClass(valClass),
Writer.compressionType(compressionType), Writer.compression(compressionType, codec));
Writer.compressionCodec(codec));
} }
@ -839,23 +834,23 @@ public class SequenceFile {
} }
} }
private static class CompressionTypeOption implements Option { private static class CompressionOption implements Option {
private final CompressionType value; private final CompressionType value;
CompressionTypeOption(CompressionType value) { private final CompressionCodec codec;
CompressionOption(CompressionType value) {
this(value, null);
}
CompressionOption(CompressionType value, CompressionCodec codec) {
this.value = value; this.value = value;
this.codec = (CompressionType.NONE != value && null == codec)
? new DefaultCodec()
: codec;
} }
CompressionType getValue() { CompressionType getValue() {
return value; return value;
} }
} CompressionCodec getCodec() {
return codec;
private static class CompressionCodecOption implements Option {
private final CompressionCodec value;
CompressionCodecOption(CompressionCodec value) {
this.value = value;
}
CompressionCodec getValue() {
return value;
} }
} }
@ -895,25 +890,23 @@ public class SequenceFile {
return new MetadataOption(value); return new MetadataOption(value);
} }
public static Option compressionType(CompressionType value) { public static Option compression(CompressionType value) {
return new CompressionTypeOption(value); return new CompressionOption(value);
} }
public static Option compressionCodec(CompressionCodec value) { public static Option compression(CompressionType value,
return new CompressionCodecOption(value); CompressionCodec codec) {
return new CompressionOption(value, codec);
} }
/** /**
* Construct an uncompressed writer from a set of options. * Construct an uncompressed writer from a set of options.
* @param conf the configuration to use * @param conf the configuration to use
* @param compressionType the compression type being used
* @param options the options used when creating the writer * @param options the options used when creating the writer
* @throws IOException if it fails * @throws IOException if it fails
*/ */
Writer(Configuration conf, Writer(Configuration conf,
CompressionType compressionType,
Option... opts) throws IOException { Option... opts) throws IOException {
this.compress = compressionType;
BlockSizeOption blockSizeOption = BlockSizeOption blockSizeOption =
Options.getOption(BlockSizeOption.class, opts); Options.getOption(BlockSizeOption.class, opts);
BufferSizeOption bufferSizeOption = BufferSizeOption bufferSizeOption =
@ -928,10 +921,10 @@ public class SequenceFile {
Options.getOption(KeyClassOption.class, opts); Options.getOption(KeyClassOption.class, opts);
ValueClassOption valueClassOption = ValueClassOption valueClassOption =
Options.getOption(ValueClassOption.class, opts); Options.getOption(ValueClassOption.class, opts);
CompressionCodecOption compressionCodecOption =
Options.getOption(CompressionCodecOption.class, opts);
MetadataOption metadataOption = MetadataOption metadataOption =
Options.getOption(MetadataOption.class, opts); Options.getOption(MetadataOption.class, opts);
CompressionOption compressionTypeOption =
Options.getOption(CompressionOption.class, opts);
// check consistency of options // check consistency of options
if ((fileOption == null) == (streamOption == null)) { if ((fileOption == null) == (streamOption == null)) {
throw new IllegalArgumentException("file or stream must be specified"); throw new IllegalArgumentException("file or stream must be specified");
@ -968,13 +961,8 @@ public class SequenceFile {
Object.class : valueClassOption.getValue(); Object.class : valueClassOption.getValue();
Metadata metadata = metadataOption == null ? Metadata metadata = metadataOption == null ?
new Metadata() : metadataOption.getValue(); new Metadata() : metadataOption.getValue();
CompressionCodec codec; this.compress = compressionTypeOption.getValue();
if (compressionType == CompressionType.NONE) { final CompressionCodec codec = compressionTypeOption.getCodec();
codec = null;
} else {
codec = compressionCodecOption == null ?
new DefaultCodec() : compressionCodecOption.getValue();
}
if (codec != null && if (codec != null &&
(codec instanceof GzipCodec) && (codec instanceof GzipCodec) &&
!NativeCodeLoader.isNativeCodeLoaded() && !NativeCodeLoader.isNativeCodeLoaded() &&
@ -1207,9 +1195,8 @@ public class SequenceFile {
static class RecordCompressWriter extends Writer { static class RecordCompressWriter extends Writer {
RecordCompressWriter(Configuration conf, RecordCompressWriter(Configuration conf,
CompressionType compressionType,
Option... options) throws IOException { Option... options) throws IOException {
super(conf, compressionType, options); super(conf, options);
} }
/** Append a key/value pair. */ /** Append a key/value pair. */
@ -1276,9 +1263,8 @@ public class SequenceFile {
private final int compressionBlockSize; private final int compressionBlockSize;
BlockCompressWriter(Configuration conf, BlockCompressWriter(Configuration conf,
CompressionType compressionType,
Option... options) throws IOException { Option... options) throws IOException {
super(conf, compressionType, options); super(conf, options);
compressionBlockSize = compressionBlockSize =
conf.getInt("io.seqfile.compress.blocksize", 1000000); conf.getInt("io.seqfile.compress.blocksize", 1000000);
keySerializer.close(); keySerializer.close();
@ -2756,14 +2742,10 @@ public class SequenceFile {
} }
long segmentStart = out.getPos(); long segmentStart = out.getPos();
Writer writer = createWriter(conf, Writer writer = createWriter(conf, Writer.stream(out),
Writer.stream(out), Writer.keyClass(keyClass), Writer.valueClass(valClass),
Writer.keyClass(keyClass), Writer.compression(compressionType, codec),
Writer.valueClass(valClass), Writer.metadata(done ? metadata : new Metadata()));
Writer.compressionType(compressionType),
Writer.compressionCodec(codec),
Writer.metadata(done ? metadata :
new Metadata()));
if (!done) { if (!done) {
writer.sync = null; // disable sync on temp files writer.sync = null; // disable sync on temp files
@ -2943,8 +2925,7 @@ public class SequenceFile {
Writer.file(outputFile), Writer.file(outputFile),
Writer.keyClass(keyClass), Writer.keyClass(keyClass),
Writer.valueClass(valClass), Writer.valueClass(valClass),
Writer.compressionType(compress), Writer.compression(compress, codec),
Writer.compressionCodec(codec),
Writer.progressable(prog)); Writer.progressable(prog));
return writer; return writer;
} }

View File

@ -60,7 +60,7 @@ public class SetFile extends MapFile {
super(conf, new Path(dirName), super(conf, new Path(dirName),
comparator(comparator), comparator(comparator),
valueClass(NullWritable.class), valueClass(NullWritable.class),
compressionType(compress)); compression(compress));
} }
/** Append a key to a set. The key must be strictly greater than the /** Append a key to a set. The key must be strictly greater than the