LUCENE-4226: Efficient stored fields compression.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1394578 13f79535-47bb-0310-9956-ffa450edef68
Adrien Grand 2012-10-05 15:14:35 +00:00
parent 1d521aa6bf
commit c3e00c353e
21 changed files with 2770 additions and 1 deletion

View File

@ -20,6 +20,10 @@ Changes in backwards compatibility policy
New Features
* LUCENE-4226: New experimental StoredFieldsFormat (in lucene/codecs) that
compresses chunks of documents together in order to improve the compression
ratio. (Adrien Grand)
* LUCENE-4426: New ValueSource implementations (in lucene/queries) for
DocValues fields. (Adrien Grand)

View File

@ -0,0 +1,121 @@
package org.apache.lucene.codecs.compressing;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.StoredFieldsWriter;
import org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
/**
* A {@link StoredFieldsFormat} that is very similar to
* {@link Lucene40StoredFieldsFormat} but compresses documents in chunks in
* order to improve compression ratio.
* <p>
* For optimal performance, you should use a {@link MergePolicy} that returns
* segments that have the biggest byte size first.
* @lucene.experimental
*/
public class CompressingStoredFieldsFormat extends StoredFieldsFormat {
private final CompressingStoredFieldsIndex storedFieldsIndexFormat;
private final CompressionMode compressionMode;
private final int chunkSize;
/**
* Create a new {@link CompressingStoredFieldsFormat}.
* <p>
* The <code>compressionMode</code> parameter allows you to choose between
* compression algorithms that have various compression and uncompression
* speeds so that you can pick the one that best fits your indexing and
* searching throughput.
* <p>
* <code>chunkSize</code> is the minimum byte size of a chunk of documents.
* A value of <code>1</code> can make sense if there is redundancy across
* fields. In that case, both performance and compression ratio should be
* better than with {@link Lucene40StoredFieldsFormat} with compressed
* fields.
* <p>
* Higher values of <code>chunkSize</code> should improve the compression
ratio but will require more memory at indexing time and might make document
* loading a little slower (depending on the size of your OS cache compared
* to the size of your index).
* <p>
* The <code>storedFieldsIndexFormat</code> parameter allows you to choose
* between several fields index formats that offer various trade-offs between
* memory usage and speed.
*
* @param compressionMode the {@link CompressionMode} to use
* @param chunkSize the minimum number of bytes of a single chunk of stored documents
* @param storedFieldsIndexFormat the format to use to load the fields index
* @see CompressionMode
* @see CompressingStoredFieldsIndex
*/
public CompressingStoredFieldsFormat(CompressionMode compressionMode, int chunkSize,
CompressingStoredFieldsIndex storedFieldsIndexFormat) {
this.compressionMode = compressionMode;
if (chunkSize < 1) {
throw new IllegalArgumentException("chunkSize must be >= 1");
}
this.chunkSize = chunkSize;
this.storedFieldsIndexFormat = storedFieldsIndexFormat;
}
/**
* Create a new {@link CompressingStoredFieldsFormat} with an in-memory
* {@link CompressingStoredFieldsIndex}.
*
* @see CompressingStoredFieldsFormat#CompressingStoredFieldsFormat(CompressionMode, int, CompressingStoredFieldsIndex)
*/
public CompressingStoredFieldsFormat(CompressionMode compressionMode, int chunkSize) {
this (compressionMode, chunkSize, chunkSize == 1
? CompressingStoredFieldsIndex.MEMORY_DOC
: CompressingStoredFieldsIndex.MEMORY_CHUNK);
}
/**
* Create a new {@link CompressingStoredFieldsFormat} with
* {@link CompressionMode#FAST} compression and chunks of <tt>16 KB</tt>.
*
* @see CompressingStoredFieldsFormat#CompressingStoredFieldsFormat(CompressionMode, int)
*/
public CompressingStoredFieldsFormat() {
this(CompressionMode.FAST, 1 << 14);
}
@Override
public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si,
FieldInfos fn, IOContext context) throws IOException {
return new CompressingStoredFieldsReader(directory, si, fn, context);
}
@Override
public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si,
IOContext context) throws IOException {
return new CompressingStoredFieldsWriter(directory, si, context,
compressionMode, chunkSize, storedFieldsIndexFormat);
}
}
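
The constructors above are the entire configuration surface of this format. As a hedged illustration (not part of this patch; variable names are arbitrary), the three constructors could be exercised as follows; in practice the chosen instance would be returned from a custom Codec's storedFieldsFormat() method.

// Illustrative configuration sketch only.
StoredFieldsFormat defaults = new CompressingStoredFieldsFormat();            // FAST, 16 KB chunks, MEMORY_CHUNK index
StoredFieldsFormat perDoc   = new CompressingStoredFieldsFormat(CompressionMode.FAST, 1); // chunkSize == 1 -> MEMORY_DOC index
StoredFieldsFormat compact  = new CompressingStoredFieldsFormat(CompressionMode.HIGH_COMPRESSION,
    1 << 16, CompressingStoredFieldsIndex.DISK_DOC);                          // better ratio, no fields index in memory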

View File

@ -0,0 +1,411 @@
package org.apache.lucene.codecs.compressing;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.packed.GrowableWriter;
import org.apache.lucene.util.packed.PackedInts;
/**
* A format for the stored fields index file (.fdx).
* <p>
* These formats allow different memory/speed trade-offs for locating documents
* in the fields data file (.fdt).
* @lucene.experimental
*/
public enum CompressingStoredFieldsIndex {
/**
* This format stores the document index on disk using 64-bits pointers to
* the start offsets of chunks in the fields data file.
* <p>
* This format has no memory overhead and requires at most 1 disk seek to
* locate a document in the fields data file. Use this format in
* memory-constrained environments.
*/
DISK_DOC(0) {
@Override
Writer newWriter(IndexOutput out) {
return new DiskDocFieldsIndexWriter(out);
}
@Override
Reader newReader(IndexInput in, SegmentInfo si) throws IOException {
return new DiskDocFieldsIndexReader(in, si);
}
},
/**
* For every document in the segment, this format stores the offset of the
* compressed chunk that contains it in the fields data file.
* <p>
* This fields index format requires at most <code>8 * numDocs</code> bytes
* of memory. Locating a document in the fields data file requires no disk
* seek. Use this format when chunks are very likely to contain few
* documents (in particular when <code>chunkSize = 1</code>).
*/
MEMORY_DOC(1) {
@Override
Writer newWriter(IndexOutput out) throws IOException {
return new ChunksFieldsIndexWriter(out);
}
@Override
Reader newReader(IndexInput in, SegmentInfo si) throws IOException {
return new MemoryDocFieldsIndexReader(in, si);
}
},
/**
* For every chunk of compressed documents, this format stores the first doc
* ID of the chunk as well as the start offset of the chunk.
* <p>
* This fields index format requires at most
* <code>12 * numChunks</code> bytes of memory. Locating a document in the
* fields data file requires no disk seek. Use this format when chunks are
* likely to contain several documents.
*/
MEMORY_CHUNK(2) {
@Override
Writer newWriter(IndexOutput out) throws IOException {
return new ChunksFieldsIndexWriter(out);
}
@Override
Reader newReader(IndexInput in, SegmentInfo si) throws IOException {
return new MemoryChunkFieldsIndexReader(in, si);
}
};
/**
* Retrieve a {@link CompressingStoredFieldsIndex} according to its
* <code>ID</code>.
*/
public static CompressingStoredFieldsIndex byId(int id) {
for (CompressingStoredFieldsIndex idx : CompressingStoredFieldsIndex.values()) {
if (idx.getId() == id) {
return idx;
}
}
throw new IllegalArgumentException("Unknown id: " + id);
}
private final int id;
private CompressingStoredFieldsIndex(int id) {
this.id = id;
}
/**
* Returns an ID for this fields index format. Should be unique across
* {@link CompressingStoredFieldsIndex}s as it is used for serialization and
* deserialization.
*/
public final int getId() {
return id;
}
abstract Writer newWriter(IndexOutput out) throws IOException;
abstract Reader newReader(IndexInput in, SegmentInfo si) throws IOException;
static abstract class Writer implements Closeable {
protected final IndexOutput fieldsIndexOut;
Writer(IndexOutput indexOutput) {
this.fieldsIndexOut = indexOutput;
}
/** Write the index file for a chunk of <code>numDocs</code> docs starting
* at offset <code>startPointer</code>. */
abstract void writeIndex(int numDocs, long startPointer) throws IOException;
/** Finish writing an index file of <code>numDocs</code> documents. */
abstract void finish(int numDocs) throws IOException;
@Override
public void close() throws IOException {
fieldsIndexOut.close();
}
}
private static class DiskDocFieldsIndexWriter extends Writer {
final long startOffset;
DiskDocFieldsIndexWriter(IndexOutput fieldsIndexOut) {
super(fieldsIndexOut);
startOffset = fieldsIndexOut.getFilePointer();
}
@Override
void writeIndex(int numDocs, long startPointer) throws IOException {
for (int i = 0; i < numDocs; ++i) {
fieldsIndexOut.writeLong(startPointer);
}
}
@Override
void finish(int numDocs) throws IOException {
if (startOffset + ((long) numDocs) * 8 != fieldsIndexOut.getFilePointer()) {
// see Lucene40StoredFieldsWriter#finish
throw new RuntimeException((fieldsIndexOut.getFilePointer() - startOffset)/8 + " fdx size mismatch: docCount is " + numDocs + " but fdx file size is " + fieldsIndexOut.getFilePointer() + " file=" + fieldsIndexOut.toString() + "; now aborting this merge to prevent index corruption");
}
}
}
private static class ChunksFieldsIndexWriter extends Writer {
int numChunks;
long maxStartPointer;
GrowableWriter docBaseDeltas;
GrowableWriter startPointerDeltas;
ChunksFieldsIndexWriter(IndexOutput indexOutput) {
super(indexOutput);
numChunks = 0;
maxStartPointer = 0;
docBaseDeltas = new GrowableWriter(2, 128, PackedInts.COMPACT);
startPointerDeltas = new GrowableWriter(5, 128, PackedInts.COMPACT);
}
@Override
void writeIndex(int numDocs, long startPointer) throws IOException {
if (numChunks == docBaseDeltas.size()) {
final int newSize = ArrayUtil.oversize(numChunks + 1, 1);
docBaseDeltas = docBaseDeltas.resize(newSize);
startPointerDeltas = startPointerDeltas.resize(newSize);
}
docBaseDeltas.set(numChunks, numDocs);
startPointerDeltas.set(numChunks, startPointer - maxStartPointer);
++numChunks;
maxStartPointer = startPointer;
}
@Override
void finish(int numDocs) throws IOException {
if (numChunks != docBaseDeltas.size()) {
docBaseDeltas = docBaseDeltas.resize(numChunks);
startPointerDeltas = startPointerDeltas.resize(numChunks);
}
fieldsIndexOut.writeVInt(numChunks);
fieldsIndexOut.writeByte((byte) PackedInts.bitsRequired(maxStartPointer));
docBaseDeltas.save(fieldsIndexOut);
startPointerDeltas.save(fieldsIndexOut);
}
}
static abstract class Reader implements Cloneable, Closeable {
protected final IndexInput fieldsIndexIn;
Reader(IndexInput fieldsIndexIn) {
this.fieldsIndexIn = fieldsIndexIn;
}
/** Get the start pointer of the compressed block that contains docID */
abstract long getStartPointer(int docID) throws IOException;
public void close() throws IOException {
if (fieldsIndexIn != null) {
fieldsIndexIn.close();
}
}
public abstract Reader clone();
}
private static class DiskDocFieldsIndexReader extends Reader {
final long startPointer;
DiskDocFieldsIndexReader(IndexInput fieldsIndexIn, SegmentInfo si) throws CorruptIndexException {
this(fieldsIndexIn, fieldsIndexIn.getFilePointer());
final long indexSize = fieldsIndexIn.length() - fieldsIndexIn.getFilePointer();
final int numDocs = (int) (indexSize >> 3);
// Verify two sources of "maxDoc" agree:
if (numDocs != si.getDocCount()) {
throw new CorruptIndexException("doc counts differ for segment " + si + ": fieldsReader shows " + numDocs + " but segmentInfo shows " + si.getDocCount());
}
}
private DiskDocFieldsIndexReader(IndexInput fieldsIndexIn, long startPointer) {
super(fieldsIndexIn);
this.startPointer = startPointer;
}
@Override
long getStartPointer(int docID) throws IOException {
fieldsIndexIn.seek(startPointer + docID * 8L);
return fieldsIndexIn.readLong();
}
@Override
public Reader clone() {
return new DiskDocFieldsIndexReader(fieldsIndexIn.clone(), startPointer);
}
}
private static class MemoryDocFieldsIndexReader extends Reader {
private final PackedInts.Reader startPointers;
MemoryDocFieldsIndexReader(IndexInput fieldsIndexIn, SegmentInfo si) throws IOException {
super(fieldsIndexIn);
final int numChunks = fieldsIndexIn.readVInt();
final int bitsPerStartPointer = fieldsIndexIn.readByte() & 0xFF;
if (bitsPerStartPointer > 64) {
throw new CorruptIndexException("Corrupted");
}
final PackedInts.Reader chunkDocs = PackedInts.getReader(fieldsIndexIn);
if (chunkDocs.size() != numChunks) {
throw new CorruptIndexException("Expected " + numChunks + " chunks, but got " + chunkDocs.size());
}
final PackedInts.ReaderIterator startPointerDeltas = PackedInts.getReaderIterator(fieldsIndexIn, PackedInts.DEFAULT_BUFFER_SIZE);
if (startPointerDeltas.size() != numChunks) {
throw new CorruptIndexException("Expected " + numChunks + " chunks, but got " + startPointerDeltas.size());
}
final PackedInts.Mutable startPointers = PackedInts.getMutable(si.getDocCount(), bitsPerStartPointer, PackedInts.COMPACT);
int docID = 0;
long startPointer = 0;
for (int i = 0; i < numChunks; ++i) {
startPointer += startPointerDeltas.next();
final int chunkDocCount = (int) chunkDocs.get(i);
for (int j = 0; j < chunkDocCount; ++j) {
startPointers.set(docID++, startPointer);
}
}
if (docID != si.getDocCount()) {
throw new CorruptIndexException("Expected " + si.getDocCount() + " docs, got " + docID);
}
this.startPointers = startPointers;
}
private MemoryDocFieldsIndexReader(PackedInts.Reader startPointers) {
super(null);
this.startPointers = startPointers;
}
@Override
long getStartPointer(int docID) throws IOException {
return startPointers.get(docID);
}
@Override
public Reader clone() {
if (fieldsIndexIn == null) {
return this;
} else {
return new MemoryDocFieldsIndexReader(startPointers);
}
}
}
private static class MemoryChunkFieldsIndexReader extends Reader {
private final PackedInts.Reader docBases;
private final PackedInts.Reader startPointers;
MemoryChunkFieldsIndexReader(IndexInput fieldsIndexIn, SegmentInfo si) throws IOException {
super(fieldsIndexIn);
final int numChunks = fieldsIndexIn.readVInt();
final int bitsPerStartPointer = fieldsIndexIn.readByte() & 0xFF;
if (bitsPerStartPointer > 64) {
throw new CorruptIndexException("Corrupted");
}
final PackedInts.ReaderIterator docBaseDeltas = PackedInts.getReaderIterator(fieldsIndexIn, PackedInts.DEFAULT_BUFFER_SIZE);
if (docBaseDeltas.size() != numChunks) {
throw new CorruptIndexException("Expected " + numChunks + " chunks, but got " + docBaseDeltas.size());
}
final PackedInts.Mutable docBases = PackedInts.getMutable(numChunks, PackedInts.bitsRequired(Math.max(0, si.getDocCount() - 1)), PackedInts.COMPACT);
int docBase = 0;
for (int i = 0; i < numChunks; ++i) {
docBases.set(i, docBase);
docBase += docBaseDeltas.next();
}
if (docBase != si.getDocCount()) {
throw new CorruptIndexException("Expected " + si.getDocCount() + " docs, got " + docBase);
}
final PackedInts.ReaderIterator startPointerDeltas = PackedInts.getReaderIterator(fieldsIndexIn, PackedInts.DEFAULT_BUFFER_SIZE);
if (startPointerDeltas.size() != numChunks) {
throw new CorruptIndexException("Expected " + numChunks + " chunks, but got " + startPointerDeltas.size());
}
final PackedInts.Mutable startPointers = PackedInts.getMutable(numChunks, bitsPerStartPointer, PackedInts.COMPACT);
int startPointer = 0;
for (int i = 0; i < numChunks; ++i) {
startPointer += startPointerDeltas.next();
startPointers.set(i, startPointer);
}
this.docBases = docBases;
this.startPointers = startPointers;
}
private MemoryChunkFieldsIndexReader(PackedInts.Reader docBases, PackedInts.Reader startPointers) {
super(null);
this.docBases = docBases;
this.startPointers = startPointers;
}
@Override
long getStartPointer(int docID) {
assert docBases.size() > 0;
int lo = 0, hi = docBases.size() - 1;
while (lo <= hi) {
final int mid = (lo + hi) >>> 1;
final long midValue = docBases.get(mid);
if (midValue == docID) {
return startPointers.get(mid);
} else if (midValue < docID) {
lo = mid + 1;
} else {
hi = mid - 1;
}
}
return startPointers.get(hi);
}
@Override
public Reader clone() {
if (fieldsIndexIn == null) {
return this;
} else {
return new MemoryChunkFieldsIndexReader(docBases, startPointers);
}
}
}
}
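
The javadocs above amount to rough memory costs of zero for DISK_DOC, about 8 bytes per document for MEMORY_DOC, and about 12 bytes per chunk for MEMORY_CHUNK. A hypothetical helper (not part of this patch; the name and thresholds are illustrative only) that turns those estimates into a selection rule:

// Hedged sketch: pick an index format from the memory estimates quoted in the javadocs above.
static CompressingStoredFieldsIndex pickIndexFormat(long numDocs, long numChunks, long memoryBudgetBytes) {
  final long memoryDocCost = 8L * numDocs;      // MEMORY_DOC: ~8 bytes per document
  final long memoryChunkCost = 12L * numChunks; // MEMORY_CHUNK: ~12 bytes per chunk
  if (memoryChunkCost <= memoryBudgetBytes && numDocs > numChunks) {
    return CompressingStoredFieldsIndex.MEMORY_CHUNK; // chunks hold several docs and the index fits in RAM
  } else if (memoryDocCost <= memoryBudgetBytes) {
    return CompressingStoredFieldsIndex.MEMORY_DOC;   // e.g. when chunkSize == 1
  } else {
    return CompressingStoredFieldsIndex.DISK_DOC;     // no memory overhead, at most one seek per lookup
  }
}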

View File

@ -0,0 +1,341 @@
package org.apache.lucene.codecs.compressing;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.BYTE_ARR;
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.CODEC_NAME_DAT;
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.CODEC_NAME_IDX;
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.HEADER_LENGTH_DAT;
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.HEADER_LENGTH_IDX;
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.NUMERIC_DOUBLE;
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.NUMERIC_FLOAT;
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.NUMERIC_INT;
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.NUMERIC_LONG;
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.STRING;
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.TYPE_BITS;
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.TYPE_MASK;
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_CURRENT;
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_START;
import static org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsWriter.FIELDS_EXTENSION;
import static org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION;
import java.io.IOException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.packed.PackedInts;
final class CompressingStoredFieldsReader extends StoredFieldsReader {
private final FieldInfos fieldInfos;
private final CompressingStoredFieldsIndex.Reader indexReader;
private final IndexInput fieldsStream;
private final int packedIntsVersion;
private final CompressionMode compressionMode;
private final Uncompressor uncompressor;
private final BytesRef bytes;
private final int numDocs;
private boolean closed;
// used by clone
private CompressingStoredFieldsReader(CompressingStoredFieldsReader reader) {
this.fieldInfos = reader.fieldInfos;
this.fieldsStream = reader.fieldsStream.clone();
this.indexReader = reader.indexReader.clone();
this.packedIntsVersion = reader.packedIntsVersion;
this.compressionMode = reader.compressionMode;
this.uncompressor = reader.uncompressor.clone();
this.numDocs = reader.numDocs;
this.bytes = new BytesRef(reader.bytes.bytes.length);
this.closed = false;
}
public CompressingStoredFieldsReader(Directory d, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
final String segment = si.name;
boolean success = false;
fieldInfos = fn;
numDocs = si.getDocCount();
IndexInput indexStream = null;
try {
fieldsStream = d.openInput(IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION), context);
final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION);
indexStream = d.openInput(indexStreamFN, context);
CodecUtil.checkHeader(indexStream, CODEC_NAME_IDX, VERSION_START, VERSION_CURRENT);
CodecUtil.checkHeader(fieldsStream, CODEC_NAME_DAT, VERSION_START, VERSION_CURRENT);
assert HEADER_LENGTH_DAT == fieldsStream.getFilePointer();
assert HEADER_LENGTH_IDX == indexStream.getFilePointer();
final int storedFieldsIndexId = indexStream.readVInt();
final CompressingStoredFieldsIndex storedFieldsIndex = CompressingStoredFieldsIndex.byId(storedFieldsIndexId);
indexReader = storedFieldsIndex.newReader(indexStream, si);
indexStream = null;
packedIntsVersion = fieldsStream.readVInt();
final int compressionModeId = fieldsStream.readVInt();
compressionMode = CompressionMode.byId(compressionModeId);
uncompressor = compressionMode.newUncompressor();
this.bytes = new BytesRef();
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(this, indexStream);
}
}
}
/**
* @throws AlreadyClosedException if this FieldsReader is closed
*/
private void ensureOpen() throws AlreadyClosedException {
if (closed) {
throw new AlreadyClosedException("this FieldsReader is closed");
}
}
@Override
public void close() throws IOException {
if (!closed) {
IOUtils.close(fieldsStream, indexReader);
closed = true;
}
}
private static void readField(ByteArrayDataInput in, StoredFieldVisitor visitor, FieldInfo info, int bits) throws IOException {
switch (bits & TYPE_MASK) {
case BYTE_ARR:
int length = in.readVInt();
byte[] data = new byte[length];
in.readBytes(data, 0, length);
visitor.binaryField(info, data);
break;
case STRING:
length = in.readVInt();
data = new byte[length];
in.readBytes(data, 0, length);
visitor.stringField(info, new String(data, IOUtils.CHARSET_UTF_8));
break;
case NUMERIC_INT:
visitor.intField(info, in.readInt());
break;
case NUMERIC_FLOAT:
visitor.floatField(info, Float.intBitsToFloat(in.readInt()));
break;
case NUMERIC_LONG:
visitor.longField(info, in.readLong());
break;
case NUMERIC_DOUBLE:
visitor.doubleField(info, Double.longBitsToDouble(in.readLong()));
break;
default:
throw new AssertionError("Unknown type flag: " + Integer.toHexString(bits));
}
}
private static void skipField(ByteArrayDataInput in, int bits) throws IOException {
switch (bits & TYPE_MASK) {
case BYTE_ARR:
case STRING:
final int length = in.readVInt();
in.skipBytes(length);
break;
case NUMERIC_INT:
case NUMERIC_FLOAT:
in.readInt();
break;
case NUMERIC_LONG:
case NUMERIC_DOUBLE:
in.readLong();
break;
default:
throw new AssertionError("Unknown type flag: " + Integer.toHexString(bits));
}
}
@Override
public void visitDocument(int docID, StoredFieldVisitor visitor)
throws IOException {
fieldsStream.seek(indexReader.getStartPointer(docID));
final int docBase = fieldsStream.readVInt();
final int chunkDocs = fieldsStream.readVInt();
final int bitsPerValue = fieldsStream.readVInt();
if (docID < docBase
|| docID >= docBase + chunkDocs
|| docBase + chunkDocs > numDocs
|| bitsPerValue > 31) {
throw new CorruptIndexException("Corrupted: docID=" + docID
+ ", docBase=" + docBase + ", chunkDocs=" + chunkDocs
+ ", numDocs=" + numDocs + ", bitsPerValue=" + bitsPerValue);
}
final long filePointer = fieldsStream.getFilePointer();
final PackedInts.ReaderIterator lengths = PackedInts.getReaderIteratorNoHeader(fieldsStream, PackedInts.Format.PACKED, packedIntsVersion, chunkDocs, bitsPerValue, 1);
int offset = 0;
for (int i = docBase; i < docID; ++i) {
offset += lengths.next();
}
final int length = (int) lengths.next();
// seek past the packed length values to the start of the compressed data
fieldsStream.seek(filePointer + (PackedInts.Format.PACKED.nblocks(bitsPerValue, chunkDocs) << 3));
uncompressor.uncompress(fieldsStream, offset, length, bytes);
final ByteArrayDataInput documentInput = new ByteArrayDataInput(bytes.bytes, bytes.offset, bytes.length);
final int numFields = documentInput.readVInt();
for (int fieldIDX = 0; fieldIDX < numFields; fieldIDX++) {
final long infoAndBits = documentInput.readVLong();
final int fieldNumber = (int) (infoAndBits >>> TYPE_BITS);
FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
final int bits = (int) (infoAndBits & TYPE_MASK);
assert bits <= NUMERIC_DOUBLE: "bits=" + Integer.toHexString(bits);
switch(visitor.needsField(fieldInfo)) {
case YES:
readField(documentInput, visitor, fieldInfo, bits);
break;
case NO:
skipField(documentInput, bits);
break;
case STOP:
return;
}
}
}
@Override
public StoredFieldsReader clone() {
ensureOpen();
return new CompressingStoredFieldsReader(this);
}
CompressionMode getCompressionMode() {
return compressionMode;
}
ChunkIterator chunkIterator(int startDocID) throws IOException {
ensureOpen();
fieldsStream.seek(indexReader.getStartPointer(startDocID));
return new ChunkIterator();
}
final class ChunkIterator {
BytesRef bytes;
int docBase;
int chunkDocs;
int[] lengths;
private ChunkIterator() {
this.docBase = -1;
bytes = new BytesRef();
lengths = new int[0];
}
private int readHeader() throws IOException {
final int docBase = fieldsStream.readVInt();
final int chunkDocs = fieldsStream.readVInt();
final int bitsPerValue = fieldsStream.readVInt();
if (docBase < this.docBase + this.chunkDocs
|| docBase + chunkDocs > numDocs
|| bitsPerValue > 31) {
throw new CorruptIndexException("Corrupted: current docBase=" + this.docBase
+ ", current numDocs=" + this.chunkDocs + ", new docBase=" + docBase
+ ", new numDocs=" + chunkDocs + ", bitsPerValue=" + bitsPerValue);
}
this.docBase = docBase;
this.chunkDocs = chunkDocs;
return bitsPerValue;
}
/**
* Return the uncompressed size of the chunk
*/
int chunkSize() {
int sum = 0;
for (int i = 0; i < chunkDocs; ++i) {
sum += lengths[i];
}
return sum;
}
/**
* Go to the chunk containing the provided doc ID.
*/
void next(int doc) throws IOException {
assert doc >= docBase + chunkDocs : doc + " " + docBase + " " + chunkDocs;
// try next chunk
int bitsPerValue = readHeader();
if (docBase + chunkDocs <= doc) {
// doc is not in this chunk either, use the index to seek to the chunk that contains it
fieldsStream.seek(indexReader.getStartPointer(doc));
bitsPerValue = readHeader();
}
if (doc < docBase
|| doc >= docBase + chunkDocs) {
throw new CorruptIndexException("Corrupted: docID=" + doc
+ ", docBase=" + docBase + ", chunkDocs=" + chunkDocs);
}
// decode lengths
if (lengths.length < chunkDocs) {
lengths = new int[ArrayUtil.oversize(chunkDocs, 4)];
}
final PackedInts.ReaderIterator iterator = PackedInts.getReaderIteratorNoHeader(fieldsStream, PackedInts.Format.PACKED, packedIntsVersion, chunkDocs, bitsPerValue, 0);
for (int i = 0; i < chunkDocs; ++i) {
lengths[i] = (int) iterator.next();
}
}
/**
* Uncompress the chunk.
*/
void uncompress() throws IOException {
// uncompress data
uncompressor.uncompress(fieldsStream, bytes);
if (bytes.length != chunkSize()) {
throw new CorruptIndexException("Corrupted: expected chunk size = " + chunkSize() + ", got " + bytes.length);
}
}
/**
* Copy compressed data.
*/
void copyCompressedData(DataOutput out) throws IOException {
uncompressor.copyCompressedData(fieldsStream, out);
}
}
}
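
This reader is package-private and is normally obtained through CompressingStoredFieldsFormat#fieldsReader. A hedged sketch of reading a document back (assuming the imports at the top of this file plus StoredFieldsFormat, and a dir/si/infos trio describing an existing segment written with this format):

// Sketch only: prints every string field of one stored document.
static void printStringFields(StoredFieldsFormat format, Directory dir, SegmentInfo si,
    FieldInfos infos, int docID) throws IOException {
  StoredFieldsReader reader = format.fieldsReader(dir, si, infos, IOContext.READ);
  try {
    reader.visitDocument(docID, new StoredFieldVisitor() {
      @Override
      public Status needsField(FieldInfo fieldInfo) {
        return Status.YES;                          // visit every field of this document
      }
      @Override
      public void stringField(FieldInfo fieldInfo, String value) {
        System.out.println(fieldInfo.name + " = " + value);
      }
    });
  } finally {
    reader.close();
  }
}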

View File

@ -0,0 +1,391 @@
package org.apache.lucene.codecs.compressing;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import static org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsWriter.FIELDS_EXTENSION;
import static org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION;
import java.io.IOException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.StoredFieldsWriter;
import org.apache.lucene.codecs.compressing.CompressingStoredFieldsReader.ChunkIterator;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.index.StorableField;
import org.apache.lucene.index.StoredDocument;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.packed.PackedInts;
final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
static final int STRING = 0x00;
static final int BYTE_ARR = 0x01;
static final int NUMERIC_INT = 0x02;
static final int NUMERIC_FLOAT = 0x03;
static final int NUMERIC_LONG = 0x04;
static final int NUMERIC_DOUBLE = 0x05;
static final int TYPE_BITS = PackedInts.bitsRequired(NUMERIC_DOUBLE);
static final int TYPE_MASK = (int) PackedInts.maxValue(TYPE_BITS);
static final String CODEC_NAME_IDX = "CompressingStoredFieldsIndex";
static final String CODEC_NAME_DAT = "CompressingStoredFieldsData";
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
static final long HEADER_LENGTH_IDX = CodecUtil.headerLength(CODEC_NAME_IDX);
static final long HEADER_LENGTH_DAT = CodecUtil.headerLength(CODEC_NAME_DAT);
private final Directory directory;
private final String segment;
private CompressingStoredFieldsIndex.Writer indexWriter;
private IndexOutput fieldsStream;
private final CompressionMode compressionMode;
private final Compressor compressor;
private final int chunkSize;
private final GrowableByteArrayDataOutput bufferedDocs;
private int[] endOffsets; // end offsets in bufferedDocs
private int docBase; // doc ID at the beginning of the chunk
private int numBufferedDocs; // docBase + numBufferedDocs == current doc ID
public CompressingStoredFieldsWriter(Directory directory, SegmentInfo si,
IOContext context, CompressionMode compressionMode, int chunkSize, CompressingStoredFieldsIndex storedFieldsIndex) throws IOException {
assert directory != null;
this.directory = directory;
this.segment = si.name;
this.compressionMode = compressionMode;
this.compressor = compressionMode.newCompressor();
this.chunkSize = chunkSize;
this.docBase = 0;
this.bufferedDocs = new GrowableByteArrayDataOutput(chunkSize);
this.endOffsets = new int[16];
this.numBufferedDocs = 0;
boolean success = false;
IndexOutput indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION), context);
try {
fieldsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION), context);
CodecUtil.writeHeader(indexStream, CODEC_NAME_IDX, VERSION_CURRENT);
CodecUtil.writeHeader(fieldsStream, CODEC_NAME_DAT, VERSION_CURRENT);
assert HEADER_LENGTH_IDX == indexStream.getFilePointer();
assert HEADER_LENGTH_DAT == fieldsStream.getFilePointer();
indexStream.writeVInt(storedFieldsIndex.getId());
indexWriter = storedFieldsIndex.newWriter(indexStream);
indexStream = null;
fieldsStream.writeVInt(PackedInts.VERSION_CURRENT);
fieldsStream.writeVInt(compressionMode.getId());
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(indexStream);
abort();
}
}
}
@Override
public void close() throws IOException {
try {
IOUtils.close(fieldsStream, indexWriter);
} finally {
fieldsStream = null;
indexWriter = null;
}
}
private void endWithPreviousDocument() throws IOException {
if (numBufferedDocs > 0) {
assert bufferedDocs.length > 0;
if (numBufferedDocs == endOffsets.length) {
endOffsets = ArrayUtil.grow(endOffsets);
}
endOffsets[numBufferedDocs - 1] = bufferedDocs.length;
}
}
private void addRawDocument(byte[] buf, int off, int len) throws IOException {
endWithPreviousDocument();
if (bufferedDocs.length >= chunkSize) {
flush();
}
bufferedDocs.writeBytes(buf, off, len);
++numBufferedDocs;
}
@Override
public void startDocument(int numStoredFields) throws IOException {
endWithPreviousDocument();
if (bufferedDocs.length >= chunkSize) {
flush();
}
bufferedDocs.writeVInt(numStoredFields);
++numBufferedDocs;
}
private void writeHeader(int docBase, int numBufferedDocs, int[] lengths) throws IOException {
// save docBase and numBufferedDocs
fieldsStream.writeVInt(docBase);
fieldsStream.writeVInt(numBufferedDocs);
// save lengths
final int bitsRequired = bitsRequired(lengths, numBufferedDocs);
assert bitsRequired <= 31;
fieldsStream.writeVInt(bitsRequired);
final PackedInts.Writer writer = PackedInts.getWriterNoHeader(fieldsStream, PackedInts.Format.PACKED, numBufferedDocs, bitsRequired, 1);
for (int i = 0; i < numBufferedDocs; ++i) {
assert lengths[i] > 0;
writer.add(lengths[i]);
}
assert writer.ord() + 1 == numBufferedDocs;
writer.finish();
}
private void flush() throws IOException {
indexWriter.writeIndex(numBufferedDocs, fieldsStream.getFilePointer());
// transform end offsets into lengths
final int[] lengths = endOffsets;
for (int i = numBufferedDocs - 1; i > 0; --i) {
lengths[i] = endOffsets[i] - endOffsets[i - 1];
assert lengths[i] > 0;
}
writeHeader(docBase, numBufferedDocs, lengths);
// compress stored fields to fieldsStream
compressor.compress(bufferedDocs.bytes, 0, bufferedDocs.length, fieldsStream);
// reset
docBase += numBufferedDocs;
numBufferedDocs = 0;
bufferedDocs.length = 0;
}
private static int bitsRequired(int[] data, int length) {
int or = data[0];
for (int i = 1; i < length; ++i) {
or |= data[i];
}
return PackedInts.bitsRequired(or);
}
@Override
public void writeField(FieldInfo info, StorableField field)
throws IOException {
int bits = 0;
final BytesRef bytes;
final String string;
Number number = field.numericValue();
if (number != null) {
if (number instanceof Byte || number instanceof Short || number instanceof Integer) {
bits = NUMERIC_INT;
} else if (number instanceof Long) {
bits = NUMERIC_LONG;
} else if (number instanceof Float) {
bits = NUMERIC_FLOAT;
} else if (number instanceof Double) {
bits = NUMERIC_DOUBLE;
} else {
throw new IllegalArgumentException("cannot store numeric type " + number.getClass());
}
string = null;
bytes = null;
} else {
bytes = field.binaryValue();
if (bytes != null) {
bits = BYTE_ARR;
string = null;
} else {
bits = STRING;
string = field.stringValue();
if (string == null) {
throw new IllegalArgumentException("field " + field.name() + " is stored but does not have binaryValue, stringValue nor numericValue");
}
}
}
final long infoAndBits = (((long) info.number) << TYPE_BITS) | bits;
bufferedDocs.writeVLong(infoAndBits);
if (bytes != null) {
bufferedDocs.writeVInt(bytes.length);
bufferedDocs.writeBytes(bytes.bytes, bytes.offset, bytes.length);
} else if (string != null) {
bufferedDocs.writeString(field.stringValue());
} else {
if (number instanceof Byte || number instanceof Short || number instanceof Integer) {
bufferedDocs.writeInt(number.intValue());
} else if (number instanceof Long) {
bufferedDocs.writeLong(number.longValue());
} else if (number instanceof Float) {
bufferedDocs.writeInt(Float.floatToIntBits(number.floatValue()));
} else if (number instanceof Double) {
bufferedDocs.writeLong(Double.doubleToLongBits(number.doubleValue()));
} else {
throw new AssertionError("Cannot get here");
}
}
}
@Override
public void abort() {
IOUtils.closeWhileHandlingException(this);
IOUtils.deleteFilesIgnoringExceptions(directory,
IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION),
IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION));
}
@Override
public void finish(FieldInfos fis, int numDocs) throws IOException {
endWithPreviousDocument();
if (numBufferedDocs > 0) {
flush();
}
if (docBase != numDocs) {
throw new RuntimeException("Wrote " + docBase + " docs, finish called with numDocs=" + numDocs);
}
indexWriter.finish(numDocs);
assert bufferedDocs.length == 0;
}
@Override
public int merge(MergeState mergeState) throws IOException {
int docCount = 0;
int idx = 0;
for (AtomicReader reader : mergeState.readers) {
final SegmentReader matchingSegmentReader = mergeState.matchingSegmentReaders[idx++];
CompressingStoredFieldsReader matchingFieldsReader = null;
if (matchingSegmentReader != null) {
final StoredFieldsReader fieldsReader = matchingSegmentReader.getFieldsReader();
// we can only bulk-copy if the matching reader is also a CompressingStoredFieldsReader
if (fieldsReader != null && fieldsReader instanceof CompressingStoredFieldsReader) {
matchingFieldsReader = (CompressingStoredFieldsReader) fieldsReader;
}
}
final int maxDoc = reader.maxDoc();
final Bits liveDocs = reader.getLiveDocs();
if (matchingFieldsReader == null) {
// naive merge...
for (int i = nextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = nextLiveDoc(i + 1, liveDocs, maxDoc)) {
StoredDocument doc = reader.document(i);
addDocument(doc, mergeState.fieldInfos);
++docCount;
mergeState.checkAbort.work(300);
}
} else {
int docID = nextLiveDoc(0, liveDocs, maxDoc);
if (docID < maxDoc) {
// not all docs were deleted
final ChunkIterator it = matchingFieldsReader.chunkIterator(docID);
int[] startOffsets = new int[0];
do {
// go to the next chunk that contains docID
it.next(docID);
// transform lengths into offsets
if (startOffsets.length < it.chunkDocs) {
startOffsets = new int[ArrayUtil.oversize(it.chunkDocs, 4)];
}
for (int i = 1; i < it.chunkDocs; ++i) {
startOffsets[i] = startOffsets[i - 1] + it.lengths[i - 1];
}
if (compressionMode == matchingFieldsReader.getCompressionMode() // same compression mode
&& (numBufferedDocs == 0 || bufferedDocs.length >= chunkSize) // starting a new chunk
&& startOffsets[it.chunkDocs - 1] < chunkSize // chunk is small enough
&& startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] >= chunkSize // chunk is large enough
&& nextDeletedDoc(it.docBase, liveDocs, it.docBase + it.chunkDocs) == it.docBase + it.chunkDocs) { // no deletion in the chunk
assert docID == it.docBase;
// no need to uncompress, just copy data
endWithPreviousDocument();
if (bufferedDocs.length >= chunkSize) {
flush();
}
indexWriter.writeIndex(it.chunkDocs, fieldsStream.getFilePointer());
writeHeader(this.docBase, it.chunkDocs, it.lengths);
it.copyCompressedData(fieldsStream);
this.docBase += it.chunkDocs;
docID = nextLiveDoc(it.docBase + it.chunkDocs, liveDocs, maxDoc);
docCount += it.chunkDocs;
mergeState.checkAbort.work(300 * it.chunkDocs);
} else {
// uncompress
it.uncompress();
if (startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] != it.bytes.length) {
throw new CorruptIndexException("Corrupted: expected chunk size=" + startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] + ", got " + it.bytes.length);
}
// copy non-deleted docs
for (; docID < it.docBase + it.chunkDocs; docID = nextLiveDoc(docID + 1, liveDocs, maxDoc)) {
final int diff = docID - it.docBase;
addRawDocument(it.bytes.bytes, it.bytes.offset + startOffsets[diff], it.lengths[diff]);
++docCount;
mergeState.checkAbort.work(300);
}
}
} while (docID < maxDoc);
}
}
}
finish(mergeState.fieldInfos, docCount);
return docCount;
}
private static int nextLiveDoc(int doc, Bits liveDocs, int maxDoc) {
if (liveDocs == null) {
return doc;
}
while (doc < maxDoc && !liveDocs.get(doc)) {
++doc;
}
return doc;
}
private static int nextDeletedDoc(int doc, Bits liveDocs, int maxDoc) {
if (liveDocs == null) {
return maxDoc;
}
while (doc < maxDoc && liveDocs.get(doc)) {
++doc;
}
return doc;
}
}

View File

@ -0,0 +1,343 @@
package org.apache.lucene.codecs.compressing;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.zip.DataFormatException;
import java.util.zip.Deflater;
import java.util.zip.Inflater;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
/**
* A compression mode. Tells how much effort should be spent on compression and
* uncompression of stored fields.
* @lucene.experimental
*/
public enum CompressionMode {
/**
* A compression mode that trades compression ratio for speed. Although the
* compression ratio might remain high, compression and uncompression are
* very fast. Use this mode with indices that have a high update rate but
* should be able to load documents from disk quickly.
*/
FAST(0) {
@Override
Compressor newCompressor() {
return LZ4_FAST_COMPRESSOR;
}
@Override
Uncompressor newUncompressor() {
return LZ4_UNCOMPRESSOR;
}
},
/**
* A compression mode that trades speed for compression ratio. Although
* compression and uncompression might be slow, this compression mode should
* provide a good compression ratio. This mode might be interesting if/when
* your index size is much bigger than your OS cache.
*/
HIGH_COMPRESSION(1) {
@Override
Compressor newCompressor() {
return new DeflateCompressor(Deflater.BEST_COMPRESSION);
}
@Override
Uncompressor newUncompressor() {
return new DeflateUncompressor();
}
},
/**
* This compression mode is similar to {@link #FAST} but it spends more time
* compressing in order to improve the compression ratio. This compression
* mode is best used with indices that have a low update rate but should be
* able to load documents from disk quickly.
*/
FAST_UNCOMPRESSION(2) {
@Override
Compressor newCompressor() {
return LZ4_HIGH_COMPRESSOR;
}
@Override
Uncompressor newUncompressor() {
return LZ4_UNCOMPRESSOR;
}
};
public static CompressionMode byId(int id) {
for (CompressionMode mode : CompressionMode.values()) {
if (mode.getId() == id) {
return mode;
}
}
throw new IllegalArgumentException("Unknown id: " + id);
}
private final int id;
private CompressionMode(int id) {
this.id = id;
}
/**
* Returns an ID for this compression mode. Should be unique across
* {@link CompressionMode}s as it is used for serialization and
* deserialization.
*/
public final int getId() {
return id;
}
/**
* Create a new {@link Compressor} instance.
*/
abstract Compressor newCompressor();
/**
* Create a new {@link Uncompressor} instance.
*/
abstract Uncompressor newUncompressor();
private static final Uncompressor LZ4_UNCOMPRESSOR = new Uncompressor() {
@Override
public void uncompress(DataInput in, BytesRef bytes) throws IOException {
final int uncompressedLen = in.readVInt();
if (bytes.bytes.length < uncompressedLen + 8) {
bytes.bytes = ArrayUtil.grow(bytes.bytes, uncompressedLen + 8);
}
LZ4.uncompress(in, uncompressedLen, bytes);
if (bytes.length != uncompressedLen) {
throw new IOException("Corrupted");
}
}
@Override
public void uncompress(DataInput in, int offset, int length, BytesRef bytes) throws IOException {
final int uncompressedLen = in.readVInt();
if (offset > uncompressedLen) {
bytes.length = 0;
return;
}
if (bytes.bytes.length < uncompressedLen) {
bytes.bytes = ArrayUtil.grow(bytes.bytes, uncompressedLen);
}
LZ4.uncompress(in, offset + length, bytes);
bytes.offset = offset;
if (offset + length >= uncompressedLen) {
if (bytes.length != uncompressedLen) {
throw new IOException("Corrupted");
}
bytes.length = uncompressedLen - offset;
} else {
bytes.length = length;
}
}
public void copyCompressedData(DataInput in, DataOutput out) throws IOException {
final int uncompressedLen = in.readVInt();
out.writeVInt(uncompressedLen);
if (uncompressedLen == 0) {
out.writeByte((byte) 0); // the token
return;
}
int n = 0;
while (n < uncompressedLen) {
// literals
final byte token = in.readByte();
out.writeByte(token);
int literalLen = (token & 0xFF) >>> 4;
if (literalLen == 0x0F) {
byte len;
while ((len = in.readByte()) == (byte) 0xFF) {
literalLen += 0xFF;
out.writeByte(len);
}
literalLen += len & 0xFF;
out.writeByte(len);
}
out.copyBytes(in, literalLen);
n += literalLen;
if (n >= uncompressedLen) {
break;
}
// match
out.copyBytes(in, 2); // match dec
int matchLen = token & 0x0F;
if (matchLen == 0x0F) {
byte len;
while ((len = in.readByte()) == (byte) 0xFF) {
matchLen += 0xFF;
out.writeByte(len);
}
matchLen += len & 0xFF;
out.writeByte(len);
}
matchLen += LZ4.MIN_MATCH;
n += matchLen;
}
if (n != uncompressedLen) {
throw new IOException("Currupted compressed stream: expected " + uncompressedLen + " bytes, but got at least" + n);
}
}
@Override
public Uncompressor clone() {
return this;
}
};
private static final Compressor LZ4_FAST_COMPRESSOR = new Compressor() {
@Override
public void compress(byte[] bytes, int off, int len, DataOutput out)
throws IOException {
out.writeVInt(len);
LZ4.compress(bytes, off, len, out);
}
};
private static final Compressor LZ4_HIGH_COMPRESSOR = new Compressor() {
@Override
public void compress(byte[] bytes, int off, int len, DataOutput out)
throws IOException {
out.writeVInt(len);
LZ4.compressHC(bytes, off, len, out);
}
};
private static final class DeflateUncompressor extends Uncompressor {
final Inflater uncompressor;
byte[] compressed;
DeflateUncompressor() {
uncompressor = new Inflater();
compressed = new byte[0];
}
@Override
public void uncompress(DataInput in, BytesRef bytes) throws IOException {
bytes.offset = bytes.length = 0;
final int compressedLength = in.readVInt();
if (compressedLength > compressed.length) {
compressed = ArrayUtil.grow(compressed, compressedLength);
}
in.readBytes(compressed, 0, compressedLength);
uncompressor.reset();
uncompressor.setInput(compressed, 0, compressedLength);
if (uncompressor.needsInput()) {
return;
}
while (true) {
final int count;
try {
final int remaining = bytes.bytes.length - bytes.length;
count = uncompressor.inflate(bytes.bytes, bytes.length, remaining);
} catch (DataFormatException e) {
throw new IOException(e);
}
bytes.length += count;
if (uncompressor.finished()) {
break;
} else {
bytes.bytes = ArrayUtil.grow(bytes.bytes);
}
}
}
@Override
public void copyCompressedData(DataInput in, DataOutput out) throws IOException {
final int compressedLength = in.readVInt();
out.writeVInt(compressedLength);
out.copyBytes(in, compressedLength);
}
@Override
public Uncompressor clone() {
return new DeflateUncompressor();
}
}
private static class DeflateCompressor extends Compressor {
final Deflater compressor;
byte[] compressed;
DeflateCompressor(int level) {
compressor = new Deflater(level);
compressed = new byte[64];
}
@Override
public void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException {
compressor.reset();
compressor.setInput(bytes, off, len);
compressor.finish();
if (compressor.needsInput()) {
// no output
out.writeVInt(0);
return;
}
int totalCount = 0;
for (;;) {
final int count = compressor.deflate(compressed, totalCount, compressed.length - totalCount);
totalCount += count;
assert totalCount <= compressed.length;
if (compressor.finished()) {
break;
} else {
compressed = ArrayUtil.grow(compressed);
}
}
out.writeVInt(totalCount);
out.writeBytes(compressed, totalCount);
}
}
}
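
Because Compressor and Uncompressor are package-private, a round trip through a CompressionMode is easiest to sketch from inside this package. A minimal, hedged example (org.apache.lucene.store.ByteArrayDataInput is assumed to be imported; the method name is illustrative):

// Sketch only: compress a byte[] with the given mode and check that uncompression restores it.
static void roundTrip(CompressionMode mode, byte[] data) throws IOException {
  GrowableByteArrayDataOutput compressed = new GrowableByteArrayDataOutput(1 + data.length);
  mode.newCompressor().compress(data, 0, data.length, compressed);
  BytesRef restored = new BytesRef();
  mode.newUncompressor().uncompress(
      new ByteArrayDataInput(compressed.bytes, 0, compressed.length), restored);
  assert new BytesRef(data).bytesEquals(restored);
}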

View File

@ -0,0 +1,36 @@
package org.apache.lucene.codecs.compressing;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.store.DataOutput;
/**
* A data compressor.
*/
abstract class Compressor {
/**
* Compress bytes into <code>out</code>. It is the responsibility of the
* compressor to add all necessary information so that an {@link Uncompressor}
* will know when to stop uncompressing bytes from the stream.
*/
public abstract void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException;
}
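
To make the contract above concrete, here is a hypothetical compressor (not part of this patch) that does no compression at all but still writes enough framing, a leading length, for a matching uncompressor to know where to stop:

// Hypothetical sketch: "compression" by plain copy with a length header.
final class StoredLengthCompressor extends Compressor {
  @Override
  public void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException {
    out.writeVInt(len);              // framing: tells the uncompressor how many bytes follow
    out.writeBytes(bytes, off, len); // payload copied verbatim
  }
}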

View File

@ -0,0 +1,56 @@
package org.apache.lucene.codecs.compressing;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.ArrayUtil;
/**
* A {@link DataOutput} that can be used to build a byte[].
*/
final class GrowableByteArrayDataOutput extends DataOutput {
byte[] bytes;
int length;
GrowableByteArrayDataOutput(int cp) {
this.bytes = new byte[ArrayUtil.oversize(cp, 1)];
this.length = 0;
}
@Override
public void writeByte(byte b) throws IOException {
if (length >= bytes.length) {
bytes = ArrayUtil.grow(bytes);
}
bytes[length++] = b;
}
@Override
public void writeBytes(byte[] b, int off, int len) throws IOException {
final int newLength = length + len;
if (newLength > bytes.length) {
bytes = ArrayUtil.grow(bytes, newLength);
}
System.arraycopy(b, off, bytes, length, len);
length = newLength;
}
}
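
A tiny, hedged usage sketch (BytesRef import assumed): write a few values, then expose the written region without copying.

// Sketch only: buffer a couple of values and return a view of the backing array.
static BytesRef bufferExample() throws IOException {
  GrowableByteArrayDataOutput buffer = new GrowableByteArrayDataOutput(16);
  buffer.writeVInt(42);              // values are appended to the growable byte[]
  buffer.writeString("title");
  return new BytesRef(buffer.bytes, 0, buffer.length); // written region, no copy
}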

View File

@ -0,0 +1,506 @@
package org.apache.lucene.codecs.compressing;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.packed.PackedInts;
/**
* LZ4 compression and uncompression routines.
*
* http://code.google.com/p/lz4/
* http://fastcompression.blogspot.fr/p/lz4.html
*/
class LZ4 {
private LZ4() {}
static final int MEMORY_USAGE = 14;
static final int MIN_MATCH = 4; // minimum length of a match
static final int MAX_DISTANCE = 1 << 16; // maximum distance of a reference
static final int LAST_LITERALS = 5; // the last 5 bytes must be encoded as literals
static final int HASH_LOG_HC = 15; // log size of the dictionary for compressHC
static final int HASH_TABLE_SIZE_HC = 1 << HASH_LOG_HC;
static final int OPTIMAL_ML = 0x0F + 4 - 1; // match length that doesn't require an additional byte
private static int hash(int i, int hashBits) {
return (i * -1640531535) >>> (32 - hashBits);
}
private static int hashHC(int i) {
return hash(i, HASH_LOG_HC);
}
private static int readInt(byte[] buf, int i) {
return ((buf[i] & 0xFF) << 24) | ((buf[i+1] & 0xFF) << 16) | ((buf[i+2] & 0xFF) << 8) | (buf[i+3] & 0xFF);
}
private static boolean readIntEquals(byte[] buf, int i, int j) {
return readInt(buf, i) == readInt(buf, j);
}
private static int commonBytes(byte[] b, int o1, int o2, int limit) {
assert o1 < o2;
int count = 0;
while (o2 < limit && b[o1++] == b[o2++]) {
++count;
}
return count;
}
private static int commonBytesBackward(byte[] b, int o1, int o2, int l1, int l2) {
int count = 0;
while (o1 > l1 && o2 > l2 && b[--o1] == b[--o2]) {
++count;
}
return count;
}
/**
* Uncompress at least <code>uncompressedLen</code> bytes into <code>destBytes</code>.
* Please note that <code>destBytes</code> must be large enough to be able to hold
* <b>all</b> uncompressed data plus 8 bytes (meaning that you need to know the total
* uncompressed length).
*/
public static void uncompress(DataInput compressed, int uncompressedLen, BytesRef destBytes) throws IOException {
final byte[] dest = destBytes.bytes;
final int destEnd = dest.length;
int dOff = 0;
while (dOff < uncompressedLen) {
// literals
final int token = compressed.readByte() & 0xFF;
int literalLen = token >>> 4;
if (literalLen != 0) {
if (literalLen == 0x0F) {
byte len;
while ((len = compressed.readByte()) == (byte) 0xFF) {
literalLen += 0xFF;
}
literalLen += len & 0xFF;
}
compressed.readBytes(dest, dOff, literalLen);
dOff += literalLen;
}
if (dOff >= uncompressedLen) {
break;
}
// match
final int matchDec = (compressed.readByte() & 0xFF) | ((compressed.readByte() & 0xFF) << 8);
assert matchDec > 0;
int matchLen = token & 0x0F;
if (matchLen == 0x0F) {
int len;
while ((len = compressed.readByte()) == (byte) 0xFF) {
matchLen += 0xFF;
}
matchLen += len & 0xFF;
}
matchLen += MIN_MATCH;
// copying a multiple of 8 bytes can make uncompression from 5% to 10% faster
final int fastLen = ((matchLen - 1) & 0xFFFFFFF8) + 8;
if (matchDec < matchLen || dOff + fastLen > destEnd) {
// overlap -> naive incremental copy
for (int ref = dOff - matchDec, end = dOff + matchLen; dOff < end; ++ref, ++dOff) {
dest[dOff] = dest[ref];
}
} else {
// no overlap -> arraycopy
System.arraycopy(dest, dOff - matchDec, dest, dOff, fastLen);
dOff += matchLen;
}
}
destBytes.offset = 0;
destBytes.length = dOff;
}
private static void encodeLen(int l, DataOutput out) throws IOException {
while (l >= 0xFF) {
out.writeByte((byte) 0xFF);
l -= 0xFF;
}
out.writeByte((byte) l);
}
private static void encodeLiterals(byte[] bytes, int token, int anchor, int literalLen, DataOutput out) throws IOException {
out.writeByte((byte) token);
// encode literal length
if (literalLen >= 0x0F) {
encodeLen(literalLen - 0x0F, out);
}
// encode literals
out.writeBytes(bytes, anchor, literalLen);
}
private static void encodeLastLiterals(byte[] bytes, int anchor, int literalLen, DataOutput out) throws IOException {
final int token = Math.min(literalLen, 0x0F) << 4;
encodeLiterals(bytes, token, anchor, literalLen, out);
}
private static void encodeSequence(byte[] bytes, int anchor, int matchRef, int matchOff, int matchLen, DataOutput out) throws IOException {
final int literalLen = matchOff - anchor;
assert matchLen >= 4;
// encode token
final int token = (Math.min(literalLen, 0x0F) << 4) | Math.min(matchLen - 4, 0x0F);
encodeLiterals(bytes, token, anchor, literalLen, out);
// encode match dec
final int matchDec = matchOff - matchRef;
assert matchDec > 0 && matchDec < 1 << 16;
out.writeByte((byte) matchDec);
out.writeByte((byte) (matchDec >>> 8));
// encode match len
if (matchLen >= MIN_MATCH + 0x0F) {
encodeLen(matchLen - 0x0F - MIN_MATCH, out);
}
}
/**
 * Compress <code>bytes[off:off+len]</code> into <code>out</code> using at most
 * 2<sup>MEMORY_USAGE</sup> bytes of memory (16 KB); the hash table used to find
 * matches is sized automatically from the input length.
*/
public static void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException {
final int base = off;
final int end = off + len;
int anchor = off++;
if (len > LAST_LITERALS + MIN_MATCH) {
final int limit = end - LAST_LITERALS;
final int matchLimit = limit - MIN_MATCH;
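// size the hash table so that it takes about 2^MEMORY_USAGE bytes in total:
// the wider the stored offsets (longer inputs), the fewer hash buckets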
final int bitsPerOffset = PackedInts.bitsRequired(len - LAST_LITERALS);
final int bitsPerOffsetLog = 32 - Integer.numberOfLeadingZeros(bitsPerOffset - 1);
final int hashLog = MEMORY_USAGE + 3 - bitsPerOffsetLog;
final PackedInts.Mutable hashTable = PackedInts.getMutable(1 << hashLog, bitsPerOffset, PackedInts.DEFAULT);
main:
while (off < limit) {
// find a match
int ref;
while (true) {
if (off >= matchLimit) {
break main;
}
final int v = readInt(bytes, off);
final int h = hash(v, hashLog);
ref = base + (int) hashTable.get(h);
assert PackedInts.bitsRequired(off - base) <= hashTable.getBitsPerValue();
hashTable.set(h, off - base);
if (off - ref < MAX_DISTANCE && readInt(bytes, ref) == v) {
break;
}
++off;
}
// compute match length
final int matchLen = MIN_MATCH + commonBytes(bytes, ref + 4, off + 4, limit);
encodeSequence(bytes, anchor, ref, off, matchLen, out);
off += matchLen;
anchor = off;
}
}
// last literals
final int literalLen = end - anchor;
assert literalLen >= LAST_LITERALS || literalLen == len;
encodeLastLiterals(bytes, anchor, end - anchor, out);
}
private static class Match {
int start, ref, len;
void fix(int correction) {
start += correction;
ref += correction;
len -= correction;
}
int end() {
return start + len;
}
}
private static void copyTo(Match m1, Match m2) {
m2.len = m1.len;
m2.start = m1.start;
m2.ref = m1.ref;
}
private static class HashTable {
static final int MAX_ATTEMPTS = 256;
static final int MASK = MAX_DISTANCE - 1;
int nextToUpdate;
private final int base;
private final int[] hashTable;
private final short[] chainTable;
HashTable(int base) {
this.base = base;
nextToUpdate = base;
hashTable = new int[HASH_TABLE_SIZE_HC];
Arrays.fill(hashTable, -1);
chainTable = new short[MAX_DISTANCE];
}
private int hashPointer(byte[] bytes, int off) {
final int v = readInt(bytes, off);
final int h = hashHC(v);
return base + hashTable[h];
}
private int next(int off) {
return base + off - (chainTable[off & MASK] & 0xFFFF);
}
private void addHash(byte[] bytes, int off) {
final int v = readInt(bytes, off);
final int h = hashHC(v);
int delta = off - hashTable[h];
if (delta >= MAX_DISTANCE) {
delta = MAX_DISTANCE - 1;
}
chainTable[off & MASK] = (short) delta;
hashTable[h] = off - base;
}
void insert(int off, byte[] bytes) {
for (; nextToUpdate < off; ++nextToUpdate) {
addHash(bytes, nextToUpdate);
}
}
boolean insertAndFindBestMatch(byte[] buf, int off, int matchLimit, Match match) {
match.start = off;
match.len = 0;
insert(off, buf);
int ref = hashPointer(buf, off);
for (int i = 0; i < MAX_ATTEMPTS; ++i) {
if (ref < Math.max(base, off - MAX_DISTANCE + 1)) {
break;
}
if (buf[ref + match.len] == buf[off + match.len] && readIntEquals(buf, ref, off)) {
final int matchLen = MIN_MATCH + commonBytes(buf, ref + MIN_MATCH, off + MIN_MATCH, matchLimit);
if (matchLen > match.len) {
match.ref = ref;
match.len = matchLen;
}
}
ref = next(ref);
}
return match.len != 0;
}
boolean insertAndFindWiderMatch(byte[] buf, int off, int startLimit, int matchLimit, int minLen, Match match) {
match.len = minLen;
insert(off, buf);
final int delta = off - startLimit;
int ref = hashPointer(buf, off);
for (int i = 0; i < MAX_ATTEMPTS; ++i) {
if (ref < Math.max(base, off - MAX_DISTANCE + 1)) {
break;
}
if (buf[ref - delta + match.len] == buf[startLimit + match.len]
&& readIntEquals(buf, ref, off)) {
final int matchLenForward = MIN_MATCH + commonBytes(buf, ref + MIN_MATCH, off + MIN_MATCH, matchLimit);
final int matchLenBackward = commonBytesBackward(buf, ref, off, base, startLimit);
final int matchLen = matchLenBackward + matchLenForward;
if (matchLen > match.len) {
match.len = matchLen;
match.ref = ref - matchLenBackward;
match.start = off - matchLenBackward;
}
}
ref = next(ref);
}
return match.len > minLen;
}
}
public static void compressHC(byte[] src, int srcOff, int srcLen, DataOutput out) throws IOException {
final int srcEnd = srcOff + srcLen;
final int matchLimit = srcEnd - LAST_LITERALS;
int sOff = srcOff;
int anchor = sOff++;
final HashTable ht = new HashTable(srcOff);
final Match match0 = new Match();
final Match match1 = new Match();
final Match match2 = new Match();
final Match match3 = new Match();
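// greedy parse with look-ahead: match1 is the current candidate, match2/match3 are
// overlapping candidates that start slightly later and may be longer; a sequence is
// only emitted once no overlapping candidate beats the current one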
main:
while (sOff < matchLimit) {
if (!ht.insertAndFindBestMatch(src, sOff, matchLimit, match1)) {
++sOff;
continue;
}
// save it, in case the search below skips too far ahead
copyTo(match1, match0);
search2:
while (true) {
assert match1.start >= anchor;
if (match1.end() >= matchLimit
|| !ht.insertAndFindWiderMatch(src, match1.end() - 2, match1.start + 1, matchLimit, match1.len, match2)) {
// no better match
encodeSequence(src, anchor, match1.ref, match1.start, match1.len, out);
anchor = sOff = match1.end();
continue main;
}
if (match0.start < match1.start) {
if (match2.start < match1.start + match0.len) { // empirical
copyTo(match0, match1);
}
}
assert match2.start > match1.start;
if (match2.start - match1.start < 3) { // first match too small: drop it and restart from the wider one
copyTo(match2, match1);
continue search2;
}
search3:
while (true) {
if (match2.start - match1.start < OPTIMAL_ML) {
int newMatchLen = match1.len;
if (newMatchLen > OPTIMAL_ML) {
newMatchLen = OPTIMAL_ML;
}
if (match1.start + newMatchLen > match2.end() - MIN_MATCH) {
newMatchLen = match2.start - match1.start + match2.len - MIN_MATCH;
}
final int correction = newMatchLen - (match2.start - match1.start);
if (correction > 0) {
match2.fix(correction);
}
}
if (match2.start + match2.len >= matchLimit
|| !ht.insertAndFindWiderMatch(src, match2.end() - 3, match2.start, matchLimit, match2.len, match3)) {
// no better match -> 2 sequences to encode
if (match2.start < match1.end()) {
if (match2.start - match1.start < OPTIMAL_ML) {
if (match1.len > OPTIMAL_ML) {
match1.len = OPTIMAL_ML;
}
if (match1.end() > match2.end() - MIN_MATCH) {
match1.len = match2.end() - match1.start - MIN_MATCH;
}
final int correction = match1.len - (match2.start - match1.start);
if (correction > 0) {
match2.fix(correction);
}
} else {
match1.len = match2.start - match1.start;
}
}
// encode seq 1
encodeSequence(src, anchor, match1.ref, match1.start, match1.len, out);
anchor = sOff = match1.end();
// encode seq 2
encodeSequence(src, anchor, match2.ref, match2.start, match2.len, out);
anchor = sOff = match2.end();
continue main;
}
if (match3.start < match1.end() + 3) { // not enough space for match 2: remove it
if (match3.start >= match1.end()) { // can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1
if (match2.start < match1.end()) {
final int correction = match1.end() - match2.start;
match2.fix(correction);
if (match2.len < MIN_MATCH) {
copyTo(match3, match2);
}
}
encodeSequence(src, anchor, match1.ref, match1.start, match1.len, out);
anchor = sOff = match1.end();
copyTo(match3, match1);
copyTo(match2, match0);
continue search2;
}
copyTo(match3, match2);
continue search3;
}
// OK, now we have 3 ascending matches; let's write at least the first one
if (match2.start < match1.end()) {
if (match2.start - match1.start < 0x0F) {
if (match1.len > OPTIMAL_ML) {
match1.len = OPTIMAL_ML;
}
if (match1.end() > match2.end() - MIN_MATCH) {
match1.len = match2.end() - match1.start - MIN_MATCH;
}
final int correction = match1.end() - match2.start;
match2.fix(correction);
} else {
match1.len = match2.start - match1.start;
}
}
encodeSequence(src, anchor, match1.ref, match1.start, match1.len, out);
anchor = sOff = match1.end();
copyTo(match2, match1);
copyTo(match3, match2);
continue search3;
}
}
}
encodeLastLiterals(src, anchor, srcEnd - anchor, out);
}
}
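As a usage sketch of the entry points above (compress or compressHC to write, uncompress to read back), the helper below round-trips a buffer. It is hypothetical — the class and method names are illustrative and not part of this patch — and has to live in this package because LZ4 is package-private. Buffer sizes follow the constraints stated in the javadocs: the compressed buffer is over-allocated, and the destination gets 8 spare bytes on top of the uncompressed length.

package org.apache.lucene.codecs.compressing;

import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.util.BytesRef;

// Hypothetical round-trip helper, for illustration only.
class LZ4RoundTrip {

  static byte[] roundTrip(byte[] original, boolean highCompression) throws IOException {
    // compress into an over-allocated buffer: even incompressible input only grows slightly
    final byte[] compressed = new byte[original.length * 2 + 16];
    final ByteArrayDataOutput out = new ByteArrayDataOutput(compressed);
    if (highCompression) {
      LZ4.compressHC(original, 0, original.length, out);
    } else {
      LZ4.compress(original, 0, original.length, out);
    }
    final int compressedLen = out.getPosition();

    // the destination must hold all uncompressed bytes plus 8 extra bytes
    final BytesRef restored = new BytesRef(new byte[original.length + 8]);
    LZ4.uncompress(new ByteArrayDataInput(compressed, 0, compressedLen), original.length, restored);
    return Arrays.copyOfRange(restored.bytes, restored.offset, restored.offset + restored.length);
  }
}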

View File

@@ -0,0 +1,56 @@
package org.apache.lucene.codecs.compressing;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.BytesRef;
/**
 * An uncompressor: restores the bytes that were written by a matching {@link Compressor}.
*/
abstract class Uncompressor implements Cloneable {
/**
* Uncompress bytes. This method is free to resize <code>bytes</code> in case
* it is too small to hold all the uncompressed data.
*/
public abstract void uncompress(DataInput in, BytesRef bytes) throws IOException;
/**
 * Method to use if you are only interested in <code>length</code>
* uncompressed bytes starting at offset <code>offset</code>. Some compression
* codecs might have optimizations for this special case.
*/
public void uncompress(DataInput in, int offset, int length, BytesRef bytes) throws IOException {
uncompress(in, bytes);
if (bytes.length < offset + length) {
throw new IndexOutOfBoundsException((offset + length) + " > " + bytes.length);
}
bytes.offset += offset;
bytes.length = length;
}
public abstract void copyCompressedData(DataInput in, DataOutput out) throws IOException;
@Override
public abstract Uncompressor clone();
}
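To make the contract concrete, here is a hypothetical Uncompressor for data stored without compression, assuming a stream layout of a vInt length followed by the raw bytes; it only illustrates the three methods above and is not one of the uncompressors shipped in this patch.

package org.apache.lucene.codecs.compressing;

import java.io.IOException;

import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;

// Hypothetical example: assumes the stream is a vInt length followed by raw bytes.
class RawUncompressor extends Uncompressor {

  @Override
  public void uncompress(DataInput in, BytesRef bytes) throws IOException {
    final int len = in.readVInt();
    if (bytes.bytes.length < len) {
      bytes.bytes = new byte[ArrayUtil.oversize(len, 1)]; // resizing is explicitly allowed
    }
    in.readBytes(bytes.bytes, 0, len);
    bytes.offset = 0;
    bytes.length = len;
  }

  @Override
  public void copyCompressedData(DataInput in, DataOutput out) throws IOException {
    // copy the length header and then the raw payload without decoding it
    final int len = in.readVInt();
    out.writeVInt(len);
    out.copyBytes(in, len);
  }

  @Override
  public Uncompressor clone() {
    return this; // stateless, so the same instance can be shared
  }
}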

View File

@@ -0,0 +1,25 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
StoredFieldsFormat that allows cross-document and cross-field compression of stored fields.
</body>
</html>

View File

@@ -0,0 +1,111 @@
package org.apache.lucene.codecs.compressing;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import com.carrotsearch.randomizedtesting.generators.RandomInts;
public abstract class AbstractTestCompressionMode extends LuceneTestCase {
CompressionMode mode;
static byte[] randomArray() {
final int max = random().nextBoolean()
? random().nextInt(4)
: random().nextInt(256);
final int length = random().nextBoolean()
? random().nextInt(20)
: random().nextInt(192 * 1024);
final byte[] arr = new byte[length];
for (int i = 0; i < arr.length; ++i) {
arr[i] = (byte) RandomInts.randomIntBetween(random(), 0, max);
}
return arr;
}
byte[] compress(byte[] uncompressed) throws IOException {
Compressor compressor = mode.newCompressor();
return compress(compressor, uncompressed);
}
static byte[] compress(Compressor compressor, byte[] uncompressed) throws IOException {
byte[] compressed = new byte[uncompressed.length * 2 + 16]; // should be enough
ByteArrayDataOutput out = new ByteArrayDataOutput(compressed);
compressor.compress(uncompressed, 0, uncompressed.length, out);
final int compressedLen = out.getPosition();
return Arrays.copyOf(compressed, compressedLen);
}
byte[] uncompress(byte[] compressed) throws IOException {
Uncompressor uncompressor = mode.newUncompressor();
return uncompress(uncompressor, compressed);
}
static byte[] uncompress(Uncompressor uncompressor, byte[] compressed) throws IOException {
final BytesRef bytes = new BytesRef();
uncompressor.uncompress(new ByteArrayDataInput(compressed), bytes);
return Arrays.copyOfRange(bytes.bytes, bytes.offset, bytes.offset + bytes.length);
}
byte[] uncompress(byte[] compressed, int offset, int length) throws IOException {
Uncompressor uncompressor = mode.newUncompressor();
final BytesRef bytes = new BytesRef();
uncompressor.uncompress(new ByteArrayDataInput(compressed), offset, length, bytes);
return Arrays.copyOfRange(bytes.bytes, bytes.offset, bytes.offset + bytes.length);
}
public void testUncompress() throws IOException {
final byte[] uncompressed = randomArray();
final byte[] compressed = compress(uncompressed);
final byte[] restored = uncompress(compressed);
assertArrayEquals(uncompressed, restored);
}
public void testPartialUncompress() throws IOException {
final int iterations = atLeast(10);
for (int i = 0; i < iterations; ++i) {
final byte[] uncompressed = randomArray();
final byte[] compressed = compress(uncompressed);
final int offset, length;
if (uncompressed.length == 0) {
offset = length = 0;
} else {
offset = random().nextInt(uncompressed.length);
length = random().nextInt(uncompressed.length - offset);
}
final byte[] restored = uncompress(compressed, offset, length);
assertArrayEquals(Arrays.copyOfRange(uncompressed, offset, offset + length), restored);
}
}
public void testCopyCompressedData() throws IOException {
final byte[] uncompressed = randomArray();
final byte[] compressed = compress(uncompressed);
GrowableByteArrayDataOutput out = new GrowableByteArrayDataOutput(uncompressed.length);
mode.newUncompressor().copyCompressedData(new ByteArrayDataInput(compressed), out);
assertArrayEquals(compressed, Arrays.copyOf(out.bytes, out.length));
}
}

View File

@@ -0,0 +1,193 @@
package org.apache.lucene.codecs.compressing;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.lucene40.Lucene40Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.FloatField;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.StorableField;
import org.apache.lucene.index.StoredDocument;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
import com.carrotsearch.randomizedtesting.generators.RandomInts;
public class TestCompressingStoredFieldsFormat extends LuceneTestCase {
public void testWriteReadMerge() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwConf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
iwConf.setCodec(CompressingCodec.randomInstance(random()));
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);
final int docCount = atLeast(200);
final byte[][][] data = new byte[docCount][][];
for (int i = 0; i < docCount; ++i) {
final int fieldCount = rarely()
? RandomInts.randomIntBetween(random(), 1, 500)
: RandomInts.randomIntBetween(random(), 1, 5);
data[i] = new byte[fieldCount][];
for (int j = 0; j < fieldCount; ++j) {
final int length = rarely()
? random().nextInt(1000)
: random().nextInt(10);
final byte[] arr = new byte[length];
final int max = rarely() ? 256 : 2;
for (int k = 0; k < length; ++k) {
arr[k] = (byte) random().nextInt(max);
}
data[i][j] = arr;
}
}
final FieldType type = new FieldType(StringField.TYPE_STORED);
type.setIndexed(false);
type.freeze();
IntField id = new IntField("id", 0, Store.YES);
for (int i = 0; i < data.length; ++i) {
Document doc = new Document();
doc.add(id);
id.setIntValue(i);
for (int j = 0; j < data[i].length; ++j) {
Field f = new Field("bytes" + j, data[i][j], type);
doc.add(f);
}
iw.w.addDocument(doc);
if (random().nextBoolean() && (i % (data.length / 10) == 0)) {
iw.w.close();
// switch codecs
if (iwConf.getCodec() instanceof Lucene40Codec) {
iwConf.setCodec(CompressingCodec.randomInstance(random()));
} else {
iwConf.setCodec(new Lucene40Codec());
}
iw = new RandomIndexWriter(random(), dir, iwConf);
}
}
for (int i = 0; i < 10; ++i) {
final int min = random().nextInt(data.length);
final int max = min + random().nextInt(20);
iw.deleteDocuments(NumericRangeQuery.newIntRange("id", min, max, true, false));
}
iw.forceMerge(2); // force merges with deletions
iw.commit();
final DirectoryReader ir = DirectoryReader.open(dir);
assertTrue(ir.numDocs() > 0);
int numDocs = 0;
for (int i = 0; i < ir.maxDoc(); ++i) {
final StoredDocument doc = ir.document(i);
if (doc == null) {
continue;
}
++numDocs;
final int docId = doc.getField("id").numericValue().intValue();
assertEquals(data[docId].length + 1, doc.getFields().size());
for (int j = 0; j < data[docId].length; ++j) {
final byte[] arr = data[docId][j];
final BytesRef arr2Ref = doc.getBinaryValue("bytes" + j);
final byte[] arr2 = Arrays.copyOfRange(arr2Ref.bytes, arr2Ref.offset, arr2Ref.offset + arr2Ref.length);
assertArrayEquals(arr, arr2);
}
}
assertTrue(ir.numDocs() <= numDocs);
ir.close();
iw.deleteAll();
iw.commit();
iw.forceMerge(1);
iw.close();
dir.close();
}
public void testReadSkip() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwConf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
iwConf.setCodec(CompressingCodec.randomInstance(random()));
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);
FieldType ft = new FieldType();
ft.setStored(true);
ft.freeze();
final String string = _TestUtil.randomSimpleString(random(), 50);
final byte[] bytes = string.getBytes("UTF-8");
final long l = random().nextBoolean() ? random().nextInt(42) : random().nextLong();
final int i = random().nextBoolean() ? random().nextInt(42) : random().nextInt();
final float f = random().nextFloat();
final double d = random().nextDouble();
List<Field> fields = Arrays.asList(
new Field("bytes", bytes, ft),
new Field("string", string, ft),
new LongField("long", l, Store.YES),
new IntField("int", i, Store.YES),
new FloatField("float", f, Store.YES),
new DoubleField("double", d, Store.YES)
);
for (int k = 0; k < 100; ++k) {
Document doc = new Document();
for (Field fld : fields) {
doc.add(fld);
}
iw.w.addDocument(doc);
}
iw.close();
final DirectoryReader reader = DirectoryReader.open(dir);
final int docID = random().nextInt(100);
for (Field fld : fields) {
String fldName = fld.name();
final StoredDocument sDoc = reader.document(docID, Collections.singleton(fldName));
final StorableField sField = sDoc.getField(fldName);
if (Field.class.equals(fld.getClass())) {
assertEquals(fld.binaryValue(), sField.binaryValue());
assertEquals(fld.stringValue(), sField.stringValue());
} else {
assertEquals(fld.numericValue(), sField.numericValue());
}
}
reader.close();
dir.close();
}
}

View File

@@ -0,0 +1,27 @@
package org.apache.lucene.codecs.compressing;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
public class TestFastCompressionMode extends AbstractTestCompressionMode {
@Override
public void setUp() throws Exception {
super.setUp();
mode = CompressionMode.FAST;
}
}

View File

@@ -0,0 +1,28 @@
package org.apache.lucene.codecs.compressing;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
public class TestFastUncompressionMode extends AbstractTestCompressionMode {
@Override
public void setUp() throws Exception {
super.setUp();
mode = CompressionMode.FAST_UNCOMPRESSION;
}
}

View File

@@ -0,0 +1,27 @@
package org.apache.lucene.codecs.compressing;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
public class TestHighCompressionMode extends AbstractTestCompressionMode {
@Override
public void setUp() throws Exception {
super.setUp();
mode = CompressionMode.HIGH_COMPRESSION;
}
}

View File

@@ -37,7 +37,7 @@ final class PackedReaderIterator extends PackedInts.ReaderIteratorImpl {
this.format = format;
bulkOperation = BulkOperation.of(format, bitsPerValue);
iterations = bulkOperation.computeIterations(valueCount, mem);
assert iterations > 0;
assert valueCount == 0 || iterations > 0;
nextBlocks = new long[iterations * bulkOperation.blockCount()];
nextValues = new LongsRef(new long[iterations * bulkOperation.valueCount()], 0, 0);
assert iterations * bulkOperation.valueCount() == nextValues.longs.length;

View File

@@ -0,0 +1,64 @@
package org.apache.lucene.codecs.compressing;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Random;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40Codec;
import com.carrotsearch.randomizedtesting.generators.RandomInts;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
/**
* A codec that uses {@link CompressingStoredFieldsFormat} for its stored
* fields and delegates to {@link Lucene40Codec} for everything else.
*/
public class CompressingCodec extends FilterCodec {
/**
* Create a random instance.
*/
public static CompressingCodec randomInstance(Random random) {
final CompressionMode mode = RandomPicks.randomFrom(random, CompressionMode.values());
final int chunkSize = RandomInts.randomIntBetween(random, 1, 500);
final CompressingStoredFieldsIndex index = RandomPicks.randomFrom(random, CompressingStoredFieldsIndex.values());
return new CompressingCodec(mode, chunkSize, index);
}
private final CompressingStoredFieldsFormat storedFieldsFormat;
/**
* @see CompressingStoredFieldsFormat#CompressingStoredFieldsFormat(CompressionMode, int, CompressingStoredFieldsIndex)
*/
public CompressingCodec(CompressionMode compressionMode, int chunkSize,
CompressingStoredFieldsIndex storedFieldsIndexFormat) {
super("Compressing", new Lucene40Codec());
this.storedFieldsFormat = new CompressingStoredFieldsFormat(compressionMode, chunkSize, storedFieldsIndexFormat);
}
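/** Creates a default instance: FAST compression, 16 KB chunks and an in-memory chunk index; the no-arg constructor is what SPI uses when the codec is looked up by name. */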
public CompressingCodec() {
this(CompressionMode.FAST, 1 << 14, CompressingStoredFieldsIndex.MEMORY_CHUNK);
}
@Override
public StoredFieldsFormat storedFieldsFormat() {
return storedFieldsFormat;
}
}
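Outside of randomized tests, the codec can also be wired in explicitly. The sketch below is hypothetical (class and method names are illustrative; it assumes a Directory and an Analyzer are available and that Version.LUCENE_40 is the current version constant) and simply passes the same arguments as the default constructor above.

package org.apache.lucene.codecs.compressing;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;

// Hypothetical wiring example, not part of this patch.
class CompressingCodecSetup {

  static IndexWriter open(Directory dir, Analyzer analyzer) throws IOException {
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40, analyzer);
    conf.setCodec(new CompressingCodec(
        CompressionMode.FAST,                      // fast compression and uncompression
        1 << 14,                                   // 16 KB chunks of documents
        CompressingStoredFieldsIndex.MEMORY_CHUNK));
    return new IndexWriter(dir, conf);
  }
}

Since CompressingCodec lives in the test framework, a production setup would more likely wrap CompressingStoredFieldsFormat in its own FilterCodec, but the configuration surface is the same.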

View File

@@ -0,0 +1,25 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
Support for testing {@link org.apache.lucene.codecs.compressing.CompressingStoredFieldsFormat}.
</body>
</html>

View File

@@ -31,6 +31,7 @@ import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.appending.AppendingCodec;
import org.apache.lucene.codecs.asserting.AssertingCodec;
import org.apache.lucene.codecs.compressing.CompressingCodec;
import org.apache.lucene.codecs.lucene40.Lucene40Codec;
import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
import org.apache.lucene.codecs.simpletext.SimpleTextCodec;
@@ -165,6 +166,8 @@ final class TestRuleSetupAndRestoreClassEnv extends AbstractBeforeAfterRule {
codec = new AppendingCodec();
} else if ("Asserting".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 7 && !shouldAvoidCodec("Asserting"))) {
codec = new AssertingCodec();
} else if ("Compressing".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 6 && !shouldAvoidCodec("Compressing"))) {
codec = CompressingCodec.randomInstance(random);
} else if (!"random".equals(TEST_CODEC)) {
codec = Codec.forName(TEST_CODEC);
} else if ("random".equals(TEST_POSTINGSFORMAT)) {

View File

@@ -14,3 +14,4 @@
# limitations under the License.
org.apache.lucene.codecs.asserting.AssertingCodec
org.apache.lucene.codecs.compressing.CompressingCodec
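The SPI entry above is what makes the codec addressable by the name passed to the FilterCodec constructor ("Compressing"), both for Codec.forName and for the -Dtests.codec=Compressing branch added to TestRuleSetupAndRestoreClassEnv. A minimal, hypothetical lookup (class and method names are illustrative):

package org.apache.lucene.codecs.compressing;

import org.apache.lucene.codecs.Codec;

class CodecLookupExample {

  static Codec lookupCompressing() {
    // resolved through the services file above, using CompressingCodec's no-arg constructor
    return Codec.forName("Compressing");
  }
}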