LUCENE-1960: Add support for reading Field.Store.COMPRESS fields again and decompress on merge/optimize. Also convert to a new stored file version number.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@829972 13f79535-47bb-0310-9956-ffa450edef68
Uwe Schindler 2009-10-26 21:16:57 +00:00
parent 6a1f2d5729
commit c61fa48daf
6 changed files with 259 additions and 49 deletions

View File

@@ -15,6 +15,25 @@ Changes in runtime behavior
* LUCENE-1677: Remove the system property to set SegmentReader class
implementation. (Uwe Schindler)
* LUCENE-1960: As a consequence of the removal of Field.Store.COMPRESS,
support for this type of field was removed. Lucene 3.0 is still able
to read indexes with compressed fields, but as soon as merges occur
or the index is optimized, all compressed fields are decompressed
and converted to Field.Store.YES. Because of this, indexes with
compressed fields can suddenly get larger. Also, the first merge with
decompression cannot be done in raw mode and is therefore slower.
This change has no effect on code that uses such old indexes;
they behave as before (fields are automatically decompressed
during read). Indexes converted to the Lucene 3.0 format can no
longer be read by previous versions.
It is recommended to optimize your indexes after upgrading, to convert
them to the new format and decompress all fields.
If you want compressed fields, you can use CompressionTools, which
creates a compressed byte[] to be added as a binary stored field. This
cannot be done automatically, as you also have to decompress such
fields when reading; switching existing fields requires reindexing.
(Michael Busch, Uwe Schindler)
API Changes
* LUCENE-1257, LUCENE-1984, LUCENE-1985, ...: Port to Java 1.5 [not yet finished].
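
The CHANGES entry for LUCENE-1960 above points users at CompressionTools as the manual replacement for Field.Store.COMPRESS. A minimal sketch of that migration path (the field name "body" and the surrounding reader/writer setup are assumptions, not part of this patch):

import java.util.zip.DataFormatException;
import org.apache.lucene.document.CompressionTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;

class CompressedFieldSketch {
  // Writing: compress in application code, store the result as a
  // plain binary stored field (Field.Store.YES).
  static void addCompressed(Document doc, String text) {
    byte[] compressed = CompressionTools.compressString(text);
    doc.add(new Field("body", compressed, Field.Store.YES));
  }

  // Reading: Lucene no longer decompresses for you, so the
  // application has to reverse the step itself.
  static String readCompressed(Document doc) throws DataFormatException {
    return CompressionTools.decompressString(doc.getBinaryValue("body"));
  }
}

Because the compression now lives entirely in application code, fields previously written with Field.Store.COMPRESS cannot be converted to this scheme automatically; as the entry says, that switch requires reindexing.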

View File

@@ -19,6 +19,7 @@ package org.apache.lucene.index;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.AbstractField;
import org.apache.lucene.document.CompressionTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldSelector;
@@ -32,6 +33,7 @@ import org.apache.lucene.util.CloseableThreadLocal;
import java.io.IOException;
import java.io.Reader;
import java.util.zip.DataFormatException;
/**
* Class responsible for access to stored document fields.
@@ -203,7 +205,11 @@ final class FieldsReader implements Cloneable {
}
boolean canReadRawDocs() {
return format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES;
// Disable reading raw docs in 2.x format, because of the removal of compressed
// fields in 3.0. We don't want rawDocs() to decode field bits to figure out
// if a field was compressed, hence we enforce ordinary (non-raw) stored field merges
// for <3.0 indexes.
return format >= FieldsWriter.FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;
}
final Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
@@ -219,31 +225,34 @@
FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name);
byte bits = fieldsStream.readByte();
assert bits <= FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY;
assert bits <= FieldsWriter.FIELD_IS_COMPRESSED + FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY;
boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
assert (compressed ? (format < FieldsWriter.FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS) : true)
: "compressed fields are only allowed in indexes of version <= 2.9";
boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
//TODO: Find an alternative approach here if this list continues to grow beyond the
//list of 5 or 6 currently here. See Lucene 762 for discussion
if (acceptField.equals(FieldSelectorResult.LOAD)) {
addField(doc, fi, binary, tokenize);
addField(doc, fi, binary, compressed, tokenize);
}
else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK)){
addField(doc, fi, binary, tokenize);
addField(doc, fi, binary, compressed, tokenize);
break;//Get out of this loop
}
else if (acceptField.equals(FieldSelectorResult.LAZY_LOAD)) {
addFieldLazy(doc, fi, binary, tokenize);
addFieldLazy(doc, fi, binary, compressed, tokenize);
}
else if (acceptField.equals(FieldSelectorResult.SIZE)){
skipField(binary, addFieldSize(doc, fi, binary));
skipField(binary, compressed, addFieldSize(doc, fi, binary, compressed));
}
else if (acceptField.equals(FieldSelectorResult.SIZE_AND_BREAK)){
addFieldSize(doc, fi, binary);
addFieldSize(doc, fi, binary, compressed);
break;
}
else {
skipField(binary);
skipField(binary, compressed);
}
}
@@ -280,12 +289,12 @@ final class FieldsReader implements Cloneable {
* Skip the field. We still have to read some of the information about the field, but can skip past the actual content.
* This will have the most payoff on large fields.
*/
private void skipField(boolean binary) throws IOException {
skipField(binary, fieldsStream.readVInt());
private void skipField(boolean binary, boolean compressed) throws IOException {
skipField(binary, compressed, fieldsStream.readVInt());
}
private void skipField(boolean binary, int toRead) throws IOException {
if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES || binary) {
private void skipField(boolean binary, boolean compressed, int toRead) throws IOException {
if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES || binary || compressed) {
fieldsStream.seek(fieldsStream.getFilePointer() + toRead);
} else {
// We need to skip chars. This will slow us down, but still better
@@ -293,12 +302,12 @@ final class FieldsReader implements Cloneable {
}
}
private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean tokenize) throws IOException {
private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException {
if (binary) {
int toRead = fieldsStream.readVInt();
long pointer = fieldsStream.getFilePointer();
//was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary));
doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary, compressed));
//Need to move the pointer ahead by toRead positions
fieldsStream.seek(pointer + toRead);
} else {
@@ -307,43 +316,75 @@
Field.TermVector termVector = Field.TermVector.toTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);
AbstractField f;
int length = fieldsStream.readVInt();
long pointer = fieldsStream.getFilePointer();
//Skip ahead of where we are by the length of what is stored
if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
fieldsStream.seek(pointer+length);
else
fieldsStream.skipChars(length);
f = new LazyField(fi.name, store, index, termVector, length, pointer, binary);
f.setOmitNorms(fi.omitNorms);
f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
if (compressed) {
int toRead = fieldsStream.readVInt();
long pointer = fieldsStream.getFilePointer();
f = new LazyField(fi.name, store, toRead, pointer, binary, compressed);
//skip over the part that we aren't loading
fieldsStream.seek(pointer + toRead);
f.setOmitNorms(fi.omitNorms);
f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
} else {
int length = fieldsStream.readVInt();
long pointer = fieldsStream.getFilePointer();
//Skip ahead of where we are by the length of what is stored
if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) {
fieldsStream.seek(pointer+length);
} else {
fieldsStream.skipChars(length);
}
f = new LazyField(fi.name, store, index, termVector, length, pointer, binary, compressed);
f.setOmitNorms(fi.omitNorms);
f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
}
doc.add(f);
}
}
private void addField(Document doc, FieldInfo fi, boolean binary, boolean tokenize) throws CorruptIndexException, IOException {
private void addField(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws CorruptIndexException, IOException {
//we have a binary stored field, and it may be compressed
if (binary) {
int toRead = fieldsStream.readVInt();
final byte[] b = new byte[toRead];
fieldsStream.readBytes(b, 0, b.length);
doc.add(new Field(fi.name, b, Field.Store.YES));
if (compressed) {
doc.add(new Field(fi.name, uncompress(b), Field.Store.YES));
} else {
doc.add(new Field(fi.name, b, Field.Store.YES));
}
} else {
Field.Store store = Field.Store.YES;
Field.Index index = Field.Index.toIndex(fi.isIndexed, tokenize);
Field.TermVector termVector = Field.TermVector.toTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);
AbstractField f;
f = new Field(fi.name, // name
false,
fieldsStream.readString(), // read value
store,
index,
termVector);
f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
f.setOmitNorms(fi.omitNorms);
if (compressed) {
int toRead = fieldsStream.readVInt();
final byte[] b = new byte[toRead];
fieldsStream.readBytes(b, 0, b.length);
f = new Field(fi.name, // field name
false,
new String(uncompress(b), "UTF-8"), // uncompress the value and add as string
store,
index,
termVector);
f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
f.setOmitNorms(fi.omitNorms);
} else {
f = new Field(fi.name, // name
false,
fieldsStream.readString(), // read value
store,
index,
termVector);
f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
f.setOmitNorms(fi.omitNorms);
}
doc.add(f);
}
}
@@ -351,8 +392,8 @@ final class FieldsReader implements Cloneable {
// Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes)
// Read just the size -- caller must skip the field content to continue reading fields
// Return the size in bytes or chars, depending on field type
private int addFieldSize(Document doc, FieldInfo fi, boolean binary) throws IOException {
int size = fieldsStream.readVInt(), bytesize = binary ? size : 2*size;
private int addFieldSize(Document doc, FieldInfo fi, boolean binary, boolean compressed) throws IOException {
int size = fieldsStream.readVInt(), bytesize = binary || compressed ? size : 2*size;
byte[] sizebytes = new byte[4];
sizebytes[0] = (byte) (bytesize>>>24);
sizebytes[1] = (byte) (bytesize>>>16);
@@ -369,8 +410,10 @@ final class FieldsReader implements Cloneable {
private class LazyField extends AbstractField implements Fieldable {
private int toRead;
private long pointer;
/** @deprecated Only kept for backward-compatibility with <3.0 indexes. Will be removed in 4.0. */
private boolean isCompressed;
public LazyField(String name, Field.Store store, int toRead, long pointer, boolean isBinary) {
public LazyField(String name, Field.Store store, int toRead, long pointer, boolean isBinary, boolean isCompressed) {
super(name, store, Field.Index.NO, Field.TermVector.NO);
this.toRead = toRead;
this.pointer = pointer;
@@ -378,9 +421,10 @@
if (isBinary)
binaryLength = toRead;
lazy = true;
this.isCompressed = isCompressed;
}
public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, boolean isBinary) {
public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, boolean isBinary, boolean isCompressed) {
super(name, store, index, termVector);
this.toRead = toRead;
this.pointer = pointer;
@@ -388,6 +432,7 @@
if (isBinary)
binaryLength = toRead;
lazy = true;
this.isCompressed = isCompressed;
}
private IndexInput getFieldStream() {
@@ -427,15 +472,21 @@
IndexInput localFieldsStream = getFieldStream();
try {
localFieldsStream.seek(pointer);
if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) {
byte[] bytes = new byte[toRead];
localFieldsStream.readBytes(bytes, 0, toRead);
fieldsData = new String(bytes, "UTF-8");
if (isCompressed) {
final byte[] b = new byte[toRead];
localFieldsStream.readBytes(b, 0, b.length);
fieldsData = new String(uncompress(b), "UTF-8");
} else {
//read in chars b/c we already know the length we need to read
char[] chars = new char[toRead];
localFieldsStream.readChars(chars, 0, toRead);
fieldsData = new String(chars);
if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) {
byte[] bytes = new byte[toRead];
localFieldsStream.readBytes(bytes, 0, toRead);
fieldsData = new String(bytes, "UTF-8");
} else {
//read in chars b/c we already know the length we need to read
char[] chars = new char[toRead];
localFieldsStream.readChars(chars, 0, toRead);
fieldsData = new String(chars);
}
}
} catch (IOException e) {
throw new FieldReaderException(e);
@@ -484,7 +535,11 @@ final class FieldsReader implements Cloneable {
try {
localFieldsStream.seek(pointer);
localFieldsStream.readBytes(b, 0, toRead);
fieldsData = b;
if (isCompressed) {
fieldsData = uncompress(b);
} else {
fieldsData = b;
}
} catch (IOException e) {
throw new FieldReaderException(e);
}
@@ -498,4 +553,16 @@
return null;
}
}
private byte[] uncompress(byte[] b)
throws CorruptIndexException {
try {
return CompressionTools.decompress(b);
} catch (DataFormatException e) {
// this will happen if the field is not compressed
CorruptIndexException newException = new CorruptIndexException("field data are in wrong format: " + e.toString());
newException.initCause(e);
throw newException;
}
}
}
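
The doc() method above dispatches on FieldSelectorResult, and LAZY_LOAD is what produces the LazyField instances defined in this class. As a usage sketch of that path (the field name "body" is an assumption), a caller can defer reading a large stored field until it is actually accessed:

import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader;

class LazyLoadSketch {
  static String loadBodyLazily(IndexReader reader, int docId) throws IOException {
    Document d = reader.document(docId, new FieldSelector() {
      public FieldSelectorResult accept(String fieldName) {
        return "body".equals(fieldName)
            ? FieldSelectorResult.LAZY_LOAD  // becomes a LazyField
            : FieldSelectorResult.LOAD;      // everything else loads eagerly
      }
    });
    Fieldable body = d.getFieldable("body");
    // The fields file is seeked and read only here -- and for a <3.0
    // segment, this is also where uncompress() runs.
    return body.stringValue();
  }
}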

View File

@@ -31,16 +31,22 @@ final class FieldsWriter
static final byte FIELD_IS_TOKENIZED = 0x1;
static final byte FIELD_IS_BINARY = 0x2;
/** @deprecated Kept for backwards-compatibility with <3.0 indexes; will be removed in 4.0 */
static final byte FIELD_IS_COMPRESSED = 0x4;
// Original format
static final int FORMAT = 0;
// Changed strings to UTF8
static final int FORMAT_VERSION_UTF8_LENGTH_IN_BYTES = 1;
// Lucene 3.0: Removal of compressed fields
static final int FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2;
// NOTE: if you introduce a new format, make it 1 higher
// than the current one, and always change this if you
// switch to a new format!
static final int FORMAT_CURRENT = FORMAT_VERSION_UTF8_LENGTH_IN_BYTES;
static final int FORMAT_CURRENT = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;
private FieldInfos fieldInfos;
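
Bumping FORMAT_CURRENT means every newly written segment advertises the compression-free format, which is exactly what canReadRawDocs() in FieldsReader keys on. The upgrade recommended in CHANGES is then just an optimize; a sketch, assuming an existing index directory (the WhitespaceAnalyzer is an arbitrary choice, since no documents are added):

import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

class UpgradeSketch {
  // Merging every segment rewrites it with FORMAT_CURRENT
  // (FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS) and decompresses any
  // remaining Field.Store.COMPRESS data on the way.
  static void upgrade(File indexDir) throws IOException {
    Directory dir = FSDirectory.open(indexDir);
    IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(),
                                    IndexWriter.MaxFieldLength.LIMITED);
    try {
      w.optimize(); // forces the non-raw merge for <3.0 segments
    } finally {
      w.close();
      dir.close();
    }
  }
}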

View File

@@ -22,21 +22,28 @@ import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.List;
import java.util.ArrayList;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
@@ -127,22 +134,105 @@ public class TestBackwardsCompatibility extends LuceneTestCase
"23.nocfs",
"24.cfs",
"24.nocfs",
"29.cfs",
"29.nocfs",
};
private void assertCompressedFields29(Directory dir, boolean shouldStillBeCompressed) throws IOException {
int count = 0;
final int TEXT_PLAIN_LENGTH = TEXT_TO_COMPRESS.length() * 2;
// FieldSelectorResult.SIZE returns 2*number_of_chars for String fields:
final int BINARY_PLAIN_LENGTH = BINARY_TO_COMPRESS.length;
IndexReader reader = IndexReader.open(dir, true);
try {
// look into sub readers and check if raw merge is on/off
List<IndexReader> readers = new ArrayList<IndexReader>();
ReaderUtil.gatherSubReaders(readers, reader);
for (IndexReader ir : readers) {
final FieldsReader fr = ((SegmentReader) ir).getFieldsReader();
assertTrue("for a 2.9 index, FieldsReader.canReadRawDocs() must be false and other way round for a trunk index",
shouldStillBeCompressed != fr.canReadRawDocs());
}
// test that decompression works correctly
for(int i=0; i<reader.maxDoc(); i++) {
if (!reader.isDeleted(i)) {
Document d = reader.document(i);
if (d.get("content3") != null) continue;
count++;
Fieldable compressed = d.getFieldable("compressed");
if (Integer.parseInt(d.get("id")) % 2 == 0) {
assertFalse(compressed.isBinary());
assertEquals("incorrectly decompressed string", TEXT_TO_COMPRESS, compressed.stringValue());
} else {
assertTrue(compressed.isBinary());
assertTrue("incorrectly decompressed binary", Arrays.equals(BINARY_TO_COMPRESS, compressed.getBinaryValue()));
}
}
}
// check if field was decompressed after optimize
for(int i=0; i<reader.maxDoc(); i++) {
if (!reader.isDeleted(i)) {
Document d = reader.document(i, new FieldSelector() {
public FieldSelectorResult accept(String fieldName) {
return ("compressed".equals(fieldName)) ? FieldSelectorResult.SIZE : FieldSelectorResult.LOAD;
}
});
if (d.get("content3") != null) continue;
count++;
// read the size from the binary value using DataInputStream (this prevents us from doing the shift ops ourselves):
final DataInputStream ds = new DataInputStream(new ByteArrayInputStream(d.getFieldable("compressed").getBinaryValue()));
final int actualSize = ds.readInt();
ds.close();
final int compressedSize = Integer.parseInt(d.get("compressedSize"));
final boolean binary = Integer.parseInt(d.get("id")) % 2 > 0;
final int shouldSize = shouldStillBeCompressed ?
compressedSize :
(binary ? BINARY_PLAIN_LENGTH : TEXT_PLAIN_LENGTH);
assertEquals("size incorrect", shouldSize, actualSize);
if (!shouldStillBeCompressed) {
assertFalse("uncompressed field should have another size than recorded in index", compressedSize == actualSize);
}
}
}
assertEquals("correct number of tests", 34 * 2, count);
} finally {
reader.close();
}
}
public void testOptimizeOldIndex() throws IOException {
int hasTested29 = 0;
for(int i=0;i<oldNames.length;i++) {
String dirName = "src/test/org/apache/lucene/index/index." + oldNames[i];
unzip(dirName, oldNames[i]);
String fullPath = fullDir(oldNames[i]);
Directory dir = FSDirectory.open(new File(fullPath));
if (oldNames[i].startsWith("29.")) {
assertCompressedFields29(dir, true);
hasTested29++;
}
IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
w.optimize();
w.close();
_TestUtil.checkIndex(dir);
if (oldNames[i].startsWith("29.")) {
assertCompressedFields29(dir, false);
hasTested29++;
}
dir.close();
rmDir(oldNames[i]);
}
assertEquals("test for compressed field should have run 4 times", 4, hasTested29);
}
public void testSearchOldIndex() throws IOException {
@@ -203,7 +293,8 @@ public class TestBackwardsCompatibility extends LuceneTestCase
!oldName.startsWith("22.")) {
if (d.getField("content3") == null) {
assertEquals(5, fields.size());
final int numFields = oldName.startsWith("29.") ? 7 : 5;
assertEquals(numFields, fields.size());
Field f = (Field) d.getField("id");
assertEquals(""+i, f.stringValue());
@@ -497,6 +588,15 @@ public class TestBackwardsCompatibility extends LuceneTestCase
doc.add(new Field("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
doc.add(new Field("content2", "here is more content with aaa aaa aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
doc.add(new Field("fie\u2C77ld", "field with non-ascii name", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
/* This was used in 2.9 to generate an index with compressed field:
if (id % 2 == 0) {
doc.add(new Field("compressed", TEXT_TO_COMPRESS, Field.Store.COMPRESS, Field.Index.NOT_ANALYZED));
doc.add(new Field("compressedSize", Integer.toString(TEXT_COMPRESSED_LENGTH), Field.Store.YES, Field.Index.NOT_ANALYZED));
} else {
doc.add(new Field("compressed", BINARY_TO_COMPRESS, Field.Store.COMPRESS));
doc.add(new Field("compressedSize", Integer.toString(BINARY_COMPRESSED_LENGTH), Field.Store.YES, Field.Index.NOT_ANALYZED));
}
*/
writer.addDocument(doc);
}
@@ -527,4 +627,22 @@ public class TestBackwardsCompatibility extends LuceneTestCase
public static String fullDir(String dirName) throws IOException {
return new File(System.getProperty("tempDir"), dirName).getCanonicalPath();
}
static final String TEXT_TO_COMPRESS = "this is a compressed field and should appear in 3.0 as an uncompressed field after merge";
// FieldSelectorResult.SIZE returns compressed size for compressed fields, which are internally handled as binary;
// do it the same way as FieldsWriter does; do not use CompressionTools.compressString() for compressed fields:
/* This was used in 2.9 to generate an index with compressed field:
static final int TEXT_COMPRESSED_LENGTH;
static {
try {
TEXT_COMPRESSED_LENGTH = CompressionTools.compress(TEXT_TO_COMPRESS.getBytes("UTF-8")).length;
} catch (Exception e) {
throw new RuntimeException();
}
}
*/
static final byte[] BINARY_TO_COMPRESS = new byte[]{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20};
/* This was used in 2.9 to generate an index with compressed field:
static final int BINARY_COMPRESSED_LENGTH = CompressionTools.compress(BINARY_TO_COMPRESS).length;
*/
}
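
The size assertions in assertCompressedFields29() above depend on how FieldSelectorResult.SIZE reports a field: addFieldSize() in FieldsReader stores the byte size as a 4-byte big-endian int in the field's binary value. A standalone sketch of decoding it, matching what the test does with DataInputStream:

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;

class FieldSizeSketch {
  // addFieldSize() writes bytesize>>>24 first, i.e. high order byte
  // first (big-endian), which is the layout DataInputStream.readInt()
  // expects -- no manual shift ops needed.
  static int decode(byte[] sizeBytes) throws IOException {
    DataInputStream in = new DataInputStream(new ByteArrayInputStream(sizeBytes));
    try {
      return in.readInt();
    } finally {
      in.close();
    }
  }
}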

Binary file not shown.

Binary file not shown.