LUCENE-652: add org.apache.lucene.document.CompressionTools; deprecate Field.Store.COMPRESS

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@756635 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2009-03-20 17:13:42 +00:00
parent dc1447005d
commit 39c8421992
6 changed files with 160 additions and 81 deletions

CHANGES.txt

@@ -59,6 +59,10 @@ API Changes
  9. LUCENE-1186: Add Analyzer.close() to free internal ThreadLocal
     resources.  (Christian Kohlschütter via Mike McCandless)
 
+10. LUCENE-652: Added org.apache.lucene.document.CompressionTools, to
+    enable compressing & decompressing binary content, external to
+    Lucene's indexing.  Deprecated Field.Store.COMPRESS.
+
 Bug fixes
 
 1. LUCENE-1415: MultiPhraseQuery has incorrect hashCode() and equals()
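
A minimal round trip with the new class, adapted from the usage notes in CompressionTools' own javadoc (the string literal is illustrative; decompress declares the checked java.util.zip.DataFormatException):

    byte[] compressed = CompressionTools.compress("some long stored value".getBytes("UTF-8"));
    String restored = new String(CompressionTools.decompress(compressed), "UTF-8");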

org/apache/lucene/document/CompressionTools.java (new file)

@@ -0,0 +1,110 @@
package org.apache.lucene.document;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.zip.Deflater;
import java.util.zip.Inflater;
import java.util.zip.DataFormatException;
import java.io.ByteArrayOutputStream;

/** Simple utility class providing static methods to
 *  compress and decompress binary data for stored fields.
 *  This class uses the java.util.zip.Deflater and Inflater
 *  classes to compress and decompress.
 *
 * To compress a String:
 * <pre>
 *    String string = ...
 *    byte[] bytes = compress(string.getBytes("UTF-8"));
 * </pre>
 * and to decompress:
 * <pre>
 *    new String(decompress(bytes), "UTF-8");
 * </pre>
 */

public class CompressionTools {

  // Export only static methods
  private CompressionTools() {}

  /** Compresses the specified byte range using the
   *  specified compressionLevel (constants are defined in
   *  java.util.zip.Deflater). */
  public static byte[] compress(byte[] value, int offset, int length, int compressionLevel) {

    /* Create an expandable byte array to hold the compressed data.
     * You cannot use an array that's the same size as the original because
     * there is no guarantee that the compressed data will be smaller than
     * the uncompressed data. */
    ByteArrayOutputStream bos = new ByteArrayOutputStream(length);

    Deflater compressor = new Deflater();

    try {
      compressor.setLevel(compressionLevel);
      compressor.setInput(value, offset, length);
      compressor.finish();

      // Compress the data
      final byte[] buf = new byte[1024];
      while (!compressor.finished()) {
        int count = compressor.deflate(buf);
        bos.write(buf, 0, count);
      }
    } finally {
      compressor.end();
    }

    return bos.toByteArray();
  }

  /** Compresses the specified byte range, with default BEST_COMPRESSION level */
  public static byte[] compress(byte[] value, int offset, int length) {
    return compress(value, offset, length, Deflater.BEST_COMPRESSION);
  }

  /** Compresses all bytes in the array, with default BEST_COMPRESSION level */
  public static byte[] compress(byte[] value) {
    return compress(value, 0, value.length, Deflater.BEST_COMPRESSION);
  }

  /** Decompress the byte array previously returned by
   *  compress */
  public static byte[] decompress(byte[] value) throws DataFormatException {
    // Create an expandable byte array to hold the decompressed data
    ByteArrayOutputStream bos = new ByteArrayOutputStream(value.length);

    Inflater decompressor = new Inflater();

    try {
      decompressor.setInput(value);

      // Decompress the data
      final byte[] buf = new byte[1024];
      while (!decompressor.finished()) {
        int count = decompressor.inflate(buf);
        bos.write(buf, 0, count);
      }
    } finally {
      decompressor.end();
    }

    return bos.toByteArray();
  }
}
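
The four-argument overload above exposes the java.util.zip.Deflater level directly, so callers can trade compression ratio for speed; a small sketch, assuming a byte[] data already in hand:

    // Favor speed over ratio, e.g. for very large stored values
    byte[] fast = CompressionTools.compress(data, 0, data.length, Deflater.BEST_SPEED);

    // The shorter overloads default to Deflater.BEST_COMPRESSION
    byte[] small = CompressionTools.compress(data);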

org/apache/lucene/document/Field.java

@@ -43,6 +43,7 @@ public final class Field extends AbstractField implements Fieldable, Serializable
   /** Store the original field value in the index in a compressed form. This is
    * useful for long documents and for binary valued fields.
+   * @deprecated Please use {@link CompressionTools} instead
    */
   public static final Store COMPRESS = new Store("COMPRESS");
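
Migrating off the deprecated constant follows the pattern the updated TestBinaryDocument below uses; the field and variable names here are illustrative:

    // Before (deprecated): Lucene compresses the value at index time
    doc.add(new Field("body", value.getBytes("UTF-8"), Field.Store.COMPRESS));

    // After: compress externally, store as an ordinary binary field
    doc.add(new Field("body", CompressionTools.compress(value.getBytes("UTF-8")), Field.Store.YES));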

org/apache/lucene/index/FieldsReader.java

@@ -25,11 +25,9 @@ import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.BufferedIndexInput;
 import org.apache.lucene.util.CloseableThreadLocal;
 
-import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.Reader;
 import java.util.zip.DataFormatException;
-import java.util.zip.Inflater;
 
 /**
  * Class responsible for access to stored document fields.
@@ -596,40 +594,19 @@ final class FieldsReader implements Cloneable {
       return null;
     }
   }
 
-  private final byte[] uncompress(final byte[] input)
-    throws CorruptIndexException, IOException {
-
-    // Create an expandable byte array to hold the decompressed data
-    ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);
-    Inflater decompressor = new Inflater();
-
+  private byte[] uncompress(byte[] b)
+        throws CorruptIndexException {
     try {
-      decompressor.setInput(input);
-
-      // Decompress the data
-      byte[] buf = new byte[1024];
-      while (!decompressor.finished()) {
-        try {
-          int count = decompressor.inflate(buf);
-          bos.write(buf, 0, count);
-        }
-        catch (DataFormatException e) {
-          // this will happen if the field is not compressed
-          CorruptIndexException newException = new CorruptIndexException("field data are in wrong format: " + e.toString());
-          newException.initCause(e);
-          throw newException;
-        }
-      }
-    } finally {
-      decompressor.end();
+      return CompressionTools.decompress(b);
+    } catch (DataFormatException e) {
+      // this will happen if the field is not compressed
+      CorruptIndexException newException = new CorruptIndexException("field data are in wrong format: " + e.toString());
+      newException.initCause(e);
+      throw newException;
     }
-
-    // Get the decompressed data
-    return bos.toByteArray();
   }
 
   // Instances of this class hold field properties and data
   // for merge
   final static class FieldForMerge extends AbstractField {
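
CompressionTools.decompress declares the checked java.util.zip.DataFormatException, so application code reading externally compressed fields needs a guard like the one FieldsReader now uses; a sketch with an illustrative field name:

    try {
      byte[] raw = CompressionTools.decompress(doc.getBinaryValue("body"));
      // use raw ...
    } catch (DataFormatException e) {
      // the stored bytes were not produced by CompressionTools.compress
    }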

org/apache/lucene/index/FieldsWriter.java

@@ -16,13 +16,12 @@ package org.apache.lucene.index;
  * the License.
  */
 
-import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.util.Iterator;
-import java.util.zip.Deflater;
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.document.CompressionTools;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMOutputStream;
 import org.apache.lucene.store.IndexOutput;
@@ -203,10 +202,10 @@ final class FieldsWriter
       } else {
         // check if it is a binary field
         if (field.isBinary()) {
-          data = compress(field.getBinaryValue(), field.getBinaryOffset(), field.getBinaryLength());
+          data = CompressionTools.compress(field.getBinaryValue(), field.getBinaryOffset(), field.getBinaryLength());
         } else {
           byte x[] = field.stringValue().getBytes("UTF-8");
-          data = compress(x, 0, x.length);
+          data = CompressionTools.compress(x, 0, x.length);
         }
         len = data.length;
         offset = 0;
@@ -269,43 +268,4 @@ final class FieldsWriter
       writeField(fieldInfos.fieldInfo(field.name()), field);
     }
   }
-
-  private final byte[] compress (byte[] input, int offset, int length) {
-
-    // Create the compressor with highest level of compression
-    Deflater compressor = new Deflater();
-    compressor.setLevel(Deflater.BEST_COMPRESSION);
-
-    // Give the compressor the data to compress
-    compressor.setInput(input, offset, length);
-    compressor.finish();
-
-    /*
-     * Create an expandable byte array to hold the compressed data.
-     * You cannot use an array that's the same size as the orginal because
-     * there is no guarantee that the compressed data will be smaller than
-     * the uncompressed data.
-     */
-    ByteArrayOutputStream bos = new ByteArrayOutputStream(length);
-
-    try {
-      compressor.setLevel(Deflater.BEST_COMPRESSION);
-
-      // Give the compressor the data to compress
-      compressor.setInput(input);
-      compressor.finish();
-
-      // Compress the data
-      byte[] buf = new byte[1024];
-      while (!compressor.finished()) {
-        int count = compressor.deflate(buf);
-        bos.write(buf, 0, count);
-      }
-    } finally {
-      compressor.end();
-    }
-
-    // Get the compressed data
-    return bos.toByteArray();
-  }
 }

org/apache/lucene/document/TestBinaryDocument.java

@@ -5,7 +5,7 @@ import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.store.MockRAMDirectory;
 
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -43,7 +43,7 @@ public class TestBinaryDocument extends LuceneTestCase
     Fieldable binaryFldCompressed = new Field("binaryCompressed", binaryValCompressed.getBytes(), Field.Store.COMPRESS);
     Fieldable stringFldStored = new Field("stringStored", binaryValStored, Field.Store.YES, Field.Index.NO, Field.TermVector.NO);
     Fieldable stringFldCompressed = new Field("stringCompressed", binaryValCompressed, Field.Store.COMPRESS, Field.Index.NO, Field.TermVector.NO);
 
     try {
       // binary fields with store off are not allowed
       new Field("fail", binaryValCompressed.getBytes(), Field.Store.NO);
@@ -60,12 +60,12 @@ public class TestBinaryDocument extends LuceneTestCase
     doc.add(stringFldStored);
     doc.add(stringFldCompressed);
 
     /** test for field count */
     assertEquals(4, doc.fields.size());
 
     /** add the doc to a ram index */
-    RAMDirectory dir = new RAMDirectory();
+    MockRAMDirectory dir = new MockRAMDirectory();
     IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
     writer.addDocument(doc);
     writer.close();
@@ -90,13 +90,40 @@ public class TestBinaryDocument extends LuceneTestCase
     /** fetch the compressed string field and compare it's content with the original one */
     String stringFldCompressedTest = docFromReader.get("stringCompressed");
     assertTrue(stringFldCompressedTest.equals(binaryValCompressed));
 
     /** delete the document from index */
     reader.deleteDocument(0);
     assertEquals(0, reader.numDocs());
 
     reader.close();
+    dir.close();
   }
+
+  public void testCompressionTools()
+    throws Exception
+  {
+    Fieldable binaryFldCompressed = new Field("binaryCompressed", CompressionTools.compress(binaryValCompressed.getBytes()), Field.Store.YES);
+
+    Document doc = new Document();
+    doc.add(binaryFldCompressed);
+
+    /** add the doc to a ram index */
+    MockRAMDirectory dir = new MockRAMDirectory();
+    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
+    writer.addDocument(doc);
+    writer.close();
+
+    /** open a reader and fetch the document */
+    IndexReader reader = IndexReader.open(dir);
+    Document docFromReader = reader.document(0);
+    assertTrue(docFromReader != null);
+
+    /** fetch the binary compressed field and compare its content with the original one */
+    String binaryFldCompressedTest = new String(CompressionTools.decompress(docFromReader.getBinaryValue("binaryCompressed")));
+    assertTrue(binaryFldCompressedTest.equals(binaryValCompressed));
+
+    reader.close();
+    dir.close();
+  }
 }