LUCENE-652: add compress/decompressString, too

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@756760 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2009-03-20 21:10:12 +00:00
parent a7697e8cef
commit a00257a7ba
3 changed files with 36 additions and 13 deletions

View File

@ -21,21 +21,14 @@ import java.util.zip.Deflater;
import java.util.zip.Inflater;
import java.util.zip.DataFormatException;
import java.io.ByteArrayOutputStream;
import org.apache.lucene.util.UnicodeUtil;
/** Simple utility class providing static methods to
* compress and decompress binary data for stored fields.
* This class uses java.util.zip.Deflater and Inflater
* classes to compress and decompress.
*
* To compress a String:
* <pre>
* String string = ...
* byte[] bytes = compress(string.getBytes("UTF-8");
* </pre>
* and to decompress:
* <pre>
* new String(decompress(bytes), "UTF-8");
* </pre>
* classes to compress and decompress, which is the same
* format previously used by the now deprecated
* Field.Store.COMPRESS.
*/
public class CompressionTools {
@ -84,6 +77,20 @@ public class CompressionTools {
return compress(value, 0, value.length, Deflater.BEST_COMPRESSION);
}
/** Compresses the String value, with default BEST_COMPRESSION level */
public static byte[] compressString(String value) {
return compressString(value, Deflater.BEST_COMPRESSION);
}
/** Compresses the String value using the specified
* compressionLevel (constants are defined in
* java.util.zip.Deflater). */
public static byte[] compressString(String value, int compressionLevel) {
UnicodeUtil.UTF8Result result = new UnicodeUtil.UTF8Result();
UnicodeUtil.UTF16toUTF8(value, 0, value.length(), result);
return compress(result.result, 0, result.length, compressionLevel);
}
/** Decompress the byte array previously returned by
* compress */
public static byte[] decompress(byte[] value) throws DataFormatException {
@ -107,4 +114,13 @@ public class CompressionTools {
return bos.toByteArray();
}
/** Decompress the byte array previously returned by
* compressString back into a String */
public static String decompressString(byte[] value) throws DataFormatException {
UnicodeUtil.UTF16Result result = new UnicodeUtil.UTF16Result();
final byte[] bytes = decompress(value);
UnicodeUtil.UTF8toUTF16(bytes, 0, bytes.length, result);
return new String(result.result, 0, result.length);
}
}

View File

@ -43,7 +43,11 @@ public final class Field extends AbstractField implements Fieldable, Serializabl
/** Store the original field value in the index in a compressed form. This is
* useful for long documents and for binary valued fields.
* @deprecated Please use {@link CompressionTools} instead
* @deprecated Please use {@link CompressionTools} instead.
* For string fields that were previously indexed and stored using compression,
* the new way to achive this is: First add the field indexed-only (no store)
* and additionally using the same field name as a binary, stored field
* with {@link CompressionTools#compressString}.
*/
public static final Store COMPRESS = new Store("COMPRESS");

View File

@ -103,10 +103,12 @@ public class TestBinaryDocument extends LuceneTestCase
throws Exception
{
Fieldable binaryFldCompressed = new Field("binaryCompressed", CompressionTools.compress(binaryValCompressed.getBytes()), Field.Store.YES);
Fieldable stringFldCompressed = new Field("stringCompressed", CompressionTools.compressString(binaryValCompressed), Field.Store.YES);
Document doc = new Document();
doc.add(binaryFldCompressed);
doc.add(stringFldCompressed);
/** add the doc to a ram index */
MockRAMDirectory dir = new MockRAMDirectory();
@ -122,7 +124,8 @@ public class TestBinaryDocument extends LuceneTestCase
/** fetch the binary compressed field and compare it's content with the original one */
String binaryFldCompressedTest = new String(CompressionTools.decompress(docFromReader.getBinaryValue("binaryCompressed")));
assertTrue(binaryFldCompressedTest.equals(binaryValCompressed));
assertTrue(CompressionTools.decompressString(docFromReader.getBinaryValue("stringCompressed")).equals(binaryValCompressed));
reader.close();
dir.close();
}