mirror of https://github.com/apache/lucene.git
LUCENE-652: add compress/decompressString, too
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@756760 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a7697e8cef
commit
a00257a7ba
|
@ -21,21 +21,14 @@ import java.util.zip.Deflater;
|
|||
import java.util.zip.Inflater;
|
||||
import java.util.zip.DataFormatException;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
|
||||
/** Simple utility class providing static methods to
|
||||
* compress and decompress binary data for stored fields.
|
||||
* This class uses java.util.zip.Deflater and Inflater
|
||||
* classes to compress and decompress.
|
||||
*
|
||||
* To compress a String:
|
||||
* <pre>
|
||||
* String string = ...
|
||||
* byte[] bytes = compress(string.getBytes("UTF-8");
|
||||
* </pre>
|
||||
* and to decompress:
|
||||
* <pre>
|
||||
* new String(decompress(bytes), "UTF-8");
|
||||
* </pre>
|
||||
* classes to compress and decompress, which is the same
|
||||
* format previously used by the now deprecated
|
||||
* Field.Store.COMPRESS.
|
||||
*/
|
||||
|
||||
public class CompressionTools {
|
||||
|
@ -84,6 +77,20 @@ public class CompressionTools {
|
|||
return compress(value, 0, value.length, Deflater.BEST_COMPRESSION);
|
||||
}
|
||||
|
||||
/** Compresses the String value, with default BEST_COMPRESSION level */
|
||||
public static byte[] compressString(String value) {
|
||||
return compressString(value, Deflater.BEST_COMPRESSION);
|
||||
}
|
||||
|
||||
/** Compresses the String value using the specified
|
||||
* compressionLevel (constants are defined in
|
||||
* java.util.zip.Deflater). */
|
||||
public static byte[] compressString(String value, int compressionLevel) {
|
||||
UnicodeUtil.UTF8Result result = new UnicodeUtil.UTF8Result();
|
||||
UnicodeUtil.UTF16toUTF8(value, 0, value.length(), result);
|
||||
return compress(result.result, 0, result.length, compressionLevel);
|
||||
}
|
||||
|
||||
/** Decompress the byte array previously returned by
|
||||
* compress */
|
||||
public static byte[] decompress(byte[] value) throws DataFormatException {
|
||||
|
@ -107,4 +114,13 @@ public class CompressionTools {
|
|||
|
||||
return bos.toByteArray();
|
||||
}
|
||||
|
||||
/** Decompress the byte array previously returned by
|
||||
* compressString back into a String */
|
||||
public static String decompressString(byte[] value) throws DataFormatException {
|
||||
UnicodeUtil.UTF16Result result = new UnicodeUtil.UTF16Result();
|
||||
final byte[] bytes = decompress(value);
|
||||
UnicodeUtil.UTF8toUTF16(bytes, 0, bytes.length, result);
|
||||
return new String(result.result, 0, result.length);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -43,7 +43,11 @@ public final class Field extends AbstractField implements Fieldable, Serializabl
|
|||
|
||||
/** Store the original field value in the index in a compressed form. This is
|
||||
* useful for long documents and for binary valued fields.
|
||||
* @deprecated Please use {@link CompressionTools} instead
|
||||
* @deprecated Please use {@link CompressionTools} instead.
|
||||
* For string fields that were previously indexed and stored using compression,
|
||||
* the new way to achive this is: First add the field indexed-only (no store)
|
||||
* and additionally using the same field name as a binary, stored field
|
||||
* with {@link CompressionTools#compressString}.
|
||||
*/
|
||||
public static final Store COMPRESS = new Store("COMPRESS");
|
||||
|
||||
|
|
|
@ -103,10 +103,12 @@ public class TestBinaryDocument extends LuceneTestCase
|
|||
throws Exception
|
||||
{
|
||||
Fieldable binaryFldCompressed = new Field("binaryCompressed", CompressionTools.compress(binaryValCompressed.getBytes()), Field.Store.YES);
|
||||
Fieldable stringFldCompressed = new Field("stringCompressed", CompressionTools.compressString(binaryValCompressed), Field.Store.YES);
|
||||
|
||||
Document doc = new Document();
|
||||
|
||||
doc.add(binaryFldCompressed);
|
||||
doc.add(stringFldCompressed);
|
||||
|
||||
/** add the doc to a ram index */
|
||||
MockRAMDirectory dir = new MockRAMDirectory();
|
||||
|
@ -122,7 +124,8 @@ public class TestBinaryDocument extends LuceneTestCase
|
|||
/** fetch the binary compressed field and compare it's content with the original one */
|
||||
String binaryFldCompressedTest = new String(CompressionTools.decompress(docFromReader.getBinaryValue("binaryCompressed")));
|
||||
assertTrue(binaryFldCompressedTest.equals(binaryValCompressed));
|
||||
|
||||
assertTrue(CompressionTools.decompressString(docFromReader.getBinaryValue("stringCompressed")).equals(binaryValCompressed));
|
||||
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue