mirror of https://github.com/apache/lucene.git
LUCENE-1219: add Fieldable.getBinaryValue/Offset/Length reuse API
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@686723 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
027307857e
commit
a26f3734c6
|
@ -100,6 +100,11 @@ API Changes
|
|||
frequency, positions and payloads. This saves index space, and
|
||||
indexing/searching time. (Eks Dev via Mike McCandless)
|
||||
|
||||
15. LUCENE-1219: Add basic reuse API to Fieldable for binary fields:
|
||||
getBinaryValue/Offset/Length(); currently only lazy fields reuse
|
||||
the provided byte[] result to getBinaryValue. (Eks Dev via Mike
|
||||
McCandless)
|
||||
|
||||
Bug fixes
|
||||
|
||||
1. LUCENE-1134: Fixed BooleanQuery.rewrite to only optimize a single
|
||||
|
|
|
@ -37,10 +37,12 @@ public abstract class AbstractField implements Fieldable {
|
|||
protected float boost = 1.0f;
|
||||
// the one and only data object for all different kind of field values
|
||||
protected Object fieldsData = null;
|
||||
//length/offset for all primitive types
|
||||
protected int binaryLength;
|
||||
protected int binaryOffset;
|
||||
|
||||
protected AbstractField()
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
protected AbstractField(String name, Field.Store store, Field.Index index, Field.TermVector termVector) {
|
||||
|
@ -199,7 +201,43 @@ public abstract class AbstractField implements Fieldable {
|
|||
}
|
||||
|
||||
/** True iff the value of the filed is stored as binary */
|
||||
public final boolean isBinary() { return isBinary; }
|
||||
public final boolean isBinary() {
|
||||
return isBinary;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Return the raw byte[] for the binary field. Note that
|
||||
* you must also call {@link #getBinaryLength} and {@link
|
||||
* #getBinaryOffset} to know which range of bytes in this
|
||||
* returned array belong to the field.
|
||||
* @return reference to the Field value as byte[].
|
||||
*/
|
||||
public byte[] getBinaryValue() {
|
||||
return getBinaryValue(null);
|
||||
}
|
||||
|
||||
public byte[] getBinaryValue(byte[] result){
|
||||
return isBinary ? (byte[]) fieldsData : null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns length of byte[] segment that is used as value, if Field is not binary
|
||||
* returned value is undefined
|
||||
* @return length of byte[] segment that represents this Field value
|
||||
*/
|
||||
public int getBinaryLength() {
|
||||
return binaryLength;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns offset into byte[] segment that is used as value, if Field is not binary
|
||||
* returned value is undefined
|
||||
* @return index of the first character in byte[] segment that represents this Field value
|
||||
*/
|
||||
public int getBinaryOffset() {
|
||||
return binaryOffset;
|
||||
}
|
||||
|
||||
/** True if norms are omitted for this indexed field */
|
||||
public boolean getOmitNorms() { return omitNorms; }
|
||||
|
|
|
@ -137,22 +137,39 @@ public final class Field extends AbstractField implements Fieldable, Serializabl
|
|||
|
||||
/** The value of the field as a String, or null. If null, the Reader value,
|
||||
* binary value, or TokenStream value is used. Exactly one of stringValue(),
|
||||
* readerValue(), binaryValue(), and tokenStreamValue() must be set. */
|
||||
* readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */
|
||||
public String stringValue() { return fieldsData instanceof String ? (String)fieldsData : null; }
|
||||
|
||||
/** The value of the field as a Reader, or null. If null, the String value,
|
||||
* binary value, or TokenStream value is used. Exactly one of stringValue(),
|
||||
* readerValue(), binaryValue(), and tokenStreamValue() must be set. */
|
||||
* readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */
|
||||
public Reader readerValue() { return fieldsData instanceof Reader ? (Reader)fieldsData : null; }
|
||||
|
||||
/** The value of the field in Binary, or null. If null, the Reader value,
|
||||
* String value, or TokenStream value is used. Exactly one of stringValue(),
|
||||
* readerValue(), binaryValue(), and tokenStreamValue() must be set. */
|
||||
public byte[] binaryValue() { return isBinary ? (byte[])fieldsData : null; }
|
||||
* readerValue(), getBinaryValue(), and tokenStreamValue() must be set.
|
||||
* @deprecated This method must allocate a new byte[] if
|
||||
* the {@link AbstractField#getBinaryOffset()} is non-zero
|
||||
* or {@link AbstractField#getBinaryLength()} is not the
|
||||
* full length of the byte[]. Please use {@link
|
||||
* AbstractField#getBinaryValue()} instead, which simply
|
||||
* returns the byte[].
|
||||
*/
|
||||
public byte[] binaryValue() {
|
||||
if (!isBinary)
|
||||
return null;
|
||||
final byte[] data = (byte[]) fieldsData;
|
||||
if (binaryOffset == 0 && data.length == binaryLength)
|
||||
return data; //Optimization
|
||||
|
||||
final byte[] ret = new byte[binaryLength];
|
||||
System.arraycopy(data, binaryOffset, ret, 0, binaryLength);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/** The value of the field as a TokesStream, or null. If null, the Reader value,
|
||||
* String value, or binary value is used. Exactly one of stringValue(),
|
||||
* readerValue(), binaryValue(), and tokenStreamValue() must be set. */
|
||||
* readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */
|
||||
public TokenStream tokenStreamValue() { return fieldsData instanceof TokenStream ? (TokenStream)fieldsData : null; }
|
||||
|
||||
|
||||
|
@ -182,8 +199,18 @@ public final class Field extends AbstractField implements Fieldable, Serializabl
|
|||
/** Expert: change the value of this field. See <a href="#setValue(java.lang.String)">setValue(String)</a>. */
|
||||
public void setValue(byte[] value) {
|
||||
fieldsData = value;
|
||||
binaryLength = value.length;
|
||||
binaryOffset = 0;
|
||||
}
|
||||
|
||||
/** Expert: change the value of this field. See <a href="#setValue(java.lang.String)">setValue(String)</a>. */
|
||||
public void setValue(byte[] value, int offset, int length) {
|
||||
fieldsData = value;
|
||||
binaryLength = length;
|
||||
binaryOffset = offset;
|
||||
}
|
||||
|
||||
|
||||
/** Expert: change the value of this field. See <a href="#setValue(java.lang.String)">setValue(String)</a>. */
|
||||
public void setValue(TokenStream value) {
|
||||
fieldsData = value;
|
||||
|
@ -378,34 +405,49 @@ public final class Field extends AbstractField implements Fieldable, Serializabl
|
|||
* @throws IllegalArgumentException if store is <code>Store.NO</code>
|
||||
*/
|
||||
public Field(String name, byte[] value, Store store) {
|
||||
this(name, value, 0, value.length, store);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a stored field with binary value. Optionally the value may be compressed.
|
||||
*
|
||||
* @param name The name of the field
|
||||
* @param value The binary value
|
||||
* @param offset Starting offset in value where this Field's bytes are
|
||||
* @param length Number of bytes to use for this Field, starting at offset
|
||||
* @param store How <code>value</code> should be stored (compressed or not)
|
||||
* @throws IllegalArgumentException if store is <code>Store.NO</code>
|
||||
*/
|
||||
public Field(String name, byte[] value, int offset, int length, Store store) {
|
||||
|
||||
if (name == null)
|
||||
throw new IllegalArgumentException("name cannot be null");
|
||||
if (value == null)
|
||||
throw new IllegalArgumentException("value cannot be null");
|
||||
|
||||
this.name = name.intern();
|
||||
this.fieldsData = value;
|
||||
fieldsData = value;
|
||||
|
||||
if (store == Store.YES){
|
||||
this.isStored = true;
|
||||
this.isCompressed = false;
|
||||
if (store == Store.YES) {
|
||||
isStored = true;
|
||||
isCompressed = false;
|
||||
}
|
||||
else if (store == Store.COMPRESS) {
|
||||
this.isStored = true;
|
||||
this.isCompressed = true;
|
||||
isStored = true;
|
||||
isCompressed = true;
|
||||
}
|
||||
else if (store == Store.NO)
|
||||
throw new IllegalArgumentException("binary values can't be unstored");
|
||||
else
|
||||
throw new IllegalArgumentException("unknown store parameter " + store);
|
||||
|
||||
this.isIndexed = false;
|
||||
this.isTokenized = false;
|
||||
isIndexed = false;
|
||||
isTokenized = false;
|
||||
|
||||
this.isBinary = true;
|
||||
isBinary = true;
|
||||
binaryLength = length;
|
||||
binaryOffset = offset;
|
||||
|
||||
setStoreTermVector(TermVector.NO);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -156,4 +156,45 @@ public interface Fieldable extends Serializable {
|
|||
* @return true if this field can be loaded lazily
|
||||
*/
|
||||
boolean isLazy();
|
||||
|
||||
/**
|
||||
* Returns offset into byte[] segment that is used as value, if Field is not binary
|
||||
* returned value is undefined
|
||||
* @return index of the first character in byte[] segment that represents this Field value
|
||||
*/
|
||||
abstract int getBinaryOffset();
|
||||
|
||||
/**
|
||||
* Returns length of byte[] segment that is used as value, if Field is not binary
|
||||
* returned value is undefined
|
||||
* @return length of byte[] segment that represents this Field value
|
||||
*/
|
||||
abstract int getBinaryLength();
|
||||
|
||||
/**
|
||||
* Return the raw byte[] for the binary field. Note that
|
||||
* you must also call {@link #getBinaryLength} and {@link
|
||||
* #getBinaryOffset} to know which range of bytes in this
|
||||
* returned array belong to the field.
|
||||
* @return reference to the Field value as byte[].
|
||||
*/
|
||||
abstract byte[] getBinaryValue();
|
||||
|
||||
/**
|
||||
* Return the raw byte[] for the binary field. Note that
|
||||
* you must also call {@link #getBinaryLength} and {@link
|
||||
* #getBinaryOffset} to know which range of bytes in this
|
||||
* returned array belong to the field.<p>
|
||||
* About reuse: if you pass in the result byte[] and it is
|
||||
* used, likely the underlying implementation will hold
|
||||
* onto this byte[] and return it in future calls to
|
||||
* {@link #binaryValue()} or {@link #getBinaryValue()}.
|
||||
* So if you subsequently re-use the same byte[] elsewhere
|
||||
* it will alter this Fieldable's value.
|
||||
* @param result User defined buffer that will be used if
|
||||
* possible. If this is null or not large enough, a new
|
||||
* buffer is allocated
|
||||
* @return reference to the Field value as byte[].
|
||||
*/
|
||||
abstract byte[] getBinaryValue(byte[] result);
|
||||
}
|
||||
|
|
|
@ -450,28 +450,7 @@ final class FieldsReader {
|
|||
* String value, or TokenStream value is used. Exactly one of stringValue(),
|
||||
* readerValue(), binaryValue(), and tokenStreamValue() must be set. */
|
||||
public byte[] binaryValue() {
|
||||
ensureOpen();
|
||||
if (isBinary) {
|
||||
if (fieldsData == null) {
|
||||
final byte[] b = new byte[toRead];
|
||||
IndexInput localFieldsStream = getFieldStream();
|
||||
//Throw this IO Exception since IndexReader.document does so anyway, so probably not that big of a change for people
|
||||
//since they are already handling this exception when getting the document
|
||||
try {
|
||||
localFieldsStream.seek(pointer);
|
||||
localFieldsStream.readBytes(b, 0, b.length);
|
||||
if (isCompressed == true) {
|
||||
fieldsData = uncompress(b);
|
||||
} else {
|
||||
fieldsData = b;
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new FieldReaderException(e);
|
||||
}
|
||||
}
|
||||
return (byte[]) fieldsData;
|
||||
} else
|
||||
return null;
|
||||
return getBinaryValue(null);
|
||||
}
|
||||
|
||||
/** The value of the field as a Reader, or null. If null, the String value,
|
||||
|
@ -545,8 +524,45 @@ final class FieldsReader {
|
|||
ensureOpen();
|
||||
this.toRead = toRead;
|
||||
}
|
||||
}
|
||||
|
||||
public byte[] getBinaryValue(byte[] result) {
|
||||
ensureOpen();
|
||||
|
||||
if (isBinary) {
|
||||
if (fieldsData == null) {
|
||||
// Allocate new buffer if result is null or too small
|
||||
final byte[] b;
|
||||
if (result == null || result.length < toRead)
|
||||
b = new byte[toRead];
|
||||
else
|
||||
b = result;
|
||||
|
||||
IndexInput localFieldsStream = getFieldStream();
|
||||
|
||||
// Throw this IOException since IndexReader.document does so anyway, so probably not that big of a change for people
|
||||
// since they are already handling this exception when getting the document
|
||||
try {
|
||||
localFieldsStream.seek(pointer);
|
||||
localFieldsStream.readBytes(b, 0, toRead);
|
||||
if (isCompressed == true) {
|
||||
fieldsData = uncompress(b);
|
||||
} else {
|
||||
fieldsData = b;
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new FieldReaderException(e);
|
||||
}
|
||||
|
||||
binaryOffset = 0;
|
||||
binaryLength = toRead;
|
||||
}
|
||||
|
||||
return (byte[]) fieldsData;
|
||||
} else
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private final byte[] uncompress(final byte[] input)
|
||||
throws CorruptIndexException, IOException {
|
||||
|
||||
|
|
|
@ -105,7 +105,7 @@ final class FieldsWriter
|
|||
doClose = true;
|
||||
}
|
||||
|
||||
FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn) throws IOException {
|
||||
FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn) {
|
||||
fieldInfos = fn;
|
||||
fieldsStream = fdt;
|
||||
indexStream = fdx;
|
||||
|
@ -190,32 +190,42 @@ final class FieldsWriter
|
|||
|
||||
if (field.isCompressed()) {
|
||||
// compression is enabled for the current field
|
||||
byte[] data = null;
|
||||
|
||||
final byte[] data;
|
||||
final int len;
|
||||
final int offset;
|
||||
if (disableCompression) {
|
||||
// optimized case for merging, the data
|
||||
// is already compressed
|
||||
data = field.binaryValue();
|
||||
data = field.getBinaryValue();
|
||||
len = field.getBinaryLength();
|
||||
offset = field.getBinaryOffset();
|
||||
} else {
|
||||
// check if it is a binary field
|
||||
if (field.isBinary()) {
|
||||
data = compress(field.binaryValue());
|
||||
}
|
||||
else {
|
||||
data = compress(field.stringValue().getBytes("UTF-8"));
|
||||
data = compress(field.getBinaryValue(), field.getBinaryOffset(), field.getBinaryLength());
|
||||
} else {
|
||||
byte x[] = field.stringValue().getBytes("UTF-8");
|
||||
data = compress(x, 0, x.length);
|
||||
}
|
||||
len = data.length;
|
||||
offset = 0;
|
||||
}
|
||||
final int len = data.length;
|
||||
|
||||
fieldsStream.writeVInt(len);
|
||||
fieldsStream.writeBytes(data, len);
|
||||
fieldsStream.writeBytes(data, offset, len);
|
||||
}
|
||||
else {
|
||||
// compression is disabled for the current field
|
||||
if (field.isBinary()) {
|
||||
byte[] data = field.binaryValue();
|
||||
final int len = data.length;
|
||||
final byte[] data;
|
||||
final int len;
|
||||
final int offset;
|
||||
data = field.getBinaryValue();
|
||||
len = field.getBinaryLength();
|
||||
offset = field.getBinaryOffset();
|
||||
|
||||
fieldsStream.writeVInt(len);
|
||||
fieldsStream.writeBytes(data, len);
|
||||
fieldsStream.writeBytes(data, offset, len);
|
||||
}
|
||||
else {
|
||||
fieldsStream.writeString(field.stringValue());
|
||||
|
@ -259,7 +269,14 @@ final class FieldsWriter
|
|||
}
|
||||
}
|
||||
|
||||
private final byte[] compress (byte[] input) {
|
||||
private final byte[] compress (byte[] input, int offset, int length) {
|
||||
// Create the compressor with highest level of compression
|
||||
Deflater compressor = new Deflater();
|
||||
compressor.setLevel(Deflater.BEST_COMPRESSION);
|
||||
|
||||
// Give the compressor the data to compress
|
||||
compressor.setInput(input, offset, length);
|
||||
compressor.finish();
|
||||
|
||||
/*
|
||||
* Create an expandable byte array to hold the compressed data.
|
||||
|
@ -267,10 +284,7 @@ final class FieldsWriter
|
|||
* there is no guarantee that the compressed data will be smaller than
|
||||
* the uncompressed data.
|
||||
*/
|
||||
ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);
|
||||
|
||||
// Create the compressor with highest level of compression
|
||||
Deflater compressor = new Deflater();
|
||||
ByteArrayOutputStream bos = new ByteArrayOutputStream(length);
|
||||
|
||||
try {
|
||||
compressor.setLevel(Deflater.BEST_COMPRESSION);
|
||||
|
|
|
@ -3765,4 +3765,36 @@ public class TestIndexWriter extends LuceneTestCase
|
|||
w.doFail = false;
|
||||
w.rollback();
|
||||
}
|
||||
|
||||
|
||||
// LUCENE-1219
|
||||
public void testBinaryFieldOffsetLength() throws IOException {
|
||||
MockRAMDirectory dir = new MockRAMDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, false, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
|
||||
byte[] b = new byte[50];
|
||||
for(int i=0;i<50;i++)
|
||||
b[i] = (byte) (i+77);
|
||||
|
||||
Document doc = new Document();
|
||||
Field f = new Field("binary", b, 10, 17, Field.Store.YES);
|
||||
byte[] bx = f.getBinaryValue();
|
||||
assertTrue(bx != null);
|
||||
assertEquals(50, bx.length);
|
||||
assertEquals(10, f.getBinaryOffset());
|
||||
assertEquals(17, f.getBinaryLength());
|
||||
doc.add(f);
|
||||
w.addDocument(doc);
|
||||
w.close();
|
||||
|
||||
IndexReader ir = IndexReader.open(dir);
|
||||
doc = ir.document(0);
|
||||
f = doc.getField("binary");
|
||||
b = f.getBinaryValue();
|
||||
assertTrue(b != null);
|
||||
assertEquals(17, b.length, 17);
|
||||
assertEquals(87, b[0]);
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue