LUCENE-1219: add Fieldable.getBinaryValue/Offset/Length reuse API

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@686723 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2008-08-18 10:31:03 +00:00
parent 027307857e
commit a26f3734c6
7 changed files with 247 additions and 59 deletions

View File

@ -100,6 +100,11 @@ API Changes
frequency, positions and payloads. This saves index space, and
indexing/searching time. (Eks Dev via Mike McCandless)
15. LUCENE-1219: Add basic reuse API to Fieldable for binary fields:
getBinaryValue/Offset/Length(); currently only lazy fields reuse
the provided byte[] result to getBinaryValue. (Eks Dev via Mike
McCandless)
Bug fixes
1. LUCENE-1134: Fixed BooleanQuery.rewrite to only optimize a single

View File

@ -37,10 +37,12 @@ public abstract class AbstractField implements Fieldable {
protected float boost = 1.0f;
// the one and only data object for all different kind of field values
protected Object fieldsData = null;
//length/offset for all primitive types
protected int binaryLength;
protected int binaryOffset;
protected AbstractField()
{
}
protected AbstractField(String name, Field.Store store, Field.Index index, Field.TermVector termVector) {
@ -199,7 +201,43 @@ public abstract class AbstractField implements Fieldable {
}
/** True iff the value of the filed is stored as binary */
public final boolean isBinary() { return isBinary; }
public final boolean isBinary() {
return isBinary;
}
/**
* Return the raw byte[] for the binary field. Note that
* you must also call {@link #getBinaryLength} and {@link
* #getBinaryOffset} to know which range of bytes in this
* returned array belong to the field.
* @return reference to the Field value as byte[].
*/
public byte[] getBinaryValue() {
return getBinaryValue(null);
}
public byte[] getBinaryValue(byte[] result){
return isBinary ? (byte[]) fieldsData : null;
}
/**
* Returns length of byte[] segment that is used as value, if Field is not binary
* returned value is undefined
* @return length of byte[] segment that represents this Field value
*/
public int getBinaryLength() {
return binaryLength;
}
/**
* Returns offset into byte[] segment that is used as value, if Field is not binary
* returned value is undefined
* @return index of the first character in byte[] segment that represents this Field value
*/
public int getBinaryOffset() {
return binaryOffset;
}
/** True if norms are omitted for this indexed field */
public boolean getOmitNorms() { return omitNorms; }

View File

@ -137,22 +137,39 @@ public final class Field extends AbstractField implements Fieldable, Serializabl
/** The value of the field as a String, or null. If null, the Reader value,
* binary value, or TokenStream value is used. Exactly one of stringValue(),
* readerValue(), binaryValue(), and tokenStreamValue() must be set. */
* readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */
public String stringValue() { return fieldsData instanceof String ? (String)fieldsData : null; }
/** The value of the field as a Reader, or null. If null, the String value,
* binary value, or TokenStream value is used. Exactly one of stringValue(),
* readerValue(), binaryValue(), and tokenStreamValue() must be set. */
* readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */
public Reader readerValue() { return fieldsData instanceof Reader ? (Reader)fieldsData : null; }
/** The value of the field in Binary, or null. If null, the Reader value,
* String value, or TokenStream value is used. Exactly one of stringValue(),
* readerValue(), binaryValue(), and tokenStreamValue() must be set. */
public byte[] binaryValue() { return isBinary ? (byte[])fieldsData : null; }
* readerValue(), getBinaryValue(), and tokenStreamValue() must be set.
* @deprecated This method must allocate a new byte[] if
* the {@link AbstractField#getBinaryOffset()} is non-zero
* or {@link AbstractField#getBinaryLength()} is not the
* full length of the byte[]. Please use {@link
* AbstractField#getBinaryValue()} instead, which simply
* returns the byte[].
*/
public byte[] binaryValue() {
if (!isBinary)
return null;
final byte[] data = (byte[]) fieldsData;
if (binaryOffset == 0 && data.length == binaryLength)
return data; //Optimization
final byte[] ret = new byte[binaryLength];
System.arraycopy(data, binaryOffset, ret, 0, binaryLength);
return ret;
}
/** The value of the field as a TokesStream, or null. If null, the Reader value,
* String value, or binary value is used. Exactly one of stringValue(),
* readerValue(), binaryValue(), and tokenStreamValue() must be set. */
* readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */
public TokenStream tokenStreamValue() { return fieldsData instanceof TokenStream ? (TokenStream)fieldsData : null; }
@ -182,8 +199,18 @@ public final class Field extends AbstractField implements Fieldable, Serializabl
/** Expert: change the value of this field. See <a href="#setValue(java.lang.String)">setValue(String)</a>. */
public void setValue(byte[] value) {
fieldsData = value;
binaryLength = value.length;
binaryOffset = 0;
}
/** Expert: change the value of this field. See <a href="#setValue(java.lang.String)">setValue(String)</a>. */
public void setValue(byte[] value, int offset, int length) {
fieldsData = value;
binaryLength = length;
binaryOffset = offset;
}
/** Expert: change the value of this field. See <a href="#setValue(java.lang.String)">setValue(String)</a>. */
public void setValue(TokenStream value) {
fieldsData = value;
@ -378,34 +405,49 @@ public final class Field extends AbstractField implements Fieldable, Serializabl
* @throws IllegalArgumentException if store is <code>Store.NO</code>
*/
public Field(String name, byte[] value, Store store) {
this(name, value, 0, value.length, store);
}
/**
* Create a stored field with binary value. Optionally the value may be compressed.
*
* @param name The name of the field
* @param value The binary value
* @param offset Starting offset in value where this Field's bytes are
* @param length Number of bytes to use for this Field, starting at offset
* @param store How <code>value</code> should be stored (compressed or not)
* @throws IllegalArgumentException if store is <code>Store.NO</code>
*/
public Field(String name, byte[] value, int offset, int length, Store store) {
if (name == null)
throw new IllegalArgumentException("name cannot be null");
if (value == null)
throw new IllegalArgumentException("value cannot be null");
this.name = name.intern();
this.fieldsData = value;
fieldsData = value;
if (store == Store.YES){
this.isStored = true;
this.isCompressed = false;
if (store == Store.YES) {
isStored = true;
isCompressed = false;
}
else if (store == Store.COMPRESS) {
this.isStored = true;
this.isCompressed = true;
isStored = true;
isCompressed = true;
}
else if (store == Store.NO)
throw new IllegalArgumentException("binary values can't be unstored");
else
throw new IllegalArgumentException("unknown store parameter " + store);
this.isIndexed = false;
this.isTokenized = false;
isIndexed = false;
isTokenized = false;
this.isBinary = true;
isBinary = true;
binaryLength = length;
binaryOffset = offset;
setStoreTermVector(TermVector.NO);
}
}

View File

@ -156,4 +156,45 @@ public interface Fieldable extends Serializable {
* @return true if this field can be loaded lazily
*/
boolean isLazy();
/**
* Returns offset into byte[] segment that is used as value, if Field is not binary
* returned value is undefined
* @return index of the first character in byte[] segment that represents this Field value
*/
abstract int getBinaryOffset();
/**
* Returns length of byte[] segment that is used as value, if Field is not binary
* returned value is undefined
* @return length of byte[] segment that represents this Field value
*/
abstract int getBinaryLength();
/**
* Return the raw byte[] for the binary field. Note that
* you must also call {@link #getBinaryLength} and {@link
* #getBinaryOffset} to know which range of bytes in this
* returned array belong to the field.
* @return reference to the Field value as byte[].
*/
abstract byte[] getBinaryValue();
/**
* Return the raw byte[] for the binary field. Note that
* you must also call {@link #getBinaryLength} and {@link
* #getBinaryOffset} to know which range of bytes in this
* returned array belong to the field.<p>
* About reuse: if you pass in the result byte[] and it is
* used, likely the underlying implementation will hold
* onto this byte[] and return it in future calls to
* {@link #binaryValue()} or {@link #getBinaryValue()}.
* So if you subsequently re-use the same byte[] elsewhere
* it will alter this Fieldable's value.
* @param result User defined buffer that will be used if
* possible. If this is null or not large enough, a new
* buffer is allocated
* @return reference to the Field value as byte[].
*/
abstract byte[] getBinaryValue(byte[] result);
}

View File

@ -450,28 +450,7 @@ final class FieldsReader {
* String value, or TokenStream value is used. Exactly one of stringValue(),
* readerValue(), binaryValue(), and tokenStreamValue() must be set. */
public byte[] binaryValue() {
ensureOpen();
if (isBinary) {
if (fieldsData == null) {
final byte[] b = new byte[toRead];
IndexInput localFieldsStream = getFieldStream();
//Throw this IO Exception since IndexReader.document does so anyway, so probably not that big of a change for people
//since they are already handling this exception when getting the document
try {
localFieldsStream.seek(pointer);
localFieldsStream.readBytes(b, 0, b.length);
if (isCompressed == true) {
fieldsData = uncompress(b);
} else {
fieldsData = b;
}
} catch (IOException e) {
throw new FieldReaderException(e);
}
}
return (byte[]) fieldsData;
} else
return null;
return getBinaryValue(null);
}
/** The value of the field as a Reader, or null. If null, the String value,
@ -545,8 +524,45 @@ final class FieldsReader {
ensureOpen();
this.toRead = toRead;
}
}
public byte[] getBinaryValue(byte[] result) {
ensureOpen();
if (isBinary) {
if (fieldsData == null) {
// Allocate new buffer if result is null or too small
final byte[] b;
if (result == null || result.length < toRead)
b = new byte[toRead];
else
b = result;
IndexInput localFieldsStream = getFieldStream();
// Throw this IOException since IndexReader.document does so anyway, so probably not that big of a change for people
// since they are already handling this exception when getting the document
try {
localFieldsStream.seek(pointer);
localFieldsStream.readBytes(b, 0, toRead);
if (isCompressed == true) {
fieldsData = uncompress(b);
} else {
fieldsData = b;
}
} catch (IOException e) {
throw new FieldReaderException(e);
}
binaryOffset = 0;
binaryLength = toRead;
}
return (byte[]) fieldsData;
} else
return null;
}
}
private final byte[] uncompress(final byte[] input)
throws CorruptIndexException, IOException {

View File

@ -105,7 +105,7 @@ final class FieldsWriter
doClose = true;
}
FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn) throws IOException {
FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn) {
fieldInfos = fn;
fieldsStream = fdt;
indexStream = fdx;
@ -190,32 +190,42 @@ final class FieldsWriter
if (field.isCompressed()) {
// compression is enabled for the current field
byte[] data = null;
final byte[] data;
final int len;
final int offset;
if (disableCompression) {
// optimized case for merging, the data
// is already compressed
data = field.binaryValue();
data = field.getBinaryValue();
len = field.getBinaryLength();
offset = field.getBinaryOffset();
} else {
// check if it is a binary field
if (field.isBinary()) {
data = compress(field.binaryValue());
}
else {
data = compress(field.stringValue().getBytes("UTF-8"));
data = compress(field.getBinaryValue(), field.getBinaryOffset(), field.getBinaryLength());
} else {
byte x[] = field.stringValue().getBytes("UTF-8");
data = compress(x, 0, x.length);
}
len = data.length;
offset = 0;
}
final int len = data.length;
fieldsStream.writeVInt(len);
fieldsStream.writeBytes(data, len);
fieldsStream.writeBytes(data, offset, len);
}
else {
// compression is disabled for the current field
if (field.isBinary()) {
byte[] data = field.binaryValue();
final int len = data.length;
final byte[] data;
final int len;
final int offset;
data = field.getBinaryValue();
len = field.getBinaryLength();
offset = field.getBinaryOffset();
fieldsStream.writeVInt(len);
fieldsStream.writeBytes(data, len);
fieldsStream.writeBytes(data, offset, len);
}
else {
fieldsStream.writeString(field.stringValue());
@ -259,7 +269,14 @@ final class FieldsWriter
}
}
private final byte[] compress (byte[] input) {
private final byte[] compress (byte[] input, int offset, int length) {
// Create the compressor with highest level of compression
Deflater compressor = new Deflater();
compressor.setLevel(Deflater.BEST_COMPRESSION);
// Give the compressor the data to compress
compressor.setInput(input, offset, length);
compressor.finish();
/*
* Create an expandable byte array to hold the compressed data.
@ -267,10 +284,7 @@ final class FieldsWriter
* there is no guarantee that the compressed data will be smaller than
* the uncompressed data.
*/
ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);
// Create the compressor with highest level of compression
Deflater compressor = new Deflater();
ByteArrayOutputStream bos = new ByteArrayOutputStream(length);
try {
compressor.setLevel(Deflater.BEST_COMPRESSION);

View File

@ -3765,4 +3765,36 @@ public class TestIndexWriter extends LuceneTestCase
w.doFail = false;
w.rollback();
}
// LUCENE-1219
public void testBinaryFieldOffsetLength() throws IOException {
MockRAMDirectory dir = new MockRAMDirectory();
IndexWriter w = new IndexWriter(dir, false, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
byte[] b = new byte[50];
for(int i=0;i<50;i++)
b[i] = (byte) (i+77);
Document doc = new Document();
Field f = new Field("binary", b, 10, 17, Field.Store.YES);
byte[] bx = f.getBinaryValue();
assertTrue(bx != null);
assertEquals(50, bx.length);
assertEquals(10, f.getBinaryOffset());
assertEquals(17, f.getBinaryLength());
doc.add(f);
w.addDocument(doc);
w.close();
IndexReader ir = IndexReader.open(dir);
doc = ir.document(0);
f = doc.getField("binary");
b = f.getBinaryValue();
assertTrue(b != null);
assertEquals(17, b.length, 17);
assertEquals(87, b[0]);
ir.close();
dir.close();
}
}