LUCENE-1223: fix lazy field loading to not allow string field to be loaded as binary, nor vice/versa

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@636568 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2008-03-12 23:02:38 +00:00
parent 4b3709be87
commit 2768bacab2
3 changed files with 56 additions and 47 deletions

View File

@ -70,7 +70,7 @@ Bug fixes
4. LUCENE-1213: MultiFieldQueryParser was ignoring slop in case
of a single field phrase. (Trejkaz via Doron Cohen)
New features
1. LUCENE-1137: Added Token.set/getFlags() accessors for passing more information about a Token through the analysis

View File

@ -235,15 +235,15 @@ final class FieldsReader {
}
private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException {
if (binary == true) {
if (binary) {
int toRead = fieldsStream.readVInt();
long pointer = fieldsStream.getFilePointer();
if (compressed) {
//was: doc.add(new Fieldable(fi.name, uncompress(b), Fieldable.Store.COMPRESS));
doc.add(new LazyField(fi.name, Field.Store.COMPRESS, toRead, pointer));
doc.add(new LazyField(fi.name, Field.Store.COMPRESS, toRead, pointer, binary));
} else {
//was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer));
doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary));
}
//Need to move the pointer ahead by toRead positions
fieldsStream.seek(pointer + toRead);
@ -257,7 +257,7 @@ final class FieldsReader {
store = Field.Store.COMPRESS;
int toRead = fieldsStream.readVInt();
long pointer = fieldsStream.getFilePointer();
f = new LazyField(fi.name, store, toRead, pointer);
f = new LazyField(fi.name, store, toRead, pointer, binary);
//skip over the part that we aren't loading
fieldsStream.seek(pointer + toRead);
f.setOmitNorms(fi.omitNorms);
@ -266,7 +266,7 @@ final class FieldsReader {
long pointer = fieldsStream.getFilePointer();
//Skip ahead of where we are by the length of what is stored
fieldsStream.skipChars(length);
f = new LazyField(fi.name, store, index, termVector, length, pointer);
f = new LazyField(fi.name, store, index, termVector, length, pointer, binary);
f.setOmitNorms(fi.omitNorms);
}
doc.add(f);
@ -385,17 +385,19 @@ final class FieldsReader {
private int toRead;
private long pointer;
public LazyField(String name, Field.Store store, int toRead, long pointer) {
public LazyField(String name, Field.Store store, int toRead, long pointer, boolean isBinary) {
super(name, store, Field.Index.NO, Field.TermVector.NO);
this.toRead = toRead;
this.pointer = pointer;
this.isBinary = isBinary;
lazy = true;
}
public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer) {
public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, boolean isBinary) {
super(name, store, index, termVector);
this.toRead = toRead;
this.pointer = pointer;
this.isBinary = isBinary;
lazy = true;
}
@ -413,25 +415,27 @@ final class FieldsReader {
* readerValue(), binaryValue(), and tokenStreamValue() must be set. */
public byte[] binaryValue() {
ensureOpen();
if (fieldsData == null) {
final byte[] b = new byte[toRead];
IndexInput localFieldsStream = getFieldStream();
//Throw this IO Exception since IndexReader.document does so anyway, so probably not that big of a change for people
//since they are already handling this exception when getting the document
try {
localFieldsStream.seek(pointer);
localFieldsStream.readBytes(b, 0, b.length);
if (isCompressed == true) {
fieldsData = uncompress(b);
} else {
fieldsData = b;
if (isBinary) {
if (fieldsData == null) {
final byte[] b = new byte[toRead];
IndexInput localFieldsStream = getFieldStream();
//Throw this IO Exception since IndexReader.document does so anyway, so probably not that big of a change for people
//since they are already handling this exception when getting the document
try {
localFieldsStream.seek(pointer);
localFieldsStream.readBytes(b, 0, b.length);
if (isCompressed == true) {
fieldsData = uncompress(b);
} else {
fieldsData = b;
}
} catch (IOException e) {
throw new FieldReaderException(e);
}
isBinary = true;
} catch (IOException e) {
throw new FieldReaderException(e);
}
}
return isBinary ? (byte[]) fieldsData : null;
return (byte[]) fieldsData;
} else
return null;
}
/** The value of the field as a Reader, or null. If null, the String value,
@ -439,42 +443,45 @@ final class FieldsReader {
* readerValue(), binaryValue(), and tokenStreamValue() must be set. */
public Reader readerValue() {
ensureOpen();
return fieldsData instanceof Reader ? (Reader) fieldsData : null;
return null;
}
/** The value of the field as a TokesStream, or null. If null, the Reader value,
/** The value of the field as a TokenStream, or null. If null, the Reader value,
* String value, or binary value is used. Exactly one of stringValue(),
* readerValue(), binaryValue(), and tokenStreamValue() must be set. */
public TokenStream tokenStreamValue() {
ensureOpen();
return fieldsData instanceof TokenStream ? (TokenStream) fieldsData : null;
return null;
}
/** The value of the field as a String, or null. If null, the Reader value,
* binary value, or TokenStream value is used. Exactly one of stringValue(),
* readerValue(), binaryValue(), and tokenStreamValue() must be set. */
public String stringValue() {
ensureOpen();
if (fieldsData == null) {
IndexInput localFieldsStream = getFieldStream();
try {
localFieldsStream.seek(pointer);
if (isCompressed) {
final byte[] b = new byte[toRead];
localFieldsStream.readBytes(b, 0, b.length);
fieldsData = new String(uncompress(b), "UTF-8");
} else {
//read in chars b/c we already know the length we need to read
char[] chars = new char[toRead];
localFieldsStream.readChars(chars, 0, toRead);
fieldsData = new String(chars);
if (isBinary)
return null;
else {
if (fieldsData == null) {
IndexInput localFieldsStream = getFieldStream();
try {
localFieldsStream.seek(pointer);
if (isCompressed) {
final byte[] b = new byte[toRead];
localFieldsStream.readBytes(b, 0, b.length);
fieldsData = new String(uncompress(b), "UTF-8");
} else {
//read in chars b/c we already know the length we need to read
char[] chars = new char[toRead];
localFieldsStream.readChars(chars, 0, toRead);
fieldsData = new String(chars);
}
} catch (IOException e) {
throw new FieldReaderException(e);
}
} catch (IOException e) {
throw new FieldReaderException(e);
}
return (String) fieldsData;
}
return fieldsData instanceof String ? (String) fieldsData : null;
}
public long getPointer() {

View File

@ -20,7 +20,6 @@ package org.apache.lucene.index;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.store.AlreadyClosedException;
@ -110,6 +109,7 @@ public class TestFieldsReader extends LuceneTestCase {
field = doc.getFieldable(DocHelper.COMPRESSED_TEXT_FIELD_2_KEY);
assertTrue("field is null and it shouldn't be", field != null);
assertTrue("field is not lazy and it should be", field.isLazy());
assertTrue("binaryValue isn't null for lazy string field", field.binaryValue() == null);
value = field.stringValue();
assertTrue("value is null and it shouldn't be", value != null);
assertTrue(value + " is not equal to " + DocHelper.FIELD_2_COMPRESSED_TEXT, value.equals(DocHelper.FIELD_2_COMPRESSED_TEXT) == true);
@ -128,6 +128,8 @@ public class TestFieldsReader extends LuceneTestCase {
field = doc.getFieldable(DocHelper.LAZY_FIELD_BINARY_KEY);
assertTrue("field is null and it shouldn't be", field != null);
assertTrue("stringValue isn't null for lazy binary field", field.stringValue() == null);
byte [] bytes = field.binaryValue();
assertTrue("bytes is null and it shouldn't be", bytes != null);
assertTrue("", DocHelper.LAZY_FIELD_BINARY_BYTES.length == bytes.length);
@ -160,7 +162,7 @@ public class TestFieldsReader extends LuceneTestCase {
assertTrue("field is not lazy and it should be", field.isLazy());
reader.close();
try {
String value = field.stringValue();
field.stringValue();
fail("did not hit AlreadyClosedException as expected");
} catch (AlreadyClosedException e) {
// expected