mirror of https://github.com/apache/lucene.git

LUCENE-1223: fix lazy field loading to not allow a string field to be loaded as binary, nor vice versa

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@636568 13f79535-47bb-0310-9956-ffa450edef68

This commit is contained in:
parent 4b3709be87
commit 2768bacab2
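In user terms the fix means that a lazily loaded string field now answers null from binaryValue(), and a lazily loaded binary field answers null from stringValue(); those are exactly the assertions added to TestFieldsReader below. Here is a minimal sketch of that contract against the Lucene 2.3-era API as I recall it; the field names, stored values, and index setup are illustrative assumptions, not taken from this commit.

    import java.util.Collections;
    import java.util.HashSet;
    import java.util.Set;

    import org.apache.lucene.analysis.WhitespaceAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.Fieldable;
    import org.apache.lucene.document.SetBasedFieldSelector;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.RAMDirectory;

    // Sketch against the Lucene 2.3-era API; "text" and "blob" are made-up field names.
    public class LazyFieldTypeCheck {
      public static void main(String[] args) throws Exception {
        RAMDirectory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);

        // Index one document with a stored string field and a stored binary field.
        Document doc = new Document();
        doc.add(new Field("text", "some stored text", Field.Store.YES, Field.Index.TOKENIZED));
        doc.add(new Field("blob", new byte[] {1, 2, 3}, Field.Store.YES));
        writer.addDocument(doc);
        writer.close();

        // Ask for both fields lazily.
        Set lazyFields = new HashSet();
        lazyFields.add("text");
        lazyFields.add("blob");
        SetBasedFieldSelector selector =
            new SetBasedFieldSelector(Collections.EMPTY_SET, lazyFields);

        IndexReader reader = IndexReader.open(dir);
        Document loaded = reader.document(0, selector);
        Fieldable text = loaded.getFieldable("text");
        Fieldable blob = loaded.getFieldable("blob");

        // After this fix, asking a lazy field for the wrong representation returns null
        // instead of misreading the stored bytes.
        System.out.println(text.binaryValue());        // null: a string field has no binary value
        System.out.println(text.stringValue());        // "some stored text"
        System.out.println(blob.stringValue());        // null: a binary field has no string value
        System.out.println(blob.binaryValue().length); // 3

        reader.close();
      }
    }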
CHANGES.txt
@@ -70,7 +70,7 @@ Bug fixes

  4. LUCENE-1213: MultiFieldQueryParser was ignoring slop in case
     of a single field phrase. (Trejkaz via Doron Cohen)

 New features

  1. LUCENE-1137: Added Token.set/getFlags() accessors for passing more information about a Token through the analysis
FieldsReader.java
@@ -235,15 +235,15 @@ final class FieldsReader {
   }

   private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException {
-    if (binary == true) {
+    if (binary) {
       int toRead = fieldsStream.readVInt();
       long pointer = fieldsStream.getFilePointer();
       if (compressed) {
         //was: doc.add(new Fieldable(fi.name, uncompress(b), Fieldable.Store.COMPRESS));
-        doc.add(new LazyField(fi.name, Field.Store.COMPRESS, toRead, pointer));
+        doc.add(new LazyField(fi.name, Field.Store.COMPRESS, toRead, pointer, binary));
       } else {
         //was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
-        doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer));
+        doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary));
       }
       //Need to move the pointer ahead by toRead positions
       fieldsStream.seek(pointer + toRead);
@@ -257,7 +257,7 @@ final class FieldsReader {
         store = Field.Store.COMPRESS;
         int toRead = fieldsStream.readVInt();
         long pointer = fieldsStream.getFilePointer();
-        f = new LazyField(fi.name, store, toRead, pointer);
+        f = new LazyField(fi.name, store, toRead, pointer, binary);
         //skip over the part that we aren't loading
         fieldsStream.seek(pointer + toRead);
         f.setOmitNorms(fi.omitNorms);
@@ -266,7 +266,7 @@ final class FieldsReader {
         long pointer = fieldsStream.getFilePointer();
         //Skip ahead of where we are by the length of what is stored
         fieldsStream.skipChars(length);
-        f = new LazyField(fi.name, store, index, termVector, length, pointer);
+        f = new LazyField(fi.name, store, index, termVector, length, pointer, binary);
         f.setOmitNorms(fi.omitNorms);
       }
       doc.add(f);
@@ -385,17 +385,19 @@ final class FieldsReader {
     private int toRead;
     private long pointer;

-    public LazyField(String name, Field.Store store, int toRead, long pointer) {
+    public LazyField(String name, Field.Store store, int toRead, long pointer, boolean isBinary) {
       super(name, store, Field.Index.NO, Field.TermVector.NO);
       this.toRead = toRead;
       this.pointer = pointer;
+      this.isBinary = isBinary;
       lazy = true;
     }

-    public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer) {
+    public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, boolean isBinary) {
       super(name, store, index, termVector);
       this.toRead = toRead;
       this.pointer = pointer;
+      this.isBinary = isBinary;
       lazy = true;
     }

@@ -413,25 +415,27 @@ final class FieldsReader {
      * readerValue(), binaryValue(), and tokenStreamValue() must be set. */
     public byte[] binaryValue() {
       ensureOpen();
-      if (fieldsData == null) {
-        final byte[] b = new byte[toRead];
-        IndexInput localFieldsStream = getFieldStream();
-        //Throw this IO Exception since IndexReader.document does so anyway, so probably not that big of a change for people
-        //since they are already handling this exception when getting the document
-        try {
-          localFieldsStream.seek(pointer);
-          localFieldsStream.readBytes(b, 0, b.length);
-          if (isCompressed == true) {
-            fieldsData = uncompress(b);
-          } else {
-            fieldsData = b;
-          }
-          isBinary = true;
-        } catch (IOException e) {
-          throw new FieldReaderException(e);
-        }
-      }
-      return isBinary ? (byte[]) fieldsData : null;
+      if (isBinary) {
+        if (fieldsData == null) {
+          final byte[] b = new byte[toRead];
+          IndexInput localFieldsStream = getFieldStream();
+          //Throw this IO Exception since IndexReader.document does so anyway, so probably not that big of a change for people
+          //since they are already handling this exception when getting the document
+          try {
+            localFieldsStream.seek(pointer);
+            localFieldsStream.readBytes(b, 0, b.length);
+            if (isCompressed == true) {
+              fieldsData = uncompress(b);
+            } else {
+              fieldsData = b;
+            }
+          } catch (IOException e) {
+            throw new FieldReaderException(e);
+          }
+        }
+        return (byte[]) fieldsData;
+      } else
+        return null;
     }

     /** The value of the field as a Reader, or null. If null, the String value,

Note the behaviour the removed lines allowed: binaryValue() read the stored bytes for any lazy field, cached them in fieldsData, and only then set isBinary = true, so a lazy string field whose binary value was requested first was handed raw bytes and effectively reclassified as binary. The new version consults the isBinary flag supplied by the constructor and returns null up front for non-binary fields.
@@ -439,42 +443,45 @@ final class FieldsReader {
      * readerValue(), binaryValue(), and tokenStreamValue() must be set. */
     public Reader readerValue() {
       ensureOpen();
-      return fieldsData instanceof Reader ? (Reader) fieldsData : null;
+      return null;
     }

-    /** The value of the field as a TokesStream, or null. If null, the Reader value,
+    /** The value of the field as a TokenStream, or null. If null, the Reader value,
      * String value, or binary value is used. Exactly one of stringValue(),
      * readerValue(), binaryValue(), and tokenStreamValue() must be set. */
     public TokenStream tokenStreamValue() {
       ensureOpen();
-      return fieldsData instanceof TokenStream ? (TokenStream) fieldsData : null;
+      return null;
     }


     /** The value of the field as a String, or null. If null, the Reader value,
      * binary value, or TokenStream value is used. Exactly one of stringValue(),
      * readerValue(), binaryValue(), and tokenStreamValue() must be set. */
     public String stringValue() {
       ensureOpen();
-      if (fieldsData == null) {
-        IndexInput localFieldsStream = getFieldStream();
-        try {
-          localFieldsStream.seek(pointer);
-          if (isCompressed) {
-            final byte[] b = new byte[toRead];
-            localFieldsStream.readBytes(b, 0, b.length);
-            fieldsData = new String(uncompress(b), "UTF-8");
-          } else {
-            //read in chars b/c we already know the length we need to read
-            char[] chars = new char[toRead];
-            localFieldsStream.readChars(chars, 0, toRead);
-            fieldsData = new String(chars);
-          }
-        } catch (IOException e) {
-          throw new FieldReaderException(e);
-        }
-      }
-      return fieldsData instanceof String ? (String) fieldsData : null;
+      if (isBinary)
+        return null;
+      else {
+        if (fieldsData == null) {
+          IndexInput localFieldsStream = getFieldStream();
+          try {
+            localFieldsStream.seek(pointer);
+            if (isCompressed) {
+              final byte[] b = new byte[toRead];
+              localFieldsStream.readBytes(b, 0, b.length);
+              fieldsData = new String(uncompress(b), "UTF-8");
+            } else {
+              //read in chars b/c we already know the length we need to read
+              char[] chars = new char[toRead];
+              localFieldsStream.readChars(chars, 0, toRead);
+              fieldsData = new String(chars);
+            }
+          } catch (IOException e) {
+            throw new FieldReaderException(e);
+          }
+        }
+        return (String) fieldsData;
+      }
     }

     public long getPointer() {
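Both accessors now follow the same shape: consult the isBinary flag recorded at construction time, return null for the mismatched representation, and only otherwise read and cache the deferred value. Below is a compact standalone sketch of that shape; LazyValue, its stored byte array, and the UTF-8 decoding are stand-ins invented for illustration, not Lucene classes.

    import java.nio.charset.StandardCharsets;

    // Standalone sketch (not Lucene code) of the guard pattern this commit applies to
    // FieldsReader.LazyField: the holder is told at construction time whether the
    // deferred value is binary, and each accessor returns null for the wrong
    // representation instead of materialising bytes it should not hand out.
    final class LazyValue {
      private final byte[] stored;     // stands in for the bytes addressed by (pointer, toRead)
      private final boolean isBinary;  // decided when the field was written, passed in by the reader
      private Object fieldsData;       // lazily populated cache: byte[] or String

      LazyValue(byte[] stored, boolean isBinary) {
        this.stored = stored;
        this.isBinary = isBinary;
      }

      byte[] binaryValue() {
        if (!isBinary) {
          return null;                 // a string field never reports a binary value
        }
        if (fieldsData == null) {
          fieldsData = stored.clone(); // "read" the bytes only on first access
        }
        return (byte[]) fieldsData;
      }

      String stringValue() {
        if (isBinary) {
          return null;                 // a binary field never reports a string value
        }
        if (fieldsData == null) {
          fieldsData = new String(stored, StandardCharsets.UTF_8); // decode once, then cache
        }
        return (String) fieldsData;
      }

      public static void main(String[] args) {
        LazyValue text = new LazyValue("hello".getBytes(StandardCharsets.UTF_8), false);
        LazyValue blob = new LazyValue(new byte[] {1, 2, 3}, true);
        System.out.println(text.binaryValue());        // null
        System.out.println(text.stringValue());        // hello
        System.out.println(blob.stringValue());        // null
        System.out.println(blob.binaryValue().length); // 3
      }
    }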
TestFieldsReader.java
@@ -20,7 +20,6 @@ package org.apache.lucene.index;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.lucene.document.*;
 import org.apache.lucene.search.Similarity;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.store.AlreadyClosedException;
@@ -110,6 +109,7 @@ public class TestFieldsReader extends LuceneTestCase {
     field = doc.getFieldable(DocHelper.COMPRESSED_TEXT_FIELD_2_KEY);
     assertTrue("field is null and it shouldn't be", field != null);
     assertTrue("field is not lazy and it should be", field.isLazy());
+    assertTrue("binaryValue isn't null for lazy string field", field.binaryValue() == null);
     value = field.stringValue();
     assertTrue("value is null and it shouldn't be", value != null);
     assertTrue(value + " is not equal to " + DocHelper.FIELD_2_COMPRESSED_TEXT, value.equals(DocHelper.FIELD_2_COMPRESSED_TEXT) == true);
@@ -128,6 +128,8 @@ public class TestFieldsReader extends LuceneTestCase {

     field = doc.getFieldable(DocHelper.LAZY_FIELD_BINARY_KEY);
     assertTrue("field is null and it shouldn't be", field != null);
+    assertTrue("stringValue isn't null for lazy binary field", field.stringValue() == null);

     byte [] bytes = field.binaryValue();
     assertTrue("bytes is null and it shouldn't be", bytes != null);
     assertTrue("", DocHelper.LAZY_FIELD_BINARY_BYTES.length == bytes.length);
@@ -160,7 +162,7 @@ public class TestFieldsReader extends LuceneTestCase {
     assertTrue("field is not lazy and it should be", field.isLazy());
     reader.close();
     try {
-      String value = field.stringValue();
+      field.stringValue();
       fail("did not hit AlreadyClosedException as expected");
     } catch (AlreadyClosedException e) {
       // expected