LUCENE-762: Applied the original patch, as I could not find a clean way to handle lazy fields via a readField() method on FieldSelectorResult, per the suggestion on LUCENE-762.

All tests pass.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@505154 13f79535-47bb-0310-9956-ffa450edef68
Grant Ingersoll 2007-02-09 03:16:20 +00:00
parent edde3b691a
commit 65d27c8d72
3 changed files with 88 additions and 33 deletions

src/java/org/apache/lucene/document/FieldSelectorResult.java

@@ -60,6 +60,17 @@ public final class FieldSelectorResult {
    */
   public static final FieldSelectorResult LOAD_FOR_MERGE = new FieldSelectorResult(4);
+  /** Expert: Load the size of this {@link Field} rather than its value.
+   * Size is measured as the number of bytes required to store the field: the stored byte count for a binary or any compressed value, and 2*chars for a String value.
+   * The size is stored as a binary value, represented as an int in a byte[], with the higher-order byte first in [0].
+   */
+  public static final FieldSelectorResult SIZE = new FieldSelectorResult(5);
+  /** Expert: Like {@link #SIZE} but immediately break from the field loading loop, i.e. stop loading further fields, after the size is loaded. */
+  public static final FieldSelectorResult SIZE_AND_BREAK = new FieldSelectorResult(6);
   private int id;
   private FieldSelectorResult(int id) {
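For orientation, a minimal usage sketch of the new constants (not part of the diff; the field name "body" and the reader are illustrative assumptions): a FieldSelector answers SIZE for the one field whose size is wanted and NO_LOAD for the rest, and the size comes back as a stored binary field.

// Sketch only -- "body" is a hypothetical field name, and "reader" is
// assumed to be an open org.apache.lucene.index.IndexReader.
int docId = 0;
FieldSelector sizeSelector = new FieldSelector() {
  public FieldSelectorResult accept(String fieldName) {
    return fieldName.equals("body")
        ? FieldSelectorResult.SIZE      // load only the size of "body"
        : FieldSelectorResult.NO_LOAD;  // skip everything else
  }
};
Document doc = reader.document(docId, sizeSelector);
// A 4-byte big-endian int, as documented on SIZE; see the decode sketch
// after the FieldsReader changes below.
byte[] sizeBytes = doc.getFieldable("body").binaryValue();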

src/java/org/apache/lucene/index/FieldsReader.java

@@ -17,21 +17,16 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
+import org.apache.lucene.document.*;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.Reader;
 import java.util.zip.DataFormatException;
 import java.util.zip.Inflater;
-import org.apache.lucene.document.AbstractField;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.FieldSelector;
-import org.apache.lucene.document.FieldSelectorResult;
-import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IndexInput;
 /**
  * Class responsible for access to stored document fields.
  * <p/>
@@ -95,25 +90,33 @@ final class FieldsReader {
       int fieldNumber = fieldsStream.readVInt();
       FieldInfo fi = fieldInfos.fieldInfo(fieldNumber);
       FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name);
-      boolean lazy = acceptField.equals(FieldSelectorResult.LAZY_LOAD) == true;
       byte bits = fieldsStream.readByte();
       boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
       boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
       boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
-      if (acceptField.equals(FieldSelectorResult.LOAD) == true) {
+      //TODO: Find an alternative approach here if this list continues to grow beyond the
+      //list of 5 or 6 currently here. See Lucene 762 for discussion
+      if (acceptField.equals(FieldSelectorResult.LOAD)) {
         addField(doc, fi, binary, compressed, tokenize);
       }
-      else if (acceptField.equals(FieldSelectorResult.LOAD_FOR_MERGE) == true) {
+      else if (acceptField.equals(FieldSelectorResult.LOAD_FOR_MERGE)) {
         addFieldForMerge(doc, fi, binary, compressed, tokenize);
       }
-      else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK) == true){
+      else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK)){
         addField(doc, fi, binary, compressed, tokenize);
         break; //Get out of this loop
       }
-      else if (lazy == true){
+      else if (acceptField.equals(FieldSelectorResult.LAZY_LOAD)) {
         addFieldLazy(doc, fi, binary, compressed, tokenize);
       }
+      else if (acceptField.equals(FieldSelectorResult.SIZE)){
+        skipField(binary, compressed, addFieldSize(doc, fi, binary, compressed));
+      }
+      else if (acceptField.equals(FieldSelectorResult.SIZE_AND_BREAK)){
+        addFieldSize(doc, fi, binary, compressed);
+        break;
+      }
       else {
         skipField(binary, compressed);
       }
@@ -127,9 +130,10 @@ final class FieldsReader {
    * This will have the most payoff on large fields.
    */
   private void skipField(boolean binary, boolean compressed) throws IOException {
-    int toRead = fieldsStream.readVInt();
+    skipField(binary, compressed, fieldsStream.readVInt());
+  }
+  private void skipField(boolean binary, boolean compressed, int toRead) throws IOException {
     if (binary || compressed) {
       long pointer = fieldsStream.getFilePointer();
       fieldsStream.seek(pointer + toRead);
@@ -236,6 +240,20 @@ final class FieldsReader {
       doc.add(f);
     }
   }
+  // Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes)
+  // Read just the size -- caller must skip the field content to continue reading fields
+  // Return the size in bytes or chars, depending on field type
+  private int addFieldSize(Document doc, FieldInfo fi, boolean binary, boolean compressed) throws IOException {
+    int size = fieldsStream.readVInt(), bytesize = binary || compressed ? size : 2 * size;
+    byte[] sizebytes = new byte[4];
+    sizebytes[0] = (byte) (bytesize >>> 24);
+    sizebytes[1] = (byte) (bytesize >>> 16);
+    sizebytes[2] = (byte) (bytesize >>> 8);
+    sizebytes[3] = (byte) bytesize;
+    doc.add(new Field(fi.name, sizebytes, Field.Store.YES));
+    return size;
+  }
   private Field.TermVector getTermVectorType(FieldInfo fi) {
     Field.TermVector termVector = null;
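A note on the SIZE branch above: addFieldSize() consumes only the length VInt, leaving the stream positioned at the field's value, which is why its return value is handed to the new skipField overload to seek past the value; SIZE_AND_BREAK can omit that because it exits the loop entirely. The sketch below (hypothetical helper names, not from the patch) makes the byte layout explicit.

// Hypothetical helpers mirroring addFieldSize's encoding; not in the patch.
static byte[] encodeSize(int bytesize) {
  return new byte[]{
      (byte) (bytesize >>> 24), (byte) (bytesize >>> 16),
      (byte) (bytesize >>> 8),  (byte) bytesize};
}
static int decodeSize(byte[] b) {
  return ((b[0] & 0xFF) << 24) | ((b[1] & 0xFF) << 16)
       | ((b[2] & 0xFF) << 8)  |  (b[3] & 0xFF);
}
// decodeSize(encodeSize(n)) == n for any non-negative n. A 10-char String
// field stores 20 (2 bytes per char); a binary or compressed field stores
// its raw stored byte length.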

src/test/org/apache/lucene/index/TestFieldsReader.java

@@ -17,27 +17,18 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
-import java.io.File;
-import java.io.IOException;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Set;
 import junit.framework.TestCase;
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.document.LoadFirstFieldSelector;
-import org.apache.lucene.document.SetBasedFieldSelector;
+import org.apache.lucene.document.*;
 import org.apache.lucene.search.Similarity;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util._TestUtil;
+import java.io.File;
+import java.io.IOException;
+import java.util.*;
 public class TestFieldsReader extends TestCase {
   private RAMDirectory dir = new RAMDirectory();
   private Document testDoc = new Document();
@@ -225,6 +216,41 @@ public class TestFieldsReader extends TestCase {
     System.out.println("Average Non-lazy time (should be very close to zero): " + regularTime / length + " ms for " + length + " reads");
     System.out.println("Average Lazy Time (should be greater than zero): " + lazyTime / length + " ms for " + length + " reads");
   }
+  public void testLoadSize() throws IOException {
+    FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
+    Document doc;
+    doc = reader.doc(0, new FieldSelector() {
+      public FieldSelectorResult accept(String fieldName) {
+        if (fieldName.equals(DocHelper.TEXT_FIELD_1_KEY) ||
+            fieldName.equals(DocHelper.COMPRESSED_TEXT_FIELD_2_KEY) ||
+            fieldName.equals(DocHelper.LAZY_FIELD_BINARY_KEY))
+          return FieldSelectorResult.SIZE;
+        else if (fieldName.equals(DocHelper.TEXT_FIELD_3_KEY))
+          return FieldSelectorResult.LOAD;
+        else
+          return FieldSelectorResult.NO_LOAD;
+      }
+    });
+    Fieldable f1 = doc.getFieldable(DocHelper.TEXT_FIELD_1_KEY);
+    Fieldable f3 = doc.getFieldable(DocHelper.TEXT_FIELD_3_KEY);
+    Fieldable fb = doc.getFieldable(DocHelper.LAZY_FIELD_BINARY_KEY);
+    assertTrue(f1.isBinary());
+    assertTrue(!f3.isBinary());
+    assertTrue(fb.isBinary());
+    assertSizeEquals(2 * DocHelper.FIELD_1_TEXT.length(), f1.binaryValue());
+    assertEquals(DocHelper.FIELD_3_TEXT, f3.stringValue());
+    assertSizeEquals(DocHelper.LAZY_FIELD_BINARY_BYTES.length, fb.binaryValue());
+    reader.close();
+  }
+  private void assertSizeEquals(int size, byte[] sizebytes) {
+    assertEquals((byte) (size >>> 24), sizebytes[0]);
+    assertEquals((byte) (size >>> 16), sizebytes[1]);
+    assertEquals((byte) (size >>> 8), sizebytes[2]);
+    assertEquals((byte) size, sizebytes[3]);
+  }
 }
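testLoadSize exercises SIZE but not SIZE_AND_BREAK; a minimal sketch of a test that could drive that path, reusing the same fixtures (not part of the commit):

  public void testLoadSizeAndBreak() throws IOException {
    // Sketch only, not in the commit: ask for the size of whatever field is
    // encountered first, then stop -- the loading loop should break after
    // exactly one field.
    FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
    Document doc = reader.doc(0, new FieldSelector() {
      public FieldSelectorResult accept(String fieldName) {
        return FieldSelectorResult.SIZE_AND_BREAK;
      }
    });
    assertEquals(1, doc.getFields().size()); // only one field was loaded
    reader.close();
  }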