mirror of https://github.com/apache/lucene.git

Lucene 762 - Applied original patch as I could not find a nice way to handle lazy fields as part of a readField() method on FieldSelectorResult per the suggestion on 762. All tests pass.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@505154 13f79535-47bb-0310-9956-ffa450edef68

parent edde3b691a
commit 65d27c8d72
FieldSelectorResult.java

@@ -60,6 +60,17 @@ public final class FieldSelectorResult {
    */
   public static final FieldSelectorResult LOAD_FOR_MERGE = new FieldSelectorResult(4);
 
+  /** Expert: Load the size of this {@link Field} rather than its value.
+   * Size is measured as number of bytes required to store the field == bytes for a binary or any compressed value, and 2*chars for a String value.
+   * The size is stored as a binary value, represented as an int in a byte[], with the higher order byte first in [0]
+   */
+  public static final FieldSelectorResult SIZE = new FieldSelectorResult(5);
+
+  /** Expert: Like {@link #SIZE} but immediately break from the field loading loop, i.e. stop loading further fields, after the size is loaded */
+  public static final FieldSelectorResult SIZE_AND_BREAK = new FieldSelectorResult(6);
+
+
+
   private int id;
 
   private FieldSelectorResult(int id) {
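As a usage sketch (not part of this patch): a selector that asks for only the size of one field might look like the following. The accept() callback matches the FieldSelector interface exercised in the test below; the class name and the "body"/"title" field names are hypothetical.

    import org.apache.lucene.document.FieldSelector;
    import org.apache.lucene.document.FieldSelectorResult;

    // Hypothetical selector: record only the size of a potentially large "body"
    // field, load "title" normally, and skip all other fields.
    public class SizeOnlySelector implements FieldSelector {
      public FieldSelectorResult accept(String fieldName) {
        if (fieldName.equals("body"))
          return FieldSelectorResult.SIZE;    // a 4-byte size is stored instead of the value
        if (fieldName.equals("title"))
          return FieldSelectorResult.LOAD;    // the value is loaded as usual
        return FieldSelectorResult.NO_LOAD;   // the field is skipped entirely
      }
    }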
FieldsReader.java

@@ -17,21 +17,16 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
 
+import org.apache.lucene.document.*;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.Reader;
 import java.util.zip.DataFormatException;
 import java.util.zip.Inflater;
 
-import org.apache.lucene.document.AbstractField;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.FieldSelector;
-import org.apache.lucene.document.FieldSelectorResult;
-import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IndexInput;
-
 /**
  * Class responsible for access to stored document fields.
  * <p/>
@@ -95,25 +90,33 @@ final class FieldsReader {
       int fieldNumber = fieldsStream.readVInt();
       FieldInfo fi = fieldInfos.fieldInfo(fieldNumber);
       FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name);
-      boolean lazy = acceptField.equals(FieldSelectorResult.LAZY_LOAD) == true;
 
       byte bits = fieldsStream.readByte();
       boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
       boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
       boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
-      if (acceptField.equals(FieldSelectorResult.LOAD) == true) {
+      //TODO: Find an alternative approach here if this list continues to grow beyond the
+      //list of 5 or 6 currently here.  See Lucene 762 for discussion
+      if (acceptField.equals(FieldSelectorResult.LOAD)) {
         addField(doc, fi, binary, compressed, tokenize);
       }
-      else if (acceptField.equals(FieldSelectorResult.LOAD_FOR_MERGE) == true) {
+      else if (acceptField.equals(FieldSelectorResult.LOAD_FOR_MERGE)) {
         addFieldForMerge(doc, fi, binary, compressed, tokenize);
       }
-      else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK) == true){
+      else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK)){
         addField(doc, fi, binary, compressed, tokenize);
         break;//Get out of this loop
       }
-      else if (lazy == true){
+      else if (acceptField.equals(FieldSelectorResult.LAZY_LOAD)) {
         addFieldLazy(doc, fi, binary, compressed, tokenize);
       }
+      else if (acceptField.equals(FieldSelectorResult.SIZE)){
+        skipField(binary, compressed, addFieldSize(doc, fi, binary, compressed));
+      }
+      else if (acceptField.equals(FieldSelectorResult.SIZE_AND_BREAK)){
+        addFieldSize(doc, fi, binary, compressed);
+        break;
+      }
       else {
         skipField(binary, compressed);
       }
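Note the asymmetry in the two new branches: SIZE still calls skipField() so the stream advances past the field's value and the loop keeps reading, while SIZE_AND_BREAK simply breaks, since nothing after it will be read. A hypothetical selector exploiting the latter (class and field names are ours, not from the patch):

    import org.apache.lucene.document.FieldSelector;
    import org.apache.lucene.document.FieldSelectorResult;

    // Hypothetical: when only one field's size is needed, SIZE_AND_BREAK stops
    // the field-loading loop immediately after recording it.
    public class BodySizeSelector implements FieldSelector {
      public FieldSelectorResult accept(String fieldName) {
        if (fieldName.equals("body"))
          return FieldSelectorResult.SIZE_AND_BREAK; // record size, stop loading further fields
        return FieldSelectorResult.NO_LOAD;          // skip fields that come before "body"
      }
    }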
@@ -127,9 +130,10 @@ final class FieldsReader {
    * This will have the most payoff on large fields.
    */
   private void skipField(boolean binary, boolean compressed) throws IOException {
-
-    int toRead = fieldsStream.readVInt();
-
+    skipField(binary, compressed, fieldsStream.readVInt());
+  }
+
+  private void skipField(boolean binary, boolean compressed, int toRead) throws IOException {
     if (binary || compressed) {
       long pointer = fieldsStream.getFilePointer();
       fieldsStream.seek(pointer + toRead);
@@ -236,6 +240,20 @@ final class FieldsReader {
       doc.add(f);
     }
   }
+
+  // Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes)
+  // Read just the size -- caller must skip the field content to continue reading fields
+  // Return the size in bytes or chars, depending on field type
+  private int addFieldSize(Document doc, FieldInfo fi, boolean binary, boolean compressed) throws IOException {
+    int size = fieldsStream.readVInt(), bytesize = binary || compressed ? size : 2*size;
+    byte[] sizebytes = new byte[4];
+    sizebytes[0] = (byte) (bytesize>>>24);
+    sizebytes[1] = (byte) (bytesize>>>16);
+    sizebytes[2] = (byte) (bytesize>>> 8);
+    sizebytes[3] = (byte)  bytesize      ;
+    doc.add(new Field(fi.name, sizebytes, Field.Store.YES));
+    return size;
+  }
 
   private Field.TermVector getTermVectorType(FieldInfo fi) {
     Field.TermVector termVector = null;
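Because addFieldSize() writes the size with the high-order byte first, a caller recovers the value by reassembling the four bytes big-endian. A minimal decoding sketch (the helper name is ours), mirroring assertSizeEquals() in the test below:

    // Decode the 4-byte big-endian size produced by addFieldSize().
    static int decodeFieldSize(byte[] sizebytes) {
      return ((sizebytes[0] & 0xFF) << 24)
           | ((sizebytes[1] & 0xFF) << 16)
           | ((sizebytes[2] & 0xFF) <<  8)
           |  (sizebytes[3] & 0xFF);
    }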
TestFieldsReader.java

@@ -17,27 +17,18 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
 
-import java.io.File;
-import java.io.IOException;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Set;
-
 import junit.framework.TestCase;
 
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.document.LoadFirstFieldSelector;
-import org.apache.lucene.document.SetBasedFieldSelector;
+import org.apache.lucene.document.*;
 import org.apache.lucene.search.Similarity;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util._TestUtil;
 
+import java.io.File;
+import java.io.IOException;
+import java.util.*;
+
 public class TestFieldsReader extends TestCase {
   private RAMDirectory dir = new RAMDirectory();
   private Document testDoc = new Document();
@@ -225,6 +216,41 @@ public class TestFieldsReader extends TestCase {
     System.out.println("Average Non-lazy time (should be very close to zero): " + regularTime / length + " ms for " + length + " reads");
     System.out.println("Average Lazy Time (should be greater than zero): " + lazyTime / length + " ms for " + length + " reads");
   }
+
+
+  public void testLoadSize() throws IOException {
+    FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
+    Document doc;
+
+    doc = reader.doc(0, new FieldSelector(){
+      public FieldSelectorResult accept(String fieldName) {
+        if (fieldName.equals(DocHelper.TEXT_FIELD_1_KEY) ||
+            fieldName.equals(DocHelper.COMPRESSED_TEXT_FIELD_2_KEY) ||
+            fieldName.equals(DocHelper.LAZY_FIELD_BINARY_KEY))
+          return FieldSelectorResult.SIZE;
+        else if (fieldName.equals(DocHelper.TEXT_FIELD_3_KEY))
+          return FieldSelectorResult.LOAD;
+        else
+          return FieldSelectorResult.NO_LOAD;
+      }
+    });
+    Fieldable f1 = doc.getFieldable(DocHelper.TEXT_FIELD_1_KEY);
+    Fieldable f3 = doc.getFieldable(DocHelper.TEXT_FIELD_3_KEY);
+    Fieldable fb = doc.getFieldable(DocHelper.LAZY_FIELD_BINARY_KEY);
+    assertTrue(f1.isBinary());
+    assertTrue(!f3.isBinary());
+    assertTrue(fb.isBinary());
+    assertSizeEquals(2*DocHelper.FIELD_1_TEXT.length(), f1.binaryValue());
+    assertEquals(DocHelper.FIELD_3_TEXT, f3.stringValue());
+    assertSizeEquals(DocHelper.LAZY_FIELD_BINARY_BYTES.length, fb.binaryValue());
+
+    reader.close();
+  }
+
+  private void assertSizeEquals(int size, byte[] sizebytes) {
+    assertEquals((byte) (size>>>24), sizebytes[0]);
+    assertEquals((byte) (size>>>16), sizebytes[1]);
+    assertEquals((byte) (size>>> 8), sizebytes[2]);
+    assertEquals((byte)  size      , sizebytes[3]);
+  }
 }