LUCENE-762: Applied the original patch, as I could not find a clean way to handle lazy fields via a readField() method on FieldSelectorResult, per the suggestion on LUCENE-762.

All tests pass.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@505154 13f79535-47bb-0310-9956-ffa450edef68
Grant Ingersoll 2007-02-09 03:16:20 +00:00
parent edde3b691a
commit 65d27c8d72
3 changed files with 88 additions and 33 deletions

src/java/org/apache/lucene/document/FieldSelectorResult.java

@@ -60,6 +60,17 @@ public final class FieldSelectorResult {
    */
   public static final FieldSelectorResult LOAD_FOR_MERGE = new FieldSelectorResult(4);
+  /** Expert: Load the size of this {@link Field} rather than its value.
+   * Size is measured as the number of bytes required to store the field: the stored byte count for a binary or any compressed value, and 2*chars for a String value.
+   * The size is stored as a binary value, represented as an int in a byte[], with the higher-order byte first in [0].
+   */
+  public static final FieldSelectorResult SIZE = new FieldSelectorResult(5);
+  /** Expert: Like {@link #SIZE} but immediately break from the field loading loop, i.e. stop loading further fields, after the size is loaded. */
+  public static final FieldSelectorResult SIZE_AND_BREAK = new FieldSelectorResult(6);
   private int id;
   private FieldSelectorResult(int id) {
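For orientation, a minimal usage sketch of the new constants (not part of the diff; the field name "body" and the reader are illustrative assumptions): a FieldSelector answers SIZE for the one field whose size is wanted and NO_LOAD for the rest, and the size comes back as a stored binary field.

// Sketch only -- "body" is a hypothetical field name, and "reader" is
// assumed to be an open org.apache.lucene.index.IndexReader.
int docId = 0;
FieldSelector sizeSelector = new FieldSelector() {
  public FieldSelectorResult accept(String fieldName) {
    return fieldName.equals("body")
        ? FieldSelectorResult.SIZE      // load only the size of "body"
        : FieldSelectorResult.NO_LOAD;  // skip everything else
  }
};
Document doc = reader.document(docId, sizeSelector);
// A 4-byte big-endian int, as documented on SIZE; see the decode sketch
// after the FieldsReader changes below.
byte[] sizeBytes = doc.getFieldable("body").binaryValue();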

src/java/org/apache/lucene/index/FieldsReader.java

@@ -17,21 +17,16 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
+import org.apache.lucene.document.*;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.Reader;
 import java.util.zip.DataFormatException;
 import java.util.zip.Inflater;
-import org.apache.lucene.document.AbstractField;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.FieldSelector;
-import org.apache.lucene.document.FieldSelectorResult;
-import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IndexInput;
 /**
  * Class responsible for access to stored document fields.
  * <p/>
@@ -95,25 +90,33 @@ final class FieldsReader {
       int fieldNumber = fieldsStream.readVInt();
       FieldInfo fi = fieldInfos.fieldInfo(fieldNumber);
       FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name);
-      boolean lazy = acceptField.equals(FieldSelectorResult.LAZY_LOAD) == true;
       byte bits = fieldsStream.readByte();
       boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
       boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
       boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
-      if (acceptField.equals(FieldSelectorResult.LOAD) == true) {
+      //TODO: Find an alternative approach here if this list continues to grow beyond the
+      //list of 5 or 6 currently here. See Lucene 762 for discussion
+      if (acceptField.equals(FieldSelectorResult.LOAD)) {
         addField(doc, fi, binary, compressed, tokenize);
       }
-      else if (acceptField.equals(FieldSelectorResult.LOAD_FOR_MERGE) == true) {
+      else if (acceptField.equals(FieldSelectorResult.LOAD_FOR_MERGE)) {
         addFieldForMerge(doc, fi, binary, compressed, tokenize);
       }
-      else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK) == true){
+      else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK)){
         addField(doc, fi, binary, compressed, tokenize);
         break; //Get out of this loop
       }
-      else if (lazy == true){
+      else if (acceptField.equals(FieldSelectorResult.LAZY_LOAD)) {
         addFieldLazy(doc, fi, binary, compressed, tokenize);
       }
+      else if (acceptField.equals(FieldSelectorResult.SIZE)){
+        skipField(binary, compressed, addFieldSize(doc, fi, binary, compressed));
+      }
+      else if (acceptField.equals(FieldSelectorResult.SIZE_AND_BREAK)){
+        addFieldSize(doc, fi, binary, compressed);
+        break;
+      }
       else {
         skipField(binary, compressed);
       }
@@ -127,9 +130,10 @@ final class FieldsReader {
    * This will have the most payoff on large fields.
    */
   private void skipField(boolean binary, boolean compressed) throws IOException {
-    int toRead = fieldsStream.readVInt();
+    skipField(binary, compressed, fieldsStream.readVInt());
+  }
+  private void skipField(boolean binary, boolean compressed, int toRead) throws IOException {
     if (binary || compressed) {
       long pointer = fieldsStream.getFilePointer();
       fieldsStream.seek(pointer + toRead);
@@ -236,6 +240,20 @@ final class FieldsReader {
       doc.add(f);
     }
   }
+  // Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes)
+  // Read just the size -- caller must skip the field content to continue reading fields
+  // Return the size in bytes or chars, depending on field type
+  private int addFieldSize(Document doc, FieldInfo fi, boolean binary, boolean compressed) throws IOException {
+    int size = fieldsStream.readVInt(), bytesize = binary || compressed ? size : 2 * size;
+    byte[] sizebytes = new byte[4];
+    sizebytes[0] = (byte) (bytesize >>> 24);
+    sizebytes[1] = (byte) (bytesize >>> 16);
+    sizebytes[2] = (byte) (bytesize >>> 8);
+    sizebytes[3] = (byte) bytesize;
+    doc.add(new Field(fi.name, sizebytes, Field.Store.YES));
+    return size;
+  }
   private Field.TermVector getTermVectorType(FieldInfo fi) {
     Field.TermVector termVector = null;
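A note on the SIZE branch above: addFieldSize() consumes only the length VInt, leaving the stream positioned at the field's value, which is why its return value is handed to the new skipField overload to seek past the value; SIZE_AND_BREAK can omit that because it exits the loop entirely. The sketch below (hypothetical helper names, not from the patch) makes the byte layout explicit.

// Hypothetical helpers mirroring addFieldSize's encoding; not in the patch.
static byte[] encodeSize(int bytesize) {
  return new byte[]{
      (byte) (bytesize >>> 24), (byte) (bytesize >>> 16),
      (byte) (bytesize >>> 8),  (byte) bytesize};
}
static int decodeSize(byte[] b) {
  return ((b[0] & 0xFF) << 24) | ((b[1] & 0xFF) << 16)
       | ((b[2] & 0xFF) << 8)  |  (b[3] & 0xFF);
}
// decodeSize(encodeSize(n)) == n for any non-negative n. A 10-char String
// field stores 20 (2 bytes per char); a binary or compressed field stores
// its raw stored byte length.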

src/test/org/apache/lucene/index/TestFieldsReader.java

@@ -17,27 +17,18 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
-import java.io.File;
-import java.io.IOException;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Set;
 import junit.framework.TestCase;
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.document.LoadFirstFieldSelector;
-import org.apache.lucene.document.SetBasedFieldSelector;
+import org.apache.lucene.document.*;
 import org.apache.lucene.search.Similarity;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util._TestUtil;
+import java.io.File;
+import java.io.IOException;
+import java.util.*;
 public class TestFieldsReader extends TestCase {
   private RAMDirectory dir = new RAMDirectory();
   private Document testDoc = new Document();
@@ -225,6 +216,41 @@ public class TestFieldsReader extends TestCase {
     System.out.println("Average Non-lazy time (should be very close to zero): " + regularTime / length + " ms for " + length + " reads");
     System.out.println("Average Lazy Time (should be greater than zero): " + lazyTime / length + " ms for " + length + " reads");
   }
+  public void testLoadSize() throws IOException {
+    FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
+    Document doc;
+    doc = reader.doc(0, new FieldSelector() {
+      public FieldSelectorResult accept(String fieldName) {
+        if (fieldName.equals(DocHelper.TEXT_FIELD_1_KEY) ||
+            fieldName.equals(DocHelper.COMPRESSED_TEXT_FIELD_2_KEY) ||
+            fieldName.equals(DocHelper.LAZY_FIELD_BINARY_KEY))
+          return FieldSelectorResult.SIZE;
+        else if (fieldName.equals(DocHelper.TEXT_FIELD_3_KEY))
+          return FieldSelectorResult.LOAD;
+        else
+          return FieldSelectorResult.NO_LOAD;
+      }
+    });
+    Fieldable f1 = doc.getFieldable(DocHelper.TEXT_FIELD_1_KEY);
+    Fieldable f3 = doc.getFieldable(DocHelper.TEXT_FIELD_3_KEY);
+    Fieldable fb = doc.getFieldable(DocHelper.LAZY_FIELD_BINARY_KEY);
+    assertTrue(f1.isBinary());
+    assertTrue(!f3.isBinary());
+    assertTrue(fb.isBinary());
+    assertSizeEquals(2 * DocHelper.FIELD_1_TEXT.length(), f1.binaryValue());
+    assertEquals(DocHelper.FIELD_3_TEXT, f3.stringValue());
+    assertSizeEquals(DocHelper.LAZY_FIELD_BINARY_BYTES.length, fb.binaryValue());
+    reader.close();
+  }
+  private void assertSizeEquals(int size, byte[] sizebytes) {
+    assertEquals((byte) (size >>> 24), sizebytes[0]);
+    assertEquals((byte) (size >>> 16), sizebytes[1]);
+    assertEquals((byte) (size >>> 8), sizebytes[2]);
+    assertEquals((byte) size, sizebytes[3]);
+  }
 }
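testLoadSize exercises SIZE but not SIZE_AND_BREAK; a minimal sketch of a test that could drive that path, reusing the same fixtures (not part of the commit):

  public void testLoadSizeAndBreak() throws IOException {
    // Sketch only, not in the commit: ask for the size of whatever field is
    // encountered first, then stop -- the loading loop should break after
    // exactly one field.
    FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
    Document doc = reader.doc(0, new FieldSelector() {
      public FieldSelectorResult accept(String fieldName) {
        return FieldSelectorResult.SIZE_AND_BREAK;
      }
    });
    assertEquals(1, doc.getFields().size()); // only one field was loaded
    reader.close();
  }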