LUCENE-3892: backporting r1371010 and r1371011 to BlockPacked.

I changed the comment on the value of BLOCK_SIZE. Indeed, since PackedInts encoding/decoding is long-aligned, all numbers of bits per value that are co-prime with 64 cannot encode/decode less than 64 values per iteration, so the block size should be at least 64. Switching it to a lower value (eg. 32) should work but will perform many unnecessary operations in the encoding/decoding step. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/pforcodec_3892@1371114 13f79535-47bb-0310-9956-ffa450edef68
2012-08-09 09:16:09 +00:00 · 2012-08-09 09:16:09 +00:00 · 0be8bfb4fc
parent 98ae806ba6
commit 0be8bfb4fc
3 changed files with 72 additions and 49 deletions
--- a/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsFormat.java
@ -41,6 +41,10 @@ public final class BlockPackedPostingsFormat extends PostingsFormat {

  private final int minTermBlockSize;
  private final int maxTermBlockSize;
+
+  // nocommit is this right?:
+  // NOTE: should be at least 64 because of PackedInts long-aligned encoding/decoding
+  // NOTE: must be factor of ... 64?
  public final static int BLOCK_SIZE = 128;

  public BlockPackedPostingsFormat() {
--- a/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsReader.java
@ -384,7 +384,6 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
    }
    
    private void refillDocs() throws IOException {
-      //System.out.println("["+docFreq+"]"+" refillDoc");
      final int left = docFreq - docUpto;
      assert left > 0;

@ -426,7 +425,6 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
          }
          return doc = NO_MORE_DOCS;
        }
-        //System.out.println("["+docFreq+"]"+" nextDoc");
        if (docBufferUpto == BLOCK_SIZE) {
          refillDocs();
        }
@ -486,15 +484,15 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
          skipped = true;
        }

-        final int newDocUpto = skipper.skipTo(target); 
+        final int newDocUpto = skipper.skipTo(target) + 1; 

        if (newDocUpto > docUpto) {
          // Skipper moved
          if (DEBUG) {
            System.out.println("skipper moved to docUpto=" + newDocUpto + " vs current=" + docUpto + "; docID=" + skipper.getDoc() + " fp=" + skipper.getDocPointer());
          }
-          assert newDocUpto % BLOCK_SIZE == (BLOCK_SIZE - 1): "got " + newDocUpto;
-          docUpto = newDocUpto+1;
+          assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
+          docUpto = newDocUpto;

          // Force to read next block
          docBufferUpto = BLOCK_SIZE;
@ -503,6 +501,12 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
        }
        nextSkipDoc = skipper.getNextSkipDoc();
      }
+      if (docUpto == docFreq) {
+        return doc = NO_MORE_DOCS;
+      }
+      if (docBufferUpto == BLOCK_SIZE) {
+        refillDocs();
+      }

      // Now scan... this is an inlined/pared down version
      // of nextDoc():
@ -510,18 +514,6 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
        if (DEBUG) {
          System.out.println("  scan doc=" + accum + " docBufferUpto=" + docBufferUpto);
        }
-        if (docUpto == docFreq) {
-          return doc = NO_MORE_DOCS;
-        }
-
-        // nocommit: in theory we should not hit this?  ie
-        // skipper should already have moved us to the block
-        // containing the doc?  yet assert false trips ... i
-        // think because if you advance w/o having done a
-        // nextDoc yet()... can we assert/remove this?
-        if (docBufferUpto == BLOCK_SIZE) {
-          refillDocs();
-        }
        accum += docDeltaBuffer[docBufferUpto];
        docUpto++;

@ -529,6 +521,9 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
          break;
        }
        docBufferUpto++;
+        if (docUpto == docFreq) {
+          return doc = NO_MORE_DOCS;
+        }
      }

      if (liveDocs == null || liveDocs.get(accum)) {
@ -666,9 +661,9 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
    }

    private void refillDocs() throws IOException {
-    //System.out.println("["+docFreq+"]"+" refillDoc");
      final int left = docFreq - docUpto;
      assert left > 0;
+
      if (left >= BLOCK_SIZE) {
        if (DEBUG) {
          System.out.println("    fill doc block from fp=" + docIn.getFilePointer());
@ -797,7 +792,7 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
          skipped = true;
        }

-        final int newDocUpto = skipper.skipTo(target); 
+        final int newDocUpto = skipper.skipTo(target) + 1; 

        if (newDocUpto > docUpto) {
          // Skipper moved
@ -805,8 +800,8 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
            System.out.println("    skipper moved to docUpto=" + newDocUpto + " vs current=" + docUpto + "; docID=" + skipper.getDoc() + " fp=" + skipper.getDocPointer() + " pos.fp=" + skipper.getPosPointer() + " pos.bufferUpto=" + skipper.getPosBufferUpto());
          }

-          assert newDocUpto % BLOCK_SIZE == (BLOCK_SIZE - 1): "got " + newDocUpto;
-          docUpto = newDocUpto+1;
+          assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
+          docUpto = newDocUpto;

          // Force to read next block
          docBufferUpto = BLOCK_SIZE;
@ -817,6 +812,12 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
        }
        nextSkipDoc = skipper.getNextSkipDoc();
      }
+      if (docUpto == docFreq) {
+        return doc = NO_MORE_DOCS;
+      }
+      if (docBufferUpto == BLOCK_SIZE) {
+        refillDocs();
+      }

      // Now scan... this is an inlined/pared down version
      // of nextDoc():
@ -827,16 +828,6 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
        if (docUpto == docFreq) {
          return doc = NO_MORE_DOCS;
        }
-        // nocommit: in theory we should not hit this?  ie
-        // skipper should already have moved us to the block
-        // containing the doc?  yet assert false trips ... i
-        // think because if you advance w/o having done a
-        // nextDoc yet()... can we assert/remove this?
-        if (docBufferUpto == BLOCK_SIZE) {
-          // nocommit hmm skip freq?  but: we don't ever
-          // scan over more than one block?
-          refillDocs();
-        }
        accum += docDeltaBuffer[docBufferUpto];
        freq = freqBuffer[docBufferUpto];
        posPendingCount += freq;
@ -846,6 +837,9 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
        if (accum >= target) {
          break;
        }
+        if (docUpto == docFreq) {
+          return doc = NO_MORE_DOCS;
+        }
      }

      if (liveDocs == null || liveDocs.get(accum)) {
@ -1110,7 +1104,6 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
    }

    private void refillDocs() throws IOException {
-      //System.out.println("["+docFreq+"]"+" refillDoc");
      final int left = docFreq - docUpto;
      assert left > 0;

@ -1226,7 +1219,6 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
        if (docUpto == docFreq) {
          return doc = NO_MORE_DOCS;
        }
-        //System.out.println("["+docFreq+"]"+" nextDoc");
        if (docBufferUpto == BLOCK_SIZE) {
          refillDocs();
        }
@ -1293,15 +1285,15 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
          skipped = true;
        }

-        final int newDocUpto = skipper.skipTo(target); 
+        final int newDocUpto = skipper.skipTo(target) + 1; 

        if (newDocUpto > docUpto) {
          // Skipper moved
          if (DEBUG) {
            System.out.println("    skipper moved to docUpto=" + newDocUpto + " vs current=" + docUpto + "; docID=" + skipper.getDoc() + " fp=" + skipper.getDocPointer() + " pos.fp=" + skipper.getPosPointer() + " pos.bufferUpto=" + skipper.getPosBufferUpto() + " pay.fp=" + skipper.getPayPointer() + " lastStartOffset=" + lastStartOffset);
          }
-          assert newDocUpto % BLOCK_SIZE == (BLOCK_SIZE - 1): "got " + newDocUpto;
-          docUpto = newDocUpto+1;
+          assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
+          docUpto = newDocUpto;

          // Force to read next block
          docBufferUpto = BLOCK_SIZE;
@ -1315,24 +1307,51 @@ public final class BlockPackedPostingsReader extends PostingsReaderBase {
        }
        nextSkipDoc = skipper.getNextSkipDoc();
      }
-
-      // nocommit inline nextDoc here
+      if (docUpto == docFreq) {
+        return doc = NO_MORE_DOCS;
+      }
+      if (docBufferUpto == BLOCK_SIZE) {
+        refillDocs();
+      }

      // Now scan:
-      while (nextDoc() != NO_MORE_DOCS) {
-        if (doc >= target) {
+      // Now scan:
+      while (true) {
        if (DEBUG) {
-            System.out.println("  advance return doc=" + doc);
+          System.out.println("  scan doc=" + accum + " docBufferUpto=" + docBufferUpto);
        }
-          return doc;
+        accum += docDeltaBuffer[docBufferUpto];
+        freq = freqBuffer[docBufferUpto];
+        posPendingCount += freq;
+        docBufferUpto++;
+        docUpto++;
+
+        if (accum >= target) {
+          break;
+        }
+        if (docUpto == docFreq) {
+          return doc = NO_MORE_DOCS;
        }
      }

+      if (liveDocs == null || liveDocs.get(accum)) {
        if (DEBUG) {
-        System.out.println("  advance return doc=END");
+          System.out.println("  return doc=" + accum);
+        }
+        if (indexHasPayloads) {
+          payloadByteUpto += payloadLength;
+          payloadLength = 0;
+        }
+        position = 0;
+        payloadLength = 0;
+        lastStartOffset = 0;
+        return doc = accum;
+      } else {
+        if (DEBUG) {
+          System.out.println("  now do nextDoc()");
+        }
+        return nextDoc();
      }
-
-      return NO_MORE_DOCS;
    }

    // nocommit in theory we could avoid loading frq block
--- a/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedSkipWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedSkipWriter.java
@ -37,7 +37,7 @@ import org.apache.lucene.codecs.MultiLevelSkipListWriter;
 * block, only record skip data at the start its start point(if it exist).
 *
 * For each skip point, we will record: 
- * 1. lastDocID, 
+ * 1. docID in former position, i.e. for position 12, record docID[11], etc.
 * 2. its related file points(position, payload), 
 * 3. related numbers or uptos(position, payload).
 * 4. start offset.