trim the fat from PagedBytes

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1438211 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2013-01-24 21:42:46 +00:00
parent 4b329aa64d
commit 2857df98de
3 changed files with 9 additions and 387 deletions

org/apache/lucene/util/PagedBytes.java

@@ -21,8 +21,6 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput;
/** Represents a logical byte[] as a series of pages. You
@@ -32,6 +30,7 @@ import org.apache.lucene.store.IndexInput;
*
* @lucene.internal
**/
// nocommit: make this simply a big ass array and nothing more.
public final class PagedBytes {
private final List<byte[]> blocks = new ArrayList<byte[]>();
private final List<Integer> blockEnd = new ArrayList<Integer>();
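For orientation, every accessor below maps a logical long address onto a (block, offset) pair with plain shift-and-mask arithmetic; a minimal sketch with illustrative values, not library code:
// Illustrative sketch of the address arithmetic PagedBytes uses throughout.
int blockBits = 15;                       // e.g. 32 KB pages
int blockSize = 1 << blockBits;
int blockMask = blockSize - 1;
long start = 100000L;                     // logical position in the "big" byte[]
int index  = (int) (start >> blockBits);  // which page holds it
int offset = (int) (start & blockMask);   // position within that page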
@@ -110,6 +109,7 @@ public final class PagedBytes {
*
* @lucene.internal
**/
// nocommit: move this shit and any other vint bogusness to fieldcacheimpl!
public BytesRef fill(BytesRef b, long start) {
final int index = (int) (start >> blockBits);
final int offset = (int) (start & blockMask);
@@ -125,132 +125,6 @@
}
return b;
}
/**
* Reads length as a 1 or 2 byte vInt prefix, starting at <i>start</i>.
* <p>
* <b>Note:</b> this method does not support slices spanning across block
* borders.
* </p>
*
* @return the internal block number of the slice.
* @lucene.internal
**/
public int fillAndGetIndex(BytesRef b, long start) {
final int index = (int) (start >> blockBits);
final int offset = (int) (start & blockMask);
final byte[] block = b.bytes = blocks[index];
if ((block[offset] & 128) == 0) {
b.length = block[offset];
b.offset = offset+1;
} else {
b.length = ((block[offset] & 0x7f) << 8) | (block[1+offset] & 0xff);
b.offset = offset+2;
assert b.length > 0;
}
return index;
}
/**
* Reads length as a 1 or 2 byte vInt prefix, starting at <i>start</i>, and
* returns the start offset of the next part, suitable as start parameter on
* next call to sequentially read all {@link BytesRef}.
*
* <p>
* <b>Note:</b> this method does not support slices spanning across block
* borders.
* </p>
*
* @return the start offset of the next part, suitable as start parameter on
* next call to sequentially read all {@link BytesRef}.
* @lucene.internal
**/
public long fillAndGetStart(BytesRef b, long start) {
final int index = (int) (start >> blockBits);
final int offset = (int) (start & blockMask);
final byte[] block = b.bytes = blocks[index];
if ((block[offset] & 128) == 0) {
b.length = block[offset];
b.offset = offset+1;
start += 1L + b.length;
} else {
b.length = ((block[offset] & 0x7f) << 8) | (block[1+offset] & 0xff);
b.offset = offset+2;
start += 2L + b.length;
assert b.length > 0;
}
return start;
}
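A hedged usage sketch of the method above: assuming a frozen reader and a known entry count (both hypothetical names, not part of this diff), the returned offset feeds the next call, so length-prefixed entries can be scanned sequentially:
// Hypothetical sequential scan using the (removed) fillAndGetStart.
BytesRef scratch = new BytesRef();
long addr = 0;
for (int i = 0; i < entryCount; i++) {           // entryCount: assumed known by the caller
  addr = reader.fillAndGetStart(scratch, addr);  // scratch now points at entry i
}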
/**
* Gets a slice out of {@link PagedBytes} starting at <i>start</i>; the
* length is read as a 1 or 2 byte vInt prefix. If the slice spans a
* block border, this method allocates a new byte[] and copies the
* paged data into it.
* <p>
* Slices spanning more than one block are not supported.
* </p>
*
* @lucene.internal
**/
public BytesRef fillSliceWithPrefix(BytesRef b, long start) {
int index = (int) (start >> blockBits);
int offset = (int) (start & blockMask);
byte[] block = blocks[index];
final int length;
assert offset <= block.length-1;
if ((block[offset] & 128) == 0) {
length = block[offset];
offset = offset+1;
} else {
if (offset==block.length-1) {
final byte[] nextBlock = blocks[++index];
length = ((block[offset] & 0x7f) << 8) | (nextBlock[0] & 0xff);
offset = 1;
block = nextBlock;
assert length > 0;
} else {
assert offset < block.length-1;
length = ((block[offset] & 0x7f) << 8) | (block[1+offset] & 0xff);
offset = offset+2;
assert length > 0;
}
}
assert length >= 0: "length=" + length;
b.length = length;
// NOTE: even though copyUsingLengthPrefix always
// allocs a new block if the byte[] to be added won't
// fit in current block,
// VarDerefBytesImpl.finishInternal does its own
// prefix + byte[] writing which can span two blocks,
// so we support that here on decode:
if (blockSize - offset >= length) {
// Within block
b.offset = offset;
b.bytes = blocks[index];
} else {
// Split
b.bytes = new byte[length];
b.offset = 0;
System.arraycopy(blocks[index], offset, b.bytes, 0, blockSize-offset);
System.arraycopy(blocks[1+index], 0, b.bytes, blockSize-offset, length-(blockSize-offset));
}
return b;
}
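Isolating the prefix decode that the methods above share may help in reading their branches: a clear high bit means a single length byte (0..127); otherwise the low 7 bits form the high byte of a 15-bit length and one more byte follows (lengths up to 32767). A sketch under those assumptions:
// Length-prefix decode, isolated for clarity (sketch, not library code).
int first = block[offset] & 0xff;
final int length, dataStart;
if ((first & 0x80) == 0) {
  length = first;                                               // 1-byte prefix
  dataStart = offset + 1;
} else {
  length = ((first & 0x7f) << 8) | (block[offset + 1] & 0xff);  // 2-byte prefix
  dataStart = offset + 2;
}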
/** @lucene.internal */
public byte[][] getBlocks() {
return blocks;
}
/** @lucene.internal */
public int[] getBlockEnds() {
return blockEnds;
}
}
/** 1<<blockBits must be bigger than biggest single
@@ -288,34 +162,6 @@ public final class PagedBytes {
}
}
/** Copy BytesRef in */
public void copy(BytesRef bytes) {
int byteCount = bytes.length;
int bytesUpto = bytes.offset;
while (byteCount > 0) {
int left = blockSize - upto;
if (left == 0) {
if (currentBlock != null) {
blocks.add(currentBlock);
blockEnd.add(upto);
}
currentBlock = new byte[blockSize];
upto = 0;
left = blockSize;
}
if (left < byteCount) {
System.arraycopy(bytes.bytes, bytesUpto, currentBlock, upto, left);
upto = blockSize;
byteCount -= left;
bytesUpto += left;
} else {
System.arraycopy(bytes.bytes, bytesUpto, currentBlock, upto, byteCount);
upto += byteCount;
break;
}
}
}
/** Copy BytesRef in, setting BytesRef out to the result.
* Do not use this if you will use freeze(true).
* This only supports bytes.length <= blockSize */
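A hypothetical call of this copy variant (the pagedBytes instance and the input value are stand-ins for illustration): out is left pointing into the current block, which is why it must not be combined with freeze(true):
// Hypothetical usage; requires the input length <= blockSize.
BytesRef out = new BytesRef();
pagedBytes.copy(new BytesRef("hello"), out);  // out now points into the current block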
@@ -375,6 +221,7 @@ public final class PagedBytes {
/** Copy bytes in, writing the length as a 1 or 2 byte
* vInt prefix. */
// nocommit: move this shit and any other vint bogusness to fieldcacheimpl!
public long copyUsingLengthPrefix(BytesRef bytes) {
if (bytes.length >= 32768) {
throw new IllegalArgumentException("max length is 32767 (got " + bytes.length + ")");
@@ -405,171 +252,4 @@ public final class PagedBytes {
return pointer;
}
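For reference, the 1 or 2 byte prefix this method writes ahead of the payload encodes as follows; a sketch in which dest and pos stand in for the current block and write position:
// Sketch of the length-prefix encoding (mirrors the decode shown earlier).
if (bytes.length < 128) {
  dest[pos++] = (byte) bytes.length;                  // single byte, high bit clear
} else {
  dest[pos++] = (byte) (0x80 | (bytes.length >> 8));  // flag bit + high 7 bits
  dest[pos++] = (byte) (bytes.length & 0xff);         // low 8 bits
}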
public final class PagedBytesDataInput extends IndexInput {
private int currentBlockIndex;
private int currentBlockUpto;
private byte[] currentBlock;
PagedBytesDataInput() {
super("PagedBytesIndexInput");
currentBlock = blocks.get(0);
}
@Override
public PagedBytesDataInput clone() {
PagedBytesDataInput clone = getDataInput();
clone.setPosition(getPosition());
return clone;
}
/** Returns the current byte position. */
public long getPosition() {
return (long) currentBlockIndex * blockSize + currentBlockUpto;
}
/** Seek to a position previously obtained from
* {@link #getPosition}. */
public void setPosition(long pos) {
currentBlockIndex = (int) (pos >> blockBits);
currentBlock = blocks.get(currentBlockIndex);
currentBlockUpto = (int) (pos & blockMask);
}
@Override
public byte readByte() {
if (currentBlockUpto == blockSize) {
nextBlock();
}
return currentBlock[currentBlockUpto++];
}
@Override
public void readBytes(byte[] b, int offset, int len) {
assert b.length >= offset + len;
final int offsetEnd = offset + len;
while (true) {
final int blockLeft = blockSize - currentBlockUpto;
final int left = offsetEnd - offset;
if (blockLeft < left) {
System.arraycopy(currentBlock, currentBlockUpto,
b, offset,
blockLeft);
nextBlock();
offset += blockLeft;
} else {
// Last block
System.arraycopy(currentBlock, currentBlockUpto,
b, offset,
left);
currentBlockUpto += left;
break;
}
}
}
private void nextBlock() {
currentBlockIndex++;
currentBlockUpto = 0;
currentBlock = blocks.get(currentBlockIndex);
}
@Override
public void close() throws IOException {
//
}
@Override
public long getFilePointer() {
return (long) currentBlockIndex * blockSize + currentBlockUpto; // cast avoids int overflow past 2 GB
}
@Override
public void seek(long pos) throws IOException {
currentBlockIndex = (int) (pos >> blockBits);
currentBlock = blocks.get(currentBlockIndex);
currentBlockUpto = (int) (pos & blockMask);
}
@Override
public long length() {
return upto;
}
}
public final class PagedBytesDataOutput extends DataOutput {
@Override
public void writeByte(byte b) {
if (upto == blockSize) {
if (currentBlock != null) {
blocks.add(currentBlock);
blockEnd.add(upto);
}
currentBlock = new byte[blockSize];
upto = 0;
}
currentBlock[upto++] = b;
}
@Override
public void writeBytes(byte[] b, int offset, int length) {
assert b.length >= offset + length;
if (length == 0) {
return;
}
if (upto == blockSize) {
if (currentBlock != null) {
blocks.add(currentBlock);
blockEnd.add(upto);
}
currentBlock = new byte[blockSize];
upto = 0;
}
final int offsetEnd = offset + length;
while(true) {
final int left = offsetEnd - offset;
final int blockLeft = blockSize - upto;
if (blockLeft < left) {
System.arraycopy(b, offset, currentBlock, upto, blockLeft);
blocks.add(currentBlock);
blockEnd.add(blockSize);
currentBlock = new byte[blockSize];
upto = 0;
offset += blockLeft;
} else {
// Last block
System.arraycopy(b, offset, currentBlock, upto, left);
upto += left;
break;
}
}
}
/** Return the current byte position. */
public long getPosition() {
return getPointer();
}
}
/** Returns a DataInput to read values from this
* PagedBytes instance. */
public PagedBytesDataInput getDataInput() {
if (!frozen) {
throw new IllegalStateException("must call freeze() before getDataInput");
}
return new PagedBytesDataInput();
}
/** Returns a DataOutput that you may use to write into
* this PagedBytes instance. If you do this, you should
* not call the other writing methods (eg, copy);
* results are undefined. */
public PagedBytesDataOutput getDataOutput() {
if (frozen) {
throw new IllegalStateException("cannot get DataOutput after freeze()");
}
return new PagedBytesDataOutput();
}
}
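Taken together, the streaming API removed by this commit was used roughly as follows; a hedged round-trip sketch of the deleted code paths, not of the post-commit API:
// Hypothetical round trip through the APIs deleted in this commit.
PagedBytes pb = new PagedBytes(15);                        // 32 KB pages
PagedBytes.PagedBytesDataOutput out = pb.getDataOutput();
out.writeBytes(new byte[] {1, 2, 3}, 0, 3);
pb.freeze(true);                                           // freeze() must precede getDataInput()
PagedBytes.PagedBytesDataInput in = pb.getDataInput();
byte first = in.readByte();                                // reads back 1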

org/apache/lucene/util/Test2BPagedBytes.java

@@ -20,13 +20,12 @@ package org.apache.lucene.util;
import java.util.Arrays;
import java.util.Random;
import org.apache.lucene.util.PagedBytes.PagedBytesDataInput;
import org.apache.lucene.util.PagedBytes.PagedBytesDataOutput;
import org.junit.Ignore;
@Ignore("You must increase heap to > 2 G to run this")
// nocommit: write this test in some other way (not indexinput/output)
public class Test2BPagedBytes extends LuceneTestCase {
/*
public void test() throws Exception {
PagedBytes pb = new PagedBytes(15);
PagedBytesDataOutput dataOutput = pb.getDataOutput();
@@ -64,5 +63,5 @@ public class Test2BPagedBytes extends LuceneTestCase {
lastFP = fp;
netBytes += numBytes;
}
}
} */
}

org/apache/lucene/util/TestPagedBytes.java

@@ -22,12 +22,12 @@ import java.util.*;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.PagedBytes.PagedBytesDataInput;
import org.apache.lucene.util.PagedBytes.PagedBytesDataOutput;
import org.junit.Ignore;
// nocommit: clean up these tests (eg. not to use IndexINput/Output)
public class TestPagedBytes extends LuceneTestCase {
/*
public void testDataInputOutput() throws Exception {
Random random = random();
for(int iter=0;iter<5*RANDOM_MULTIPLIER;iter++) {
@@ -79,63 +79,6 @@
}
}
public void testLengthPrefix() throws Exception {
Random random = random();
for(int iter=0;iter<5*RANDOM_MULTIPLIER;iter++) {
final int blockBits = _TestUtil.nextInt(random, 2, 20);
final int blockSize = 1 << blockBits;
final PagedBytes p = new PagedBytes(blockBits);
final List<Integer> addresses = new ArrayList<Integer>();
final List<BytesRef> answers = new ArrayList<BytesRef>();
int totBytes = 0;
while(totBytes < 10000000 && answers.size() < 100000) {
final int len = random.nextInt(Math.min(blockSize-2, 32768));
final BytesRef b = new BytesRef();
b.bytes = new byte[len];
b.length = len;
b.offset = 0;
random.nextBytes(b.bytes);
answers.add(b);
addresses.add((int) p.copyUsingLengthPrefix(b));
totBytes += len;
}
final PagedBytes.Reader reader = p.freeze(random.nextBoolean());
final BytesRef slice = new BytesRef();
for(int idx=0;idx<answers.size();idx++) {
reader.fillSliceWithPrefix(slice, addresses.get(idx));
assertEquals(answers.get(idx), slice);
}
}
}
// LUCENE-3841: even though
// copyUsingLengthPrefix will never span two blocks, make
// sure if caller writes their own prefix followed by the
// bytes, it still works:
public void testLengthPrefixAcrossTwoBlocks() throws Exception {
Random random = random();
final PagedBytes p = new PagedBytes(10);
final DataOutput out = p.getDataOutput();
final byte[] bytes1 = new byte[1000];
random.nextBytes(bytes1);
out.writeBytes(bytes1, 0, bytes1.length);
out.writeByte((byte) 40);
final byte[] bytes2 = new byte[40];
random.nextBytes(bytes2);
out.writeBytes(bytes2, 0, bytes2.length);
final PagedBytes.Reader reader = p.freeze(random.nextBoolean());
BytesRef answer = reader.fillSliceWithPrefix(new BytesRef(), 1000);
assertEquals(40, answer.length);
for(int i=0;i<40;i++) {
assertEquals(bytes2[i], answer.bytes[answer.offset + i]);
}
}
@Ignore // memory hole
public void testOverflow() throws IOException {
final int blockBits = _TestUtil.nextInt(random(), 14, 28);
@@ -164,5 +107,5 @@ public class TestPagedBytes extends LuceneTestCase {
assertEquals(arr[(int) (offset % arr.length)], in.readByte());
assertEquals(offset+1, in.getPosition());
}
}
} */
}