From 2857df98def70823ad494e1636c5df8e55582d6e Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Thu, 24 Jan 2013 21:42:46 +0000 Subject: [PATCH] trim the fat from PagedBytes git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1438211 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/lucene/util/PagedBytes.java | 326 +----------------- .../apache/lucene/util/Test2BPagedBytes.java | 7 +- .../apache/lucene/util/TestPagedBytes.java | 63 +--- 3 files changed, 9 insertions(+), 387 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/PagedBytes.java b/lucene/core/src/java/org/apache/lucene/util/PagedBytes.java index 425eed85f21..8aad226bbfe 100644 --- a/lucene/core/src/java/org/apache/lucene/util/PagedBytes.java +++ b/lucene/core/src/java/org/apache/lucene/util/PagedBytes.java @@ -21,8 +21,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; -import org.apache.lucene.store.DataInput; -import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.IndexInput; /** Represents a logical byte[] as a series of pages. You @@ -32,6 +30,7 @@ import org.apache.lucene.store.IndexInput; * * @lucene.internal **/ +// nocommit: make this simply a big ass array and nothing more. public final class PagedBytes { private final List blocks = new ArrayList(); private final List blockEnd = new ArrayList(); @@ -110,6 +109,7 @@ public final class PagedBytes { * * @lucene.internal **/ + // nocommit: move this shit and any other vint bogusness to fieldcacheimpl! public BytesRef fill(BytesRef b, long start) { final int index = (int) (start >> blockBits); final int offset = (int) (start & blockMask); @@ -125,132 +125,6 @@ public final class PagedBytes { } return b; } - - /** - * Reads length as 1 or 2 byte vInt prefix, starting at start. * - *

- * Note: this method does not support slices spanning across block - * borders. - *

- * - * @return the internal block number of the slice. - * @lucene.internal - **/ - public int fillAndGetIndex(BytesRef b, long start) { - final int index = (int) (start >> blockBits); - final int offset = (int) (start & blockMask); - final byte[] block = b.bytes = blocks[index]; - - if ((block[offset] & 128) == 0) { - b.length = block[offset]; - b.offset = offset+1; - } else { - b.length = ((block[offset] & 0x7f) << 8) | (block[1+offset] & 0xff); - b.offset = offset+2; - assert b.length > 0; - } - return index; - } - - /** - * Reads length as 1 or 2 byte vInt prefix, starting at start and - * returns the start offset of the next part, suitable as start parameter on - * next call to sequentially read all {@link BytesRef}. - * - *

- * Note: this method does not support slices spanning across block - * borders. - *

- * - * @return the start offset of the next part, suitable as start parameter on - * next call to sequentially read all {@link BytesRef}. - * @lucene.internal - **/ - public long fillAndGetStart(BytesRef b, long start) { - final int index = (int) (start >> blockBits); - final int offset = (int) (start & blockMask); - final byte[] block = b.bytes = blocks[index]; - - if ((block[offset] & 128) == 0) { - b.length = block[offset]; - b.offset = offset+1; - start += 1L + b.length; - } else { - b.length = ((block[offset] & 0x7f) << 8) | (block[1+offset] & 0xff); - b.offset = offset+2; - start += 2L + b.length; - assert b.length > 0; - } - return start; - } - - - /** - * Gets a slice out of {@link PagedBytes} starting at start, the - * length is read as 1 or 2 byte vInt prefix. Iff the slice spans across a - * block border this method will allocate sufficient resources and copy the - * paged data. - *

- * Slices spanning more than one block are not supported. - *

- * - * @lucene.internal - **/ - public BytesRef fillSliceWithPrefix(BytesRef b, long start) { - int index = (int) (start >> blockBits); - int offset = (int) (start & blockMask); - byte[] block = blocks[index]; - final int length; - assert offset <= block.length-1; - if ((block[offset] & 128) == 0) { - length = block[offset]; - offset = offset+1; - } else { - if (offset==block.length-1) { - final byte[] nextBlock = blocks[++index]; - length = ((block[offset] & 0x7f) << 8) | (nextBlock[0] & 0xff); - offset = 1; - block = nextBlock; - assert length > 0; - } else { - assert offset < block.length-1; - length = ((block[offset] & 0x7f) << 8) | (block[1+offset] & 0xff); - offset = offset+2; - assert length > 0; - } - } - assert length >= 0: "length=" + length; - b.length = length; - - // NOTE: even though copyUsingLengthPrefix always - // allocs a new block if the byte[] to be added won't - // fit in current block, - // VarDerefBytesImpl.finishInternal does its own - // prefix + byte[] writing which can span two blocks, - // so we support that here on decode: - if (blockSize - offset >= length) { - // Within block - b.offset = offset; - b.bytes = blocks[index]; - } else { - // Split - b.bytes = new byte[length]; - b.offset = 0; - System.arraycopy(blocks[index], offset, b.bytes, 0, blockSize-offset); - System.arraycopy(blocks[1+index], 0, b.bytes, blockSize-offset, length-(blockSize-offset)); - } - return b; - } - - /** @lucene.internal */ - public byte[][] getBlocks() { - return blocks; - } - - /** @lucene.internal */ - public int[] getBlockEnds() { - return blockEnds; - } } /** 1<<blockBits must be bigger than biggest single @@ -288,34 +162,6 @@ public final class PagedBytes { } } - /** Copy BytesRef in */ - public void copy(BytesRef bytes) { - int byteCount = bytes.length; - int bytesUpto = bytes.offset; - while (byteCount > 0) { - int left = blockSize - upto; - if (left == 0) { - if (currentBlock != null) { - blocks.add(currentBlock); - blockEnd.add(upto); - } - currentBlock = new byte[blockSize]; - upto = 0; - left = blockSize; - } - if (left < byteCount) { - System.arraycopy(bytes.bytes, bytesUpto, currentBlock, upto, left); - upto = blockSize; - byteCount -= left; - bytesUpto += left; - } else { - System.arraycopy(bytes.bytes, bytesUpto, currentBlock, upto, byteCount); - upto += byteCount; - break; - } - } - } - /** Copy BytesRef in, setting BytesRef out to the result. * Do not use this if you will use freeze(true). * This only supports bytes.length <= blockSize */ @@ -375,6 +221,7 @@ public final class PagedBytes { /** Copy bytes in, writing the length as a 1 or 2 byte * vInt prefix. */ + // nocommit: move this shit and any other vint bogusness to fieldcacheimpl! public long copyUsingLengthPrefix(BytesRef bytes) { if (bytes.length >= 32768) { throw new IllegalArgumentException("max length is 32767 (got " + bytes.length + ")"); @@ -405,171 +252,4 @@ public final class PagedBytes { return pointer; } - - public final class PagedBytesDataInput extends IndexInput { - private int currentBlockIndex; - private int currentBlockUpto; - private byte[] currentBlock; - - PagedBytesDataInput() { - super("PagedBytesIndexInput"); - currentBlock = blocks.get(0); - } - - @Override - public PagedBytesDataInput clone() { - PagedBytesDataInput clone = getDataInput(); - clone.setPosition(getPosition()); - return clone; - } - - /** Returns the current byte position. */ - public long getPosition() { - return (long) currentBlockIndex * blockSize + currentBlockUpto; - } - - /** Seek to a position previously obtained from - * {@link #getPosition}. */ - public void setPosition(long pos) { - currentBlockIndex = (int) (pos >> blockBits); - currentBlock = blocks.get(currentBlockIndex); - currentBlockUpto = (int) (pos & blockMask); - } - - @Override - public byte readByte() { - if (currentBlockUpto == blockSize) { - nextBlock(); - } - return currentBlock[currentBlockUpto++]; - } - - @Override - public void readBytes(byte[] b, int offset, int len) { - assert b.length >= offset + len; - final int offsetEnd = offset + len; - while (true) { - final int blockLeft = blockSize - currentBlockUpto; - final int left = offsetEnd - offset; - if (blockLeft < left) { - System.arraycopy(currentBlock, currentBlockUpto, - b, offset, - blockLeft); - nextBlock(); - offset += blockLeft; - } else { - // Last block - System.arraycopy(currentBlock, currentBlockUpto, - b, offset, - left); - currentBlockUpto += left; - break; - } - } - } - - private void nextBlock() { - currentBlockIndex++; - currentBlockUpto = 0; - currentBlock = blocks.get(currentBlockIndex); - } - - @Override - public void close() throws IOException { - // - } - - @Override - public long getFilePointer() { - return currentBlockIndex * blockSize + currentBlockUpto; - } - - @Override - public void seek(long pos) throws IOException { - currentBlockIndex = (int) (pos >> blockBits); - currentBlock = blocks.get(currentBlockIndex); - currentBlockUpto = (int) (pos & blockMask); - } - - @Override - public long length() { - return upto; - } - } - - public final class PagedBytesDataOutput extends DataOutput { - @Override - public void writeByte(byte b) { - if (upto == blockSize) { - if (currentBlock != null) { - blocks.add(currentBlock); - blockEnd.add(upto); - } - currentBlock = new byte[blockSize]; - upto = 0; - } - currentBlock[upto++] = b; - } - - @Override - public void writeBytes(byte[] b, int offset, int length) { - assert b.length >= offset + length; - if (length == 0) { - return; - } - - if (upto == blockSize) { - if (currentBlock != null) { - blocks.add(currentBlock); - blockEnd.add(upto); - } - currentBlock = new byte[blockSize]; - upto = 0; - } - - final int offsetEnd = offset + length; - while(true) { - final int left = offsetEnd - offset; - final int blockLeft = blockSize - upto; - if (blockLeft < left) { - System.arraycopy(b, offset, currentBlock, upto, blockLeft); - blocks.add(currentBlock); - blockEnd.add(blockSize); - currentBlock = new byte[blockSize]; - upto = 0; - offset += blockLeft; - } else { - // Last block - System.arraycopy(b, offset, currentBlock, upto, left); - upto += left; - break; - } - } - } - - /** Return the current byte position. */ - public long getPosition() { - return getPointer(); - } - } - - /** Returns a DataInput to read values from this - * PagedBytes instance. */ - public PagedBytesDataInput getDataInput() { - if (!frozen) { - throw new IllegalStateException("must call freeze() before getDataInput"); - } - return new PagedBytesDataInput(); - } - - /** Returns a DataOutput that you may use to write into - * this PagedBytes instance. If you do this, you should - * not call the other writing methods (eg, copy); - * results are undefined. */ - public PagedBytesDataOutput getDataOutput() { - if (frozen) { - throw new IllegalStateException("cannot get DataOutput after freeze()"); - } - return new PagedBytesDataOutput(); - } } diff --git a/lucene/core/src/test/org/apache/lucene/util/Test2BPagedBytes.java b/lucene/core/src/test/org/apache/lucene/util/Test2BPagedBytes.java index 23b4ac043b8..63eef6c0c21 100644 --- a/lucene/core/src/test/org/apache/lucene/util/Test2BPagedBytes.java +++ b/lucene/core/src/test/org/apache/lucene/util/Test2BPagedBytes.java @@ -20,13 +20,12 @@ package org.apache.lucene.util; import java.util.Arrays; import java.util.Random; -import org.apache.lucene.util.PagedBytes.PagedBytesDataInput; -import org.apache.lucene.util.PagedBytes.PagedBytesDataOutput; import org.junit.Ignore; @Ignore("You must increase heap to > 2 G to run this") +// nocommit: write this test in some other way (not indexinput/output) public class Test2BPagedBytes extends LuceneTestCase { - +/* public void test() throws Exception { PagedBytes pb = new PagedBytes(15); PagedBytesDataOutput dataOutput = pb.getDataOutput(); @@ -64,5 +63,5 @@ public class Test2BPagedBytes extends LuceneTestCase { lastFP = fp; netBytes += numBytes; } - } + } */ } diff --git a/lucene/core/src/test/org/apache/lucene/util/TestPagedBytes.java b/lucene/core/src/test/org/apache/lucene/util/TestPagedBytes.java index d5921200db6..0b38903e982 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestPagedBytes.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestPagedBytes.java @@ -22,12 +22,12 @@ import java.util.*; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataOutput; -import org.apache.lucene.util.PagedBytes.PagedBytesDataInput; -import org.apache.lucene.util.PagedBytes.PagedBytesDataOutput; import org.junit.Ignore; +// nocommit: clean up these tests (eg. not to use IndexINput/Output) public class TestPagedBytes extends LuceneTestCase { + /* public void testDataInputOutput() throws Exception { Random random = random(); for(int iter=0;iter<5*RANDOM_MULTIPLIER;iter++) { @@ -79,63 +79,6 @@ public class TestPagedBytes extends LuceneTestCase { } } - public void testLengthPrefix() throws Exception { - Random random = random(); - for(int iter=0;iter<5*RANDOM_MULTIPLIER;iter++) { - final int blockBits = _TestUtil.nextInt(random, 2, 20); - final int blockSize = 1 << blockBits; - final PagedBytes p = new PagedBytes(blockBits); - final List addresses = new ArrayList(); - final List answers = new ArrayList(); - int totBytes = 0; - while(totBytes < 10000000 && answers.size() < 100000) { - final int len = random.nextInt(Math.min(blockSize-2, 32768)); - final BytesRef b = new BytesRef(); - b.bytes = new byte[len]; - b.length = len; - b.offset = 0; - random.nextBytes(b.bytes); - answers.add(b); - addresses.add((int) p.copyUsingLengthPrefix(b)); - - totBytes += len; - } - - final PagedBytes.Reader reader = p.freeze(random.nextBoolean()); - - final BytesRef slice = new BytesRef(); - - for(int idx=0;idx