trim the fat from PagedBytes

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1438211 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2013-01-24 21:42:46 +00:00
parent 4b329aa64d
commit 2857df98de
3 changed files with 9 additions and 387 deletions

org/apache/lucene/util/PagedBytes.java

@@ -21,8 +21,6 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput;
/** Represents a logical byte[] as a series of pages. You
@@ -32,6 +30,7 @@ import org.apache.lucene.store.IndexInput;
*
* @lucene.internal
**/
// nocommit: make this simply a big ass array and nothing more.
public final class PagedBytes {
private final List<byte[]> blocks = new ArrayList<byte[]>();
private final List<Integer> blockEnd = new ArrayList<Integer>();
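For orientation, every accessor below maps a logical long address onto a (block, offset) pair with plain shift-and-mask arithmetic; a minimal sketch with illustrative values, not library code:
// Illustrative sketch of the address arithmetic PagedBytes uses throughout.
int blockBits = 15;                       // e.g. 32 KB pages
int blockSize = 1 << blockBits;
int blockMask = blockSize - 1;
long start = 100000L;                     // logical position in the "big" byte[]
int index  = (int) (start >> blockBits);  // which page holds it
int offset = (int) (start & blockMask);   // position within that page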
@@ -110,6 +109,7 @@ public final class PagedBytes {
*
* @lucene.internal
**/
// nocommit: move this shit and any other vint bogusness to fieldcacheimpl!
public BytesRef fill(BytesRef b, long start) {
final int index = (int) (start >> blockBits);
final int offset = (int) (start & blockMask);
@@ -125,132 +125,6 @@
}
return b;
}
/**
* Reads length as a 1 or 2 byte vInt prefix, starting at <i>start</i>.
* <p>
* <b>Note:</b> this method does not support slices spanning across block
* borders.
* </p>
*
* @return the internal block number of the slice.
* @lucene.internal
**/
public int fillAndGetIndex(BytesRef b, long start) {
final int index = (int) (start >> blockBits);
final int offset = (int) (start & blockMask);
final byte[] block = b.bytes = blocks[index];
if ((block[offset] & 128) == 0) {
b.length = block[offset];
b.offset = offset+1;
} else {
b.length = ((block[offset] & 0x7f) << 8) | (block[1+offset] & 0xff);
b.offset = offset+2;
assert b.length > 0;
}
return index;
}
/**
* Reads length as a 1 or 2 byte vInt prefix, starting at <i>start</i>, and
* returns the start offset of the next part, suitable as start parameter on
* next call to sequentially read all {@link BytesRef}.
*
* <p>
* <b>Note:</b> this method does not support slices spanning across block
* borders.
* </p>
*
* @return the start offset of the next part, suitable as start parameter on
* next call to sequentially read all {@link BytesRef}.
* @lucene.internal
**/
public long fillAndGetStart(BytesRef b, long start) {
final int index = (int) (start >> blockBits);
final int offset = (int) (start & blockMask);
final byte[] block = b.bytes = blocks[index];
if ((block[offset] & 128) == 0) {
b.length = block[offset];
b.offset = offset+1;
start += 1L + b.length;
} else {
b.length = ((block[offset] & 0x7f) << 8) | (block[1+offset] & 0xff);
b.offset = offset+2;
start += 2L + b.length;
assert b.length > 0;
}
return start;
}
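A hedged usage sketch of the method above: assuming a frozen reader and a known entry count (both hypothetical names, not part of this diff), the returned offset feeds the next call, so length-prefixed entries can be scanned sequentially:
// Hypothetical sequential scan using the (removed) fillAndGetStart.
BytesRef scratch = new BytesRef();
long addr = 0;
for (int i = 0; i < entryCount; i++) {           // entryCount: assumed known by the caller
  addr = reader.fillAndGetStart(scratch, addr);  // scratch now points at entry i
}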
/**
* Gets a slice out of {@link PagedBytes} starting at <i>start</i>; the
* length is read as a 1 or 2 byte vInt prefix. If the slice spans a
* block border, this method allocates a new byte[] and copies the
* paged data into it.
* <p>
* Slices spanning more than one block are not supported.
* </p>
*
* @lucene.internal
**/
public BytesRef fillSliceWithPrefix(BytesRef b, long start) {
int index = (int) (start >> blockBits);
int offset = (int) (start & blockMask);
byte[] block = blocks[index];
final int length;
assert offset <= block.length-1;
if ((block[offset] & 128) == 0) {
length = block[offset];
offset = offset+1;
} else {
if (offset==block.length-1) {
final byte[] nextBlock = blocks[++index];
length = ((block[offset] & 0x7f) << 8) | (nextBlock[0] & 0xff);
offset = 1;
block = nextBlock;
assert length > 0;
} else {
assert offset < block.length-1;
length = ((block[offset] & 0x7f) << 8) | (block[1+offset] & 0xff);
offset = offset+2;
assert length > 0;
}
}
assert length >= 0: "length=" + length;
b.length = length;
// NOTE: even though copyUsingLengthPrefix always
// allocs a new block if the byte[] to be added won't
// fit in current block,
// VarDerefBytesImpl.finishInternal does its own
// prefix + byte[] writing which can span two blocks,
// so we support that here on decode:
if (blockSize - offset >= length) {
// Within block
b.offset = offset;
b.bytes = blocks[index];
} else {
// Split
b.bytes = new byte[length];
b.offset = 0;
System.arraycopy(blocks[index], offset, b.bytes, 0, blockSize-offset);
System.arraycopy(blocks[1+index], 0, b.bytes, blockSize-offset, length-(blockSize-offset));
}
return b;
}
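Isolating the prefix decode that the methods above share may help in reading their branches: a clear high bit means a single length byte (0..127); otherwise the low 7 bits form the high byte of a 15-bit length and one more byte follows (lengths up to 32767). A sketch under those assumptions:
// Length-prefix decode, isolated for clarity (sketch, not library code).
int first = block[offset] & 0xff;
final int length, dataStart;
if ((first & 0x80) == 0) {
  length = first;                                               // 1-byte prefix
  dataStart = offset + 1;
} else {
  length = ((first & 0x7f) << 8) | (block[offset + 1] & 0xff);  // 2-byte prefix
  dataStart = offset + 2;
}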
/** @lucene.internal */
public byte[][] getBlocks() {
return blocks;
}
/** @lucene.internal */
public int[] getBlockEnds() {
return blockEnds;
}
}
/** 1<<blockBits must be bigger than biggest single
@@ -288,34 +162,6 @@ public final class PagedBytes {
}
}
/** Copy BytesRef in */
public void copy(BytesRef bytes) {
int byteCount = bytes.length;
int bytesUpto = bytes.offset;
while (byteCount > 0) {
int left = blockSize - upto;
if (left == 0) {
if (currentBlock != null) {
blocks.add(currentBlock);
blockEnd.add(upto);
}
currentBlock = new byte[blockSize];
upto = 0;
left = blockSize;
}
if (left < byteCount) {
System.arraycopy(bytes.bytes, bytesUpto, currentBlock, upto, left);
upto = blockSize;
byteCount -= left;
bytesUpto += left;
} else {
System.arraycopy(bytes.bytes, bytesUpto, currentBlock, upto, byteCount);
upto += byteCount;
break;
}
}
}
/** Copy BytesRef in, setting BytesRef out to the result.
* Do not use this if you will use freeze(true).
* This only supports bytes.length <= blockSize */
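A hypothetical call of this copy variant (the pagedBytes instance and the input value are stand-ins for illustration): out is left pointing into the current block, which is why it must not be combined with freeze(true):
// Hypothetical usage; requires the input length <= blockSize.
BytesRef out = new BytesRef();
pagedBytes.copy(new BytesRef("hello"), out);  // out now points into the current block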
@@ -375,6 +221,7 @@ public final class PagedBytes {
/** Copy bytes in, writing the length as a 1 or 2 byte
* vInt prefix. */
// nocommit: move this shit and any other vint bogusness to fieldcacheimpl!
public long copyUsingLengthPrefix(BytesRef bytes) {
if (bytes.length >= 32768) {
throw new IllegalArgumentException("max length is 32767 (got " + bytes.length + ")");
@@ -405,171 +252,4 @@ public final class PagedBytes {
return pointer;
}
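For reference, the 1 or 2 byte prefix this method writes ahead of the payload encodes as follows; a sketch in which dest and pos stand in for the current block and write position:
// Sketch of the length-prefix encoding (mirrors the decode shown earlier).
if (bytes.length < 128) {
  dest[pos++] = (byte) bytes.length;                  // single byte, high bit clear
} else {
  dest[pos++] = (byte) (0x80 | (bytes.length >> 8));  // flag bit + high 7 bits
  dest[pos++] = (byte) (bytes.length & 0xff);         // low 8 bits
}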
public final class PagedBytesDataInput extends IndexInput {
private int currentBlockIndex;
private int currentBlockUpto;
private byte[] currentBlock;
PagedBytesDataInput() {
super("PagedBytesIndexInput");
currentBlock = blocks.get(0);
}
@Override
public PagedBytesDataInput clone() {
PagedBytesDataInput clone = getDataInput();
clone.setPosition(getPosition());
return clone;
}
/** Returns the current byte position. */
public long getPosition() {
return (long) currentBlockIndex * blockSize + currentBlockUpto;
}
/** Seek to a position previously obtained from
* {@link #getPosition}. */
public void setPosition(long pos) {
currentBlockIndex = (int) (pos >> blockBits);
currentBlock = blocks.get(currentBlockIndex);
currentBlockUpto = (int) (pos & blockMask);
}
@Override
public byte readByte() {
if (currentBlockUpto == blockSize) {
nextBlock();
}
return currentBlock[currentBlockUpto++];
}
@Override
public void readBytes(byte[] b, int offset, int len) {
assert b.length >= offset + len;
final int offsetEnd = offset + len;
while (true) {
final int blockLeft = blockSize - currentBlockUpto;
final int left = offsetEnd - offset;
if (blockLeft < left) {
System.arraycopy(currentBlock, currentBlockUpto,
b, offset,
blockLeft);
nextBlock();
offset += blockLeft;
} else {
// Last block
System.arraycopy(currentBlock, currentBlockUpto,
b, offset,
left);
currentBlockUpto += left;
break;
}
}
}
private void nextBlock() {
currentBlockIndex++;
currentBlockUpto = 0;
currentBlock = blocks.get(currentBlockIndex);
}
@Override
public void close() throws IOException {
//
}
@Override
public long getFilePointer() {
return (long) currentBlockIndex * blockSize + currentBlockUpto; // cast avoids int overflow past 2 GB
}
@Override
public void seek(long pos) throws IOException {
currentBlockIndex = (int) (pos >> blockBits);
currentBlock = blocks.get(currentBlockIndex);
currentBlockUpto = (int) (pos & blockMask);
}
@Override
public long length() {
return upto;
}
}
public final class PagedBytesDataOutput extends DataOutput {
@Override
public void writeByte(byte b) {
if (upto == blockSize) {
if (currentBlock != null) {
blocks.add(currentBlock);
blockEnd.add(upto);
}
currentBlock = new byte[blockSize];
upto = 0;
}
currentBlock[upto++] = b;
}
@Override
public void writeBytes(byte[] b, int offset, int length) {
assert b.length >= offset + length;
if (length == 0) {
return;
}
if (upto == blockSize) {
if (currentBlock != null) {
blocks.add(currentBlock);
blockEnd.add(upto);
}
currentBlock = new byte[blockSize];
upto = 0;
}
final int offsetEnd = offset + length;
while(true) {
final int left = offsetEnd - offset;
final int blockLeft = blockSize - upto;
if (blockLeft < left) {
System.arraycopy(b, offset, currentBlock, upto, blockLeft);
blocks.add(currentBlock);
blockEnd.add(blockSize);
currentBlock = new byte[blockSize];
upto = 0;
offset += blockLeft;
} else {
// Last block
System.arraycopy(b, offset, currentBlock, upto, left);
upto += left;
break;
}
}
}
/** Return the current byte position. */
public long getPosition() {
return getPointer();
}
}
/** Returns a DataInput to read values from this
* PagedBytes instance. */
public PagedBytesDataInput getDataInput() {
if (!frozen) {
throw new IllegalStateException("must call freeze() before getDataInput");
}
return new PagedBytesDataInput();
}
/** Returns a DataOutput that you may use to write into
* this PagedBytes instance. If you do this, you should
* not call the other writing methods (eg, copy);
* results are undefined. */
public PagedBytesDataOutput getDataOutput() {
if (frozen) {
throw new IllegalStateException("cannot get DataOutput after freeze()");
}
return new PagedBytesDataOutput();
}
}
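Taken together, the streaming API removed by this commit was used roughly as follows; a hedged round-trip sketch of the deleted code paths, not of the post-commit API:
// Hypothetical round trip through the APIs deleted in this commit.
PagedBytes pb = new PagedBytes(15);                        // 32 KB pages
PagedBytes.PagedBytesDataOutput out = pb.getDataOutput();
out.writeBytes(new byte[] {1, 2, 3}, 0, 3);
pb.freeze(true);                                           // freeze() must precede getDataInput()
PagedBytes.PagedBytesDataInput in = pb.getDataInput();
byte first = in.readByte();                                // reads back 1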

org/apache/lucene/util/Test2BPagedBytes.java

@@ -20,13 +20,12 @@ package org.apache.lucene.util;
import java.util.Arrays;
import java.util.Random;
import org.apache.lucene.util.PagedBytes.PagedBytesDataInput;
import org.apache.lucene.util.PagedBytes.PagedBytesDataOutput;
import org.junit.Ignore;
@Ignore("You must increase heap to > 2 G to run this")
// nocommit: write this test in some other way (not indexinput/output)
public class Test2BPagedBytes extends LuceneTestCase {
/*
public void test() throws Exception {
PagedBytes pb = new PagedBytes(15);
PagedBytesDataOutput dataOutput = pb.getDataOutput();
@@ -64,5 +63,5 @@ public class Test2BPagedBytes extends LuceneTestCase {
lastFP = fp;
netBytes += numBytes;
}
}
} */
}

org/apache/lucene/util/TestPagedBytes.java

@@ -22,12 +22,12 @@ import java.util.*;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.PagedBytes.PagedBytesDataInput;
import org.apache.lucene.util.PagedBytes.PagedBytesDataOutput;
import org.junit.Ignore;
// nocommit: clean up these tests (eg. not to use IndexINput/Output)
public class TestPagedBytes extends LuceneTestCase {
/*
public void testDataInputOutput() throws Exception {
Random random = random();
for(int iter=0;iter<5*RANDOM_MULTIPLIER;iter++) {
@@ -79,63 +79,6 @@
}
}
public void testLengthPrefix() throws Exception {
Random random = random();
for(int iter=0;iter<5*RANDOM_MULTIPLIER;iter++) {
final int blockBits = _TestUtil.nextInt(random, 2, 20);
final int blockSize = 1 << blockBits;
final PagedBytes p = new PagedBytes(blockBits);
final List<Integer> addresses = new ArrayList<Integer>();
final List<BytesRef> answers = new ArrayList<BytesRef>();
int totBytes = 0;
while(totBytes < 10000000 && answers.size() < 100000) {
final int len = random.nextInt(Math.min(blockSize-2, 32768));
final BytesRef b = new BytesRef();
b.bytes = new byte[len];
b.length = len;
b.offset = 0;
random.nextBytes(b.bytes);
answers.add(b);
addresses.add((int) p.copyUsingLengthPrefix(b));
totBytes += len;
}
final PagedBytes.Reader reader = p.freeze(random.nextBoolean());
final BytesRef slice = new BytesRef();
for(int idx=0;idx<answers.size();idx++) {
reader.fillSliceWithPrefix(slice, addresses.get(idx));
assertEquals(answers.get(idx), slice);
}
}
}
// LUCENE-3841: even though
// copyUsingLengthPrefix will never span two blocks, make
// sure if caller writes their own prefix followed by the
// bytes, it still works:
public void testLengthPrefixAcrossTwoBlocks() throws Exception {
Random random = random();
final PagedBytes p = new PagedBytes(10);
final DataOutput out = p.getDataOutput();
final byte[] bytes1 = new byte[1000];
random.nextBytes(bytes1);
out.writeBytes(bytes1, 0, bytes1.length);
out.writeByte((byte) 40);
final byte[] bytes2 = new byte[40];
random.nextBytes(bytes2);
out.writeBytes(bytes2, 0, bytes2.length);
final PagedBytes.Reader reader = p.freeze(random.nextBoolean());
BytesRef answer = reader.fillSliceWithPrefix(new BytesRef(), 1000);
assertEquals(40, answer.length);
for(int i=0;i<40;i++) {
assertEquals(bytes2[i], answer.bytes[answer.offset + i]);
}
}
@Ignore // memory hole
public void testOverflow() throws IOException {
final int blockBits = _TestUtil.nextInt(random(), 14, 28);
@@ -164,5 +107,5 @@ public class TestPagedBytes extends LuceneTestCase {
assertEquals(arr[(int) (offset % arr.length)], in.readByte());
assertEquals(offset+1, in.getPosition());
}
}
} */
}