mirror of https://github.com/apache/lucene.git
trim the fat from PagedBytes
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1438211 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4b329aa64d
commit
2857df98de
|
@ -21,8 +21,6 @@ import java.io.IOException;
|
|||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
|
||||
/** Represents a logical byte[] as a series of pages. You
|
||||
|
@ -32,6 +30,7 @@ import org.apache.lucene.store.IndexInput;
|
|||
*
|
||||
* @lucene.internal
|
||||
**/
|
||||
// nocommit: make this simply a big ass array and nothing more.
|
||||
public final class PagedBytes {
|
||||
private final List<byte[]> blocks = new ArrayList<byte[]>();
|
||||
private final List<Integer> blockEnd = new ArrayList<Integer>();
|
||||
|
@ -110,6 +109,7 @@ public final class PagedBytes {
|
|||
*
|
||||
* @lucene.internal
|
||||
**/
|
||||
// nocommit: move this shit and any other vint bogusness to fieldcacheimpl!
|
||||
public BytesRef fill(BytesRef b, long start) {
|
||||
final int index = (int) (start >> blockBits);
|
||||
final int offset = (int) (start & blockMask);
|
||||
|
@ -125,132 +125,6 @@ public final class PagedBytes {
|
|||
}
|
||||
return b;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads length as 1 or 2 byte vInt prefix, starting at <i>start</i>. *
|
||||
* <p>
|
||||
* <b>Note:</b> this method does not support slices spanning across block
|
||||
* borders.
|
||||
* </p>
|
||||
*
|
||||
* @return the internal block number of the slice.
|
||||
* @lucene.internal
|
||||
**/
|
||||
public int fillAndGetIndex(BytesRef b, long start) {
|
||||
final int index = (int) (start >> blockBits);
|
||||
final int offset = (int) (start & blockMask);
|
||||
final byte[] block = b.bytes = blocks[index];
|
||||
|
||||
if ((block[offset] & 128) == 0) {
|
||||
b.length = block[offset];
|
||||
b.offset = offset+1;
|
||||
} else {
|
||||
b.length = ((block[offset] & 0x7f) << 8) | (block[1+offset] & 0xff);
|
||||
b.offset = offset+2;
|
||||
assert b.length > 0;
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads length as 1 or 2 byte vInt prefix, starting at <i>start</i> and
|
||||
* returns the start offset of the next part, suitable as start parameter on
|
||||
* next call to sequentially read all {@link BytesRef}.
|
||||
*
|
||||
* <p>
|
||||
* <b>Note:</b> this method does not support slices spanning across block
|
||||
* borders.
|
||||
* </p>
|
||||
*
|
||||
* @return the start offset of the next part, suitable as start parameter on
|
||||
* next call to sequentially read all {@link BytesRef}.
|
||||
* @lucene.internal
|
||||
**/
|
||||
public long fillAndGetStart(BytesRef b, long start) {
|
||||
final int index = (int) (start >> blockBits);
|
||||
final int offset = (int) (start & blockMask);
|
||||
final byte[] block = b.bytes = blocks[index];
|
||||
|
||||
if ((block[offset] & 128) == 0) {
|
||||
b.length = block[offset];
|
||||
b.offset = offset+1;
|
||||
start += 1L + b.length;
|
||||
} else {
|
||||
b.length = ((block[offset] & 0x7f) << 8) | (block[1+offset] & 0xff);
|
||||
b.offset = offset+2;
|
||||
start += 2L + b.length;
|
||||
assert b.length > 0;
|
||||
}
|
||||
return start;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets a slice out of {@link PagedBytes} starting at <i>start</i>, the
|
||||
* length is read as 1 or 2 byte vInt prefix. Iff the slice spans across a
|
||||
* block border this method will allocate sufficient resources and copy the
|
||||
* paged data.
|
||||
* <p>
|
||||
* Slices spanning more than one block are not supported.
|
||||
* </p>
|
||||
*
|
||||
* @lucene.internal
|
||||
**/
|
||||
public BytesRef fillSliceWithPrefix(BytesRef b, long start) {
|
||||
int index = (int) (start >> blockBits);
|
||||
int offset = (int) (start & blockMask);
|
||||
byte[] block = blocks[index];
|
||||
final int length;
|
||||
assert offset <= block.length-1;
|
||||
if ((block[offset] & 128) == 0) {
|
||||
length = block[offset];
|
||||
offset = offset+1;
|
||||
} else {
|
||||
if (offset==block.length-1) {
|
||||
final byte[] nextBlock = blocks[++index];
|
||||
length = ((block[offset] & 0x7f) << 8) | (nextBlock[0] & 0xff);
|
||||
offset = 1;
|
||||
block = nextBlock;
|
||||
assert length > 0;
|
||||
} else {
|
||||
assert offset < block.length-1;
|
||||
length = ((block[offset] & 0x7f) << 8) | (block[1+offset] & 0xff);
|
||||
offset = offset+2;
|
||||
assert length > 0;
|
||||
}
|
||||
}
|
||||
assert length >= 0: "length=" + length;
|
||||
b.length = length;
|
||||
|
||||
// NOTE: even though copyUsingLengthPrefix always
|
||||
// allocs a new block if the byte[] to be added won't
|
||||
// fit in current block,
|
||||
// VarDerefBytesImpl.finishInternal does its own
|
||||
// prefix + byte[] writing which can span two blocks,
|
||||
// so we support that here on decode:
|
||||
if (blockSize - offset >= length) {
|
||||
// Within block
|
||||
b.offset = offset;
|
||||
b.bytes = blocks[index];
|
||||
} else {
|
||||
// Split
|
||||
b.bytes = new byte[length];
|
||||
b.offset = 0;
|
||||
System.arraycopy(blocks[index], offset, b.bytes, 0, blockSize-offset);
|
||||
System.arraycopy(blocks[1+index], 0, b.bytes, blockSize-offset, length-(blockSize-offset));
|
||||
}
|
||||
return b;
|
||||
}
|
||||
|
||||
/** @lucene.internal */
|
||||
public byte[][] getBlocks() {
|
||||
return blocks;
|
||||
}
|
||||
|
||||
/** @lucene.internal */
|
||||
public int[] getBlockEnds() {
|
||||
return blockEnds;
|
||||
}
|
||||
}
|
||||
|
||||
/** 1<<blockBits must be bigger than biggest single
|
||||
|
@ -288,34 +162,6 @@ public final class PagedBytes {
|
|||
}
|
||||
}
|
||||
|
||||
/** Copy BytesRef in */
|
||||
public void copy(BytesRef bytes) {
|
||||
int byteCount = bytes.length;
|
||||
int bytesUpto = bytes.offset;
|
||||
while (byteCount > 0) {
|
||||
int left = blockSize - upto;
|
||||
if (left == 0) {
|
||||
if (currentBlock != null) {
|
||||
blocks.add(currentBlock);
|
||||
blockEnd.add(upto);
|
||||
}
|
||||
currentBlock = new byte[blockSize];
|
||||
upto = 0;
|
||||
left = blockSize;
|
||||
}
|
||||
if (left < byteCount) {
|
||||
System.arraycopy(bytes.bytes, bytesUpto, currentBlock, upto, left);
|
||||
upto = blockSize;
|
||||
byteCount -= left;
|
||||
bytesUpto += left;
|
||||
} else {
|
||||
System.arraycopy(bytes.bytes, bytesUpto, currentBlock, upto, byteCount);
|
||||
upto += byteCount;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Copy BytesRef in, setting BytesRef out to the result.
|
||||
* Do not use this if you will use freeze(true).
|
||||
* This only supports bytes.length <= blockSize */
|
||||
|
@ -375,6 +221,7 @@ public final class PagedBytes {
|
|||
|
||||
/** Copy bytes in, writing the length as a 1 or 2 byte
|
||||
* vInt prefix. */
|
||||
// nocommit: move this shit and any other vint bogusness to fieldcacheimpl!
|
||||
public long copyUsingLengthPrefix(BytesRef bytes) {
|
||||
if (bytes.length >= 32768) {
|
||||
throw new IllegalArgumentException("max length is 32767 (got " + bytes.length + ")");
|
||||
|
@ -405,171 +252,4 @@ public final class PagedBytes {
|
|||
|
||||
return pointer;
|
||||
}
|
||||
|
||||
public final class PagedBytesDataInput extends IndexInput {
|
||||
private int currentBlockIndex;
|
||||
private int currentBlockUpto;
|
||||
private byte[] currentBlock;
|
||||
|
||||
PagedBytesDataInput() {
|
||||
super("PagedBytesIndexInput");
|
||||
currentBlock = blocks.get(0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public PagedBytesDataInput clone() {
|
||||
PagedBytesDataInput clone = getDataInput();
|
||||
clone.setPosition(getPosition());
|
||||
return clone;
|
||||
}
|
||||
|
||||
/** Returns the current byte position. */
|
||||
public long getPosition() {
|
||||
return (long) currentBlockIndex * blockSize + currentBlockUpto;
|
||||
}
|
||||
|
||||
/** Seek to a position previously obtained from
|
||||
* {@link #getPosition}. */
|
||||
public void setPosition(long pos) {
|
||||
currentBlockIndex = (int) (pos >> blockBits);
|
||||
currentBlock = blocks.get(currentBlockIndex);
|
||||
currentBlockUpto = (int) (pos & blockMask);
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte readByte() {
|
||||
if (currentBlockUpto == blockSize) {
|
||||
nextBlock();
|
||||
}
|
||||
return currentBlock[currentBlockUpto++];
|
||||
}
|
||||
|
||||
@Override
|
||||
public void readBytes(byte[] b, int offset, int len) {
|
||||
assert b.length >= offset + len;
|
||||
final int offsetEnd = offset + len;
|
||||
while (true) {
|
||||
final int blockLeft = blockSize - currentBlockUpto;
|
||||
final int left = offsetEnd - offset;
|
||||
if (blockLeft < left) {
|
||||
System.arraycopy(currentBlock, currentBlockUpto,
|
||||
b, offset,
|
||||
blockLeft);
|
||||
nextBlock();
|
||||
offset += blockLeft;
|
||||
} else {
|
||||
// Last block
|
||||
System.arraycopy(currentBlock, currentBlockUpto,
|
||||
b, offset,
|
||||
left);
|
||||
currentBlockUpto += left;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void nextBlock() {
|
||||
currentBlockIndex++;
|
||||
currentBlockUpto = 0;
|
||||
currentBlock = blocks.get(currentBlockIndex);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
//
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getFilePointer() {
|
||||
return currentBlockIndex * blockSize + currentBlockUpto;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void seek(long pos) throws IOException {
|
||||
currentBlockIndex = (int) (pos >> blockBits);
|
||||
currentBlock = blocks.get(currentBlockIndex);
|
||||
currentBlockUpto = (int) (pos & blockMask);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long length() {
|
||||
return upto;
|
||||
}
|
||||
}
|
||||
|
||||
public final class PagedBytesDataOutput extends DataOutput {
|
||||
@Override
|
||||
public void writeByte(byte b) {
|
||||
if (upto == blockSize) {
|
||||
if (currentBlock != null) {
|
||||
blocks.add(currentBlock);
|
||||
blockEnd.add(upto);
|
||||
}
|
||||
currentBlock = new byte[blockSize];
|
||||
upto = 0;
|
||||
}
|
||||
currentBlock[upto++] = b;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeBytes(byte[] b, int offset, int length) {
|
||||
assert b.length >= offset + length;
|
||||
if (length == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (upto == blockSize) {
|
||||
if (currentBlock != null) {
|
||||
blocks.add(currentBlock);
|
||||
blockEnd.add(upto);
|
||||
}
|
||||
currentBlock = new byte[blockSize];
|
||||
upto = 0;
|
||||
}
|
||||
|
||||
final int offsetEnd = offset + length;
|
||||
while(true) {
|
||||
final int left = offsetEnd - offset;
|
||||
final int blockLeft = blockSize - upto;
|
||||
if (blockLeft < left) {
|
||||
System.arraycopy(b, offset, currentBlock, upto, blockLeft);
|
||||
blocks.add(currentBlock);
|
||||
blockEnd.add(blockSize);
|
||||
currentBlock = new byte[blockSize];
|
||||
upto = 0;
|
||||
offset += blockLeft;
|
||||
} else {
|
||||
// Last block
|
||||
System.arraycopy(b, offset, currentBlock, upto, left);
|
||||
upto += left;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Return the current byte position. */
|
||||
public long getPosition() {
|
||||
return getPointer();
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns a DataInput to read values from this
|
||||
* PagedBytes instance. */
|
||||
public PagedBytesDataInput getDataInput() {
|
||||
if (!frozen) {
|
||||
throw new IllegalStateException("must call freeze() before getDataInput");
|
||||
}
|
||||
return new PagedBytesDataInput();
|
||||
}
|
||||
|
||||
/** Returns a DataOutput that you may use to write into
|
||||
* this PagedBytes instance. If you do this, you should
|
||||
* not call the other writing methods (eg, copy);
|
||||
* results are undefined. */
|
||||
public PagedBytesDataOutput getDataOutput() {
|
||||
if (frozen) {
|
||||
throw new IllegalStateException("cannot get DataOutput after freeze()");
|
||||
}
|
||||
return new PagedBytesDataOutput();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,13 +20,12 @@ package org.apache.lucene.util;
|
|||
import java.util.Arrays;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.util.PagedBytes.PagedBytesDataInput;
|
||||
import org.apache.lucene.util.PagedBytes.PagedBytesDataOutput;
|
||||
import org.junit.Ignore;
|
||||
|
||||
@Ignore("You must increase heap to > 2 G to run this")
|
||||
// nocommit: write this test in some other way (not indexinput/output)
|
||||
public class Test2BPagedBytes extends LuceneTestCase {
|
||||
|
||||
/*
|
||||
public void test() throws Exception {
|
||||
PagedBytes pb = new PagedBytes(15);
|
||||
PagedBytesDataOutput dataOutput = pb.getDataOutput();
|
||||
|
@ -64,5 +63,5 @@ public class Test2BPagedBytes extends LuceneTestCase {
|
|||
lastFP = fp;
|
||||
netBytes += numBytes;
|
||||
}
|
||||
}
|
||||
} */
|
||||
}
|
||||
|
|
|
@ -22,12 +22,12 @@ import java.util.*;
|
|||
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.util.PagedBytes.PagedBytesDataInput;
|
||||
import org.apache.lucene.util.PagedBytes.PagedBytesDataOutput;
|
||||
import org.junit.Ignore;
|
||||
|
||||
// nocommit: clean up these tests (eg. not to use IndexINput/Output)
|
||||
public class TestPagedBytes extends LuceneTestCase {
|
||||
|
||||
/*
|
||||
public void testDataInputOutput() throws Exception {
|
||||
Random random = random();
|
||||
for(int iter=0;iter<5*RANDOM_MULTIPLIER;iter++) {
|
||||
|
@ -79,63 +79,6 @@ public class TestPagedBytes extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testLengthPrefix() throws Exception {
|
||||
Random random = random();
|
||||
for(int iter=0;iter<5*RANDOM_MULTIPLIER;iter++) {
|
||||
final int blockBits = _TestUtil.nextInt(random, 2, 20);
|
||||
final int blockSize = 1 << blockBits;
|
||||
final PagedBytes p = new PagedBytes(blockBits);
|
||||
final List<Integer> addresses = new ArrayList<Integer>();
|
||||
final List<BytesRef> answers = new ArrayList<BytesRef>();
|
||||
int totBytes = 0;
|
||||
while(totBytes < 10000000 && answers.size() < 100000) {
|
||||
final int len = random.nextInt(Math.min(blockSize-2, 32768));
|
||||
final BytesRef b = new BytesRef();
|
||||
b.bytes = new byte[len];
|
||||
b.length = len;
|
||||
b.offset = 0;
|
||||
random.nextBytes(b.bytes);
|
||||
answers.add(b);
|
||||
addresses.add((int) p.copyUsingLengthPrefix(b));
|
||||
|
||||
totBytes += len;
|
||||
}
|
||||
|
||||
final PagedBytes.Reader reader = p.freeze(random.nextBoolean());
|
||||
|
||||
final BytesRef slice = new BytesRef();
|
||||
|
||||
for(int idx=0;idx<answers.size();idx++) {
|
||||
reader.fillSliceWithPrefix(slice, addresses.get(idx));
|
||||
assertEquals(answers.get(idx), slice);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// LUCENE-3841: even though
|
||||
// copyUsingLengthPrefix will never span two blocks, make
|
||||
// sure if caller writes their own prefix followed by the
|
||||
// bytes, it still works:
|
||||
public void testLengthPrefixAcrossTwoBlocks() throws Exception {
|
||||
Random random = random();
|
||||
final PagedBytes p = new PagedBytes(10);
|
||||
final DataOutput out = p.getDataOutput();
|
||||
final byte[] bytes1 = new byte[1000];
|
||||
random.nextBytes(bytes1);
|
||||
out.writeBytes(bytes1, 0, bytes1.length);
|
||||
out.writeByte((byte) 40);
|
||||
final byte[] bytes2 = new byte[40];
|
||||
random.nextBytes(bytes2);
|
||||
out.writeBytes(bytes2, 0, bytes2.length);
|
||||
|
||||
final PagedBytes.Reader reader = p.freeze(random.nextBoolean());
|
||||
BytesRef answer = reader.fillSliceWithPrefix(new BytesRef(), 1000);
|
||||
assertEquals(40, answer.length);
|
||||
for(int i=0;i<40;i++) {
|
||||
assertEquals(bytes2[i], answer.bytes[answer.offset + i]);
|
||||
}
|
||||
}
|
||||
|
||||
@Ignore // memory hole
|
||||
public void testOverflow() throws IOException {
|
||||
final int blockBits = _TestUtil.nextInt(random(), 14, 28);
|
||||
|
@ -164,5 +107,5 @@ public class TestPagedBytes extends LuceneTestCase {
|
|||
assertEquals(arr[(int) (offset % arr.length)], in.readByte());
|
||||
assertEquals(offset+1, in.getPosition());
|
||||
}
|
||||
}
|
||||
} */
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue