LUCENE-4678: add BytesStore.truncate and use it to not double-buffer during pack

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1432646 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2013-01-13 15:49:35 +00:00
parent 55c5b77cf0
commit aff120659c
5 changed files with 93 additions and 43 deletions

View File

@ -69,6 +69,14 @@ class BytesStore extends DataOutput {
nextWrite = blocks.get(blocks.size()-1).length;
}
/** Absolute write byte; you must ensure dest is < max
* position written so far. */
public void writeByte(int dest, byte b) {
int blockIndex = dest >> blockBits;
byte[] block = blocks.get(blockIndex);
block[dest & blockMask] = b;
}
@Override
public void writeByte(byte b) {
if (nextWrite == blockSize) {
@ -237,9 +245,10 @@ class BytesStore extends DataOutput {
}
}
/** Reverse the last numBytes. */
/** Reverse from srcPos, inclusive, to destPos, inclusive. */
public void reverse(int srcPos, int destPos) {
assert srcPos < destPos;
assert destPos < getPosition();
//System.out.println("reverse src=" + srcPos + " dest=" + destPos);
int srcBlockIndex = srcPos >> blockBits;
@ -275,7 +284,7 @@ class BytesStore extends DataOutput {
}
}
public void skip(int len) {
public void skipBytes(int len) {
while (len > 0) {
int chunk = blockSize - nextWrite;
if (len <= chunk) {
@ -294,6 +303,26 @@ class BytesStore extends DataOutput {
return (blocks.size()-1) * blockSize + nextWrite;
}
/** Pos must be less than the max position written so far!
* Ie, you cannot "grow" the file with this! */
public void truncate(int newLen) {
assert newLen <= getPosition();
assert newLen >= 0;
int blockIndex = newLen >> blockBits;
nextWrite = newLen & blockMask;
if (nextWrite == 0) {
blockIndex--;
nextWrite = blockSize;
}
blocks.subList(blockIndex+1, blocks.size()).clear();
if (newLen == 0) {
current = null;
} else {
current = blocks.get(blockIndex);
}
assert newLen == getPosition();
}
public void finish() {
if (current != null) {
byte[] lastBuffer = new byte[nextWrite];

View File

@ -730,7 +730,7 @@ public final class FST<T> {
int destPos = fixedArrayStart + nodeIn.numArcs*maxBytesPerArc;
assert destPos >= srcPos;
if (destPos > srcPos) {
bytes.skip(destPos - srcPos);
bytes.skipBytes(destPos - srcPos);
for(int arcIdx=nodeIn.numArcs-1;arcIdx>=0;arcIdx--) {
destPos -= maxBytesPerArc;
srcPos -= bytesPerArc[arcIdx];
@ -1444,9 +1444,6 @@ public final class FST<T> {
throw new IllegalArgumentException("this FST was not built with willPackFST=true");
}
final RAMOutputStream buffer = new RAMOutputStream();
byte[] bufferBytes = new byte[64];
Arc<T> arc = new Arc<T>();
final BytesReader r = getBytesReader(0);
@ -1553,7 +1550,7 @@ public final class FST<T> {
// this is an array'd node and bytesPerArc changes:
writeNode:
while(true) { // retry writing this node
assert buffer.getFilePointer() == 0;
//System.out.println(" cycle: retry");
readFirstRealTargetArc(node, arc, r);
@ -1563,9 +1560,9 @@ public final class FST<T> {
if (bytesPerArc == 0) {
bytesPerArc = arc.bytesPerArc;
}
buffer.writeByte(ARCS_AS_FIXED_ARRAY);
buffer.writeVInt(arc.numArcs);
buffer.writeVInt(bytesPerArc);
writer.writeByte(ARCS_AS_FIXED_ARRAY);
writer.writeVInt(arc.numArcs);
writer.writeVInt(bytesPerArc);
//System.out.println("node " + node + ": " + arc.numArcs + " arcs");
}
@ -1574,7 +1571,7 @@ public final class FST<T> {
while(true) { // iterate over all arcs for this node
//System.out.println(" cycle next arc");
final int arcStartPos = (int) buffer.getFilePointer();
final int arcStartPos = writer.getPosition();
nodeArcCount++;
byte flags = 0;
@ -1621,7 +1618,7 @@ public final class FST<T> {
absPtr = topNodeMap.size() + (int) newNodeAddress.get(arc.target) + addressError;
}
int delta = (int) (newNodeAddress.get(arc.target) + addressError - buffer.getFilePointer() - address - 2);
int delta = (int) (newNodeAddress.get(arc.target) + addressError - writer.getPosition() - 2);
if (delta < 0) {
//System.out.println("neg: " + delta);
anyNegDelta = true;
@ -1637,23 +1634,23 @@ public final class FST<T> {
}
assert flags != ARCS_AS_FIXED_ARRAY;
buffer.writeByte(flags);
writer.writeByte(flags);
fst.writeLabel(buffer, arc.label);
fst.writeLabel(writer, arc.label);
if (arc.output != NO_OUTPUT) {
outputs.write(arc.output, buffer);
outputs.write(arc.output, writer);
if (!retry) {
fst.arcWithOutputCount++;
}
}
if (arc.nextFinalOutput != NO_OUTPUT) {
outputs.writeFinalOutput(arc.nextFinalOutput, buffer);
outputs.writeFinalOutput(arc.nextFinalOutput, writer);
}
if (doWriteTarget) {
int delta = (int) (newNodeAddress.get(arc.target) + addressError - buffer.getFilePointer() - address);
int delta = (int) (newNodeAddress.get(arc.target) + addressError - writer.getPosition());
if (delta < 0) {
anyNegDelta = true;
//System.out.println("neg: " + delta);
@ -1662,7 +1659,7 @@ public final class FST<T> {
if (flag(flags, BIT_TARGET_DELTA)) {
//System.out.println(" delta");
buffer.writeVInt(delta);
writer.writeVInt(delta);
if (!retry) {
deltaCount++;
}
@ -1674,7 +1671,7 @@ public final class FST<T> {
System.out.println(" abs");
}
*/
buffer.writeVInt(absPtr);
writer.writeVInt(absPtr);
if (!retry) {
if (absPtr >= topNodeMap.size()) {
absCount++;
@ -1686,7 +1683,7 @@ public final class FST<T> {
}
if (useArcArray) {
final int arcBytes = (int) (buffer.getFilePointer() - arcStartPos);
final int arcBytes = writer.getPosition() - arcStartPos;
//System.out.println(" " + arcBytes + " bytes");
maxBytesPerArc = Math.max(maxBytesPerArc, arcBytes);
// NOTE: this may in fact go "backwards", if
@ -1696,11 +1693,7 @@ public final class FST<T> {
// will retry (below) so it's OK to ovewrite
// bytes:
//wasted += bytesPerArc - arcBytes;
int skip = (int) (arcStartPos + bytesPerArc - buffer.getFilePointer());
while(skip > 0) {
buffer.writeByte((byte) 0);
skip--;
}
writer.skipBytes(arcStartPos + bytesPerArc - writer.getPosition());
}
if (arc.isLast()) {
@ -1725,19 +1718,12 @@ public final class FST<T> {
// Retry:
bytesPerArc = maxBytesPerArc;
buffer.reset();
writer.truncate(address);
nodeArcCount = 0;
retry = true;
anyNegDelta = false;
}
if (bufferBytes.length < (int) buffer.getFilePointer()) {
bufferBytes = ArrayUtil.grow(bufferBytes, (int) buffer.getFilePointer());
}
buffer.writeTo(bufferBytes, 0);
writer.writeBytes(bufferBytes, 0, (int) buffer.getFilePointer());
buffer.reset();
negDelta |= anyNegDelta;
fst.arcCount += nodeArcCount;

View File

@ -42,7 +42,7 @@ public class TestBytesStore extends LuceneTestCase {
int pos = 0;
while(pos < numBytes) {
int op = random().nextInt(7);
int op = random().nextInt(8);
if (VERBOSE) {
System.out.println(" cycle pos=" + pos);
}
@ -97,21 +97,21 @@ public class TestBytesStore extends LuceneTestCase {
case 3:
{
// reverse bytes
if (pos > 0) {
int len = _TestUtil.nextInt(random(), 1, Math.min(100, pos));
if (pos > 1) {
int len = _TestUtil.nextInt(random(), 2, Math.min(100, pos));
int start;
if (len == pos) {
start = 0;
} else {
start = random().nextInt(pos - len);
}
int end = start + len;
int end = start + len - 1;
if (VERBOSE) {
System.out.println(" reverse start=" + start + " end=" + end + " len=" + len);
System.out.println(" reverse start=" + start + " end=" + end + " len=" + len + " pos=" + pos);
}
bytes.reverse(start, end);
while(start < end) {
while(start <= end) {
byte b = expected[end];
expected[end] = expected[start];
expected[start] = b;
@ -159,17 +159,49 @@ public class TestBytesStore extends LuceneTestCase {
{
// skip
int len = random().nextInt(Math.min(100, numBytes - pos));
pos += len;
bytes.skip(len);
if (VERBOSE) {
System.out.println(" skip len=" + len);
}
pos += len;
bytes.skipBytes(len);
// NOTE: must fill in zeros in case truncate was
// used, else we get false fails:
if (len > 0) {
byte[] zeros = new byte[len];
bytes.writeBytes(pos-len, zeros, 0, len);
}
}
break;
case 7:
{
// absWriteByte
if (pos > 0) {
int dest = random().nextInt(pos);
byte b = (byte) random().nextInt(256);
expected[dest] = b;
bytes.writeByte(dest, b);
}
break;
}
}
assertEquals(pos, bytes.getPosition());
if (pos > 0 && random().nextInt(50) == 17) {
// truncate
int len = _TestUtil.nextInt(random(), 1, Math.min(pos, 100));
bytes.truncate(pos - len);
pos -= len;
Arrays.fill(expected, pos, pos+len, (byte) 0);
if (VERBOSE) {
System.out.println(" truncate len=" + len + " newPos=" + pos);
}
}
if ((pos > 0 && random().nextInt(200) == 17)) {
verify(bytes, expected, pos);
}

View File

@ -483,11 +483,14 @@ public class TestFSTs extends LuceneTestCase {
break;
}
}
long t = System.currentTimeMillis() - tStart;
System.out.println((t / 1000.0) + " sec to build");
long tMid = System.currentTimeMillis();
System.out.println(((tMid-tStart) / 1000.0) + " sec to add all terms");
assert builder.getTermCount() == ord;
FST<T> fst = builder.finish();
long tEnd = System.currentTimeMillis();
System.out.println(((tEnd-tMid) / 1000.0) + " sec to finish/pack");
if (fst == null) {
System.out.println("FST was fully pruned!");
System.exit(0);