From 23eb8720451322433627d5cc5560b9d3e25595b1 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Wed, 30 Jan 2013 13:21:17 +0000 Subject: [PATCH] fix BinaryDVWriter to use ByteBlockPool git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1440397 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/index/BinaryDocValuesWriter.java | 58 ++++--------------- .../org/apache/lucene/util/ByteBlockPool.java | 33 +++++++---- .../lucene/search/suggest/BytesRefArray.java | 4 +- 3 files changed, 35 insertions(+), 60 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java index 66541c396c8..1c5378a4817 100644 --- a/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java @@ -17,38 +17,32 @@ package org.apache.lucene.index; * limitations under the License. */ -import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE; - import java.io.IOException; import java.util.Iterator; import java.util.NoSuchElementException; import org.apache.lucene.codecs.DocValuesConsumer; -import org.apache.lucene.store.RAMFile; -import org.apache.lucene.store.RAMInputStream; -import org.apache.lucene.store.RAMOutputStream; +import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator; +import org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.Counter; import org.apache.lucene.util.packed.AppendingLongBuffer; +import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE; + /** Buffers up pending byte[] per doc, then flushes when * segment flushes. */ class BinaryDocValuesWriter extends DocValuesWriter { - private final RAMFile bytes; - private final RAMOutputStream bytesWriter; + private final ByteBlockPool pool; private final AppendingLongBuffer lengths; private final FieldInfo fieldInfo; - private final Counter iwBytesUsed; - private long bytesUsed; private int addedValues = 0; public BinaryDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) { this.fieldInfo = fieldInfo; - this.bytes = new RAMFile(); - this.bytesWriter = new RAMOutputStream(bytes); - this.iwBytesUsed = iwBytesUsed; + this.pool = new ByteBlockPool(new DirectTrackingAllocator(iwBytesUsed)); this.lengths = new AppendingLongBuffer(); } @@ -70,31 +64,11 @@ class BinaryDocValuesWriter extends DocValuesWriter { } addedValues++; lengths.add(value.length); - try { - bytesWriter.writeBytes(value.bytes, value.offset, value.length); - } catch (IOException e) { - throw new RuntimeException(e); - } - - updateBytesUsed(); - } - - private void updateBytesUsed() { - // nocommit not totally accurate, but just fix not to use RAMFile anyway - long numBuffers = (bytesWriter.getFilePointer() / 1024) + 1; // round up - long oversize = numBuffers * (1024 + 32); // fudge for arraylist/etc overhead - final long newBytesUsed = lengths.ramBytesUsed() + oversize; - iwBytesUsed.addAndGet(newBytesUsed - bytesUsed); - bytesUsed = newBytesUsed; + pool.append(value); } @Override public void finish(int maxDoc) { - try { - bytesWriter.close(); - } catch (IOException e) { - throw new RuntimeException(e); - } } @Override @@ -119,16 +93,11 @@ class BinaryDocValuesWriter extends DocValuesWriter { final AppendingLongBuffer.Iterator lengthsIterator = lengths.iterator(); final int size = lengths.size(); final int maxDoc; - final RAMInputStream bytesReader; int upto; + long byteOffset; BytesIterator(int maxDoc) { this.maxDoc = maxDoc; - try { - bytesReader = new RAMInputStream("BinaryDocValuesWriter", bytes); - } catch (IOException e) { - throw new RuntimeException(e); - } } @Override @@ -143,14 +112,11 @@ class BinaryDocValuesWriter extends DocValuesWriter { } if (upto < size) { int length = (int) lengthsIterator.next(); - value.grow(length); - try { - bytesReader.readBytes(value.bytes, 0, length); - } catch (IOException e) { - throw new RuntimeException(e); - } - value.length = length; + pool.readBytes(value, byteOffset, length); + byteOffset += length; } else { + // This is to handle last N documents not having + // this DV field in the end of the segment: value.length = 0; } upto++; diff --git a/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java b/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java index 265ccea79d4..9f7a85cb4c7 100644 --- a/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java +++ b/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java @@ -253,8 +253,9 @@ public final class ByteBlockPool { final int newSize = LEVEL_SIZE_ARRAY[newLevel]; // Maybe allocate another block - if (byteUpto > BYTE_BLOCK_SIZE-newSize) + if (byteUpto > BYTE_BLOCK_SIZE-newSize) { nextBuffer(); + } final int newUpto = byteUpto; final int offset = newUpto + byteOffset; @@ -280,7 +281,7 @@ public final class ByteBlockPool { // Fill in a BytesRef from term's length & bytes encoded in // byte block - public final void setBytesRef(BytesRef term, int textStart) { + public void setBytesRef(BytesRef term, int textStart) { final byte[] bytes = term.bytes = buffers[textStart >> BYTE_BLOCK_SHIFT]; int pos = textStart & BYTE_BLOCK_MASK; if ((bytes[pos] & 0x80) == 0) { @@ -296,11 +297,14 @@ public final class ByteBlockPool { } /** - * Copies the given {@link BytesRef} at the current positions ( - * {@link #byteUpto} across buffer boundaries + * Appends the bytes in the provided {@link BytesRef} at + * the current position. */ - public final void copy(final BytesRef bytes) { + public void append(final BytesRef bytes) { int length = bytes.length; + if (length == 0) { + return; + } int offset = bytes.offset; int overflow = (length + byteUpto) - BYTE_BLOCK_SIZE; do { @@ -310,9 +314,11 @@ public final class ByteBlockPool { break; } else { final int bytesToCopy = length-overflow; - System.arraycopy(bytes.bytes, offset, buffer, byteUpto, bytesToCopy); - offset += bytesToCopy; - length -= bytesToCopy; + if (bytesToCopy > 0) { + System.arraycopy(bytes.bytes, offset, buffer, byteUpto, bytesToCopy); + offset += bytesToCopy; + length -= bytesToCopy; + } nextBuffer(); overflow = overflow - BYTE_BLOCK_SIZE; } @@ -320,17 +326,20 @@ public final class ByteBlockPool { } /** - * Copies bytes from the pool starting at the given offset with the given + * Reads bytes bytes out of the pool starting at the given offset with the given * length into the given {@link BytesRef} at offset 0. *

Note: this method allows to copy across block boundaries.

*/ - public final void copyFrom(final BytesRef bytes, final int offset, final int length) { + public void readBytes(final BytesRef bytes, final long offset, final int length) { bytes.offset = 0; bytes.grow(length); bytes.length = length; - int bufferIndex = offset >> BYTE_BLOCK_SHIFT; + if (length == 0) { + return; + } + int bufferIndex = (int) (offset >> BYTE_BLOCK_SHIFT); byte[] buffer = buffers[bufferIndex]; - int pos = offset & BYTE_BLOCK_MASK; + int pos = (int) (offset & BYTE_BLOCK_MASK); int overflow = (pos + length) - BYTE_BLOCK_SIZE; do { if (overflow <= 0) { diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/BytesRefArray.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/BytesRefArray.java index 15ca9eb4412..ab65ff7a390 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/BytesRefArray.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/BytesRefArray.java @@ -79,7 +79,7 @@ final class BytesRefArray { bytesUsed.addAndGet((offsets.length - oldLen) * RamUsageEstimator.NUM_BYTES_INT); } - pool.copy(bytes); + pool.append(bytes); offsets[lastElement++] = currentOffset; currentOffset += bytes.length; return lastElement; @@ -104,7 +104,7 @@ final class BytesRefArray { int offset = offsets[ord]; int length = ord == lastElement - 1 ? currentOffset - offset : offsets[ord + 1] - offset; - pool.copyFrom(spare, offset, length); + pool.readBytes(spare, offset, length); return spare; } throw new IndexOutOfBoundsException("index " + ord