mirror of https://github.com/apache/lucene.git

commit 7a4f7c669b

    Merged /lucene/dev/trunk:r1514641-1514711

    git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5178@1514719 13f79535-47bb-0310-9956-ffa450edef68
@@ -122,6 +122,11 @@ Bug Fixes
   boundary, made it into the top-N and went to the formatter.
   (Manuel Amoabeng, Michael McCandless, Robert Muir)
 
+* LUCENE-4583: Indexing core no longer enforces a limit on maximum
+  length binary doc values fields, but individual codecs (including
+  the default one) have their own limits (David Smiley, Robert Muir,
+  Mike McCandless)
+
 API Changes
 
 * LUCENE-5094: Add ramBytesUsed() to MultiDocValues.OrdinalMap.
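For illustration only, not part of this commit: with the LUCENE-4583 change above, the per-value size check moves out of the indexing chain and into each codec, so whether a large binary doc value is accepted now depends on the DocValuesFormat in use (the default format keeps a 32766-byte cap). A minimal sketch assuming stock 4.x APIs; the class name, directory path, field name and version constant below are made up:

    import java.io.File;
    import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
    import org.apache.lucene.document.BinaryDocValuesField;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.Version;

    public class LargeBinaryDocValuesSketch {                       // hypothetical class name
      public static void main(String[] args) throws Exception {
        Directory dir = FSDirectory.open(new File("/tmp/index"));   // hypothetical path
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_44,
            new WhitespaceAnalyzer(Version.LUCENE_44));
        IndexWriter writer = new IndexWriter(dir, iwc);

        byte[] big = new byte[100000];                              // > 32766 bytes
        Document doc = new Document();
        doc.add(new BinaryDocValuesField("blob", new BytesRef(big)));
        try {
          writer.addDocument(doc);  // the indexing chain itself no longer rejects large values
        } catch (IllegalArgumentException e) {
          // still thrown by codecs that keep a per-value limit (the default Lucene42 format does)
        }
        writer.close();
        dir.close();
      }
    }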
@@ -118,6 +118,11 @@ import org.apache.lucene.util.packed.PackedInts;
  * {@code BYTES_VAR_DEREF BYTES_VAR_DEREF} it doesn't apply deduplication of the document values.
  * </li>
  * </ul>
+ * <p>
+ * Limitations:
+ * <ul>
+ *   <li> Binary doc values can be at most {@link #MAX_BINARY_FIELD_LENGTH} in length.
+ * </ul>
  * @deprecated Only for reading old 4.0 and 4.1 segments
  */
 @Deprecated
@@ -125,6 +130,9 @@ import org.apache.lucene.util.packed.PackedInts;
 // for back compat only!
 public class Lucene40DocValuesFormat extends DocValuesFormat {
+
+  /** Maximum length for each binary doc values field. */
+  public static final int MAX_BINARY_FIELD_LENGTH = (1 << 15) - 2;
 
   /** Sole constructor. */
   public Lucene40DocValuesFormat() {
     super("Lucene40");
@@ -37,14 +37,14 @@ import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.MathUtil;
 import org.apache.lucene.util.fst.Builder;
-import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.FST.INPUT_TYPE;
+import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.PositiveIntOutputs;
 import org.apache.lucene.util.fst.Util;
 import org.apache.lucene.util.packed.BlockPackedWriter;
 import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
-import org.apache.lucene.util.packed.PackedInts;
 import org.apache.lucene.util.packed.PackedInts.FormatAndBits;
+import org.apache.lucene.util.packed.PackedInts;
 
 /**
  * Writer for {@link Lucene42DocValuesFormat}
@@ -220,6 +220,9 @@ class Lucene42DocValuesConsumer extends DocValuesConsumer {
     final long startFP = data.getFilePointer();
     for(BytesRef v : values) {
       final int length = v == null ? 0 : v.length;
+      if (length > Lucene42DocValuesFormat.MAX_BINARY_FIELD_LENGTH) {
+        throw new IllegalArgumentException("DocValuesField \"" + field.name + "\" is too large, must be <= " + Lucene42DocValuesFormat.MAX_BINARY_FIELD_LENGTH);
+      }
       minLength = Math.min(minLength, length);
       maxLength = Math.max(maxLength, length);
       if (v != null) {
@@ -118,8 +118,17 @@ import org.apache.lucene.util.packed.BlockPackedWriter;
  * <p>SortedSet entries store the list of ordinals in their BinaryData as a
  * sequences of increasing {@link DataOutput#writeVLong vLong}s, delta-encoded.</p>
  * </ol>
+ * <p>
+ * Limitations:
+ * <ul>
+ *   <li> Binary doc values can be at most {@link #MAX_BINARY_FIELD_LENGTH} in length.
+ * </ul>
  */
 public final class Lucene42DocValuesFormat extends DocValuesFormat {
+
+  /** Maximum length for each binary doc values field. */
+  public static final int MAX_BINARY_FIELD_LENGTH = (1 << 15) - 2;
+
   final float acceptableOverheadRatio;
 
   /**
@@ -22,33 +22,44 @@ import java.util.Iterator;
 import java.util.NoSuchElementException;
 
 import org.apache.lucene.codecs.DocValuesConsumer;
-import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
-import org.apache.lucene.util.ByteBlockPool;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.Counter;
 import org.apache.lucene.util.OpenBitSet;
 import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.PagedBytes;
 import org.apache.lucene.util.packed.AppendingDeltaPackedLongBuffer;
 import org.apache.lucene.util.packed.PackedInts;
 
-import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
-
 
 /** Buffers up pending byte[] per doc, then flushes when
  *  segment flushes. */
 class BinaryDocValuesWriter extends DocValuesWriter {
 
-  private final ByteBlockPool pool;
+  /** Maximum length for a binary field; we set this to "a
+   *  bit" below Integer.MAX_VALUE because the exact max
+   *  allowed byte[] is JVM dependent, so we want to avoid
+   *  a case where a large value worked in one JVM but
+   *  failed later at search time with a different JVM. */
+  private static final int MAX_LENGTH = Integer.MAX_VALUE-256;
+
+  // 32 KB block sizes for PagedBytes storage:
+  private final static int BLOCK_BITS = 15;
+
+  private final PagedBytes bytes;
+  private final DataOutput bytesOut;
+
+  private final Counter iwBytesUsed;
   private final AppendingDeltaPackedLongBuffer lengths;
   private final OpenBitSet docsWithField;
-  private final Counter iwBytesUsed;
-  private long bytesUsed;
   private final FieldInfo fieldInfo;
-  private int addedValues = 0;
+  private int addedValues;
+  private long bytesUsed;
 
   public BinaryDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
     this.fieldInfo = fieldInfo;
-    this.pool = new ByteBlockPool(new DirectTrackingAllocator(iwBytesUsed));
+    this.bytes = new PagedBytes(BLOCK_BITS);
+    this.bytesOut = bytes.getDataOutput();
     this.lengths = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
     this.iwBytesUsed = iwBytesUsed;
     this.docsWithField = new OpenBitSet();
@@ -63,8 +74,8 @@ class BinaryDocValuesWriter extends DocValuesWriter {
     if (value == null) {
       throw new IllegalArgumentException("field=\"" + fieldInfo.name + "\": null value not allowed");
     }
-    if (value.length > (BYTE_BLOCK_SIZE - 2)) {
-      throw new IllegalArgumentException("DocValuesField \"" + fieldInfo.name + "\" is too large, must be <= " + (BYTE_BLOCK_SIZE - 2));
+    if (value.length > MAX_LENGTH) {
+      throw new IllegalArgumentException("DocValuesField \"" + fieldInfo.name + "\" is too large, must be <= " + MAX_LENGTH);
     }
 
     // Fill in any holes:
@@ -74,7 +85,12 @@ class BinaryDocValuesWriter extends DocValuesWriter {
     }
     addedValues++;
     lengths.add(value.length);
-    pool.append(value);
+    try {
+      bytesOut.writeBytes(value.bytes, value.offset, value.length);
+    } catch (IOException ioe) {
+      // Should never happen!
+      throw new RuntimeException(ioe);
+    }
     docsWithField.set(docID);
     updateBytesUsed();
   }
@@ -85,7 +101,7 @@ class BinaryDocValuesWriter extends DocValuesWriter {
   }
 
   private void updateBytesUsed() {
-    final long newBytesUsed = docsWithFieldBytesUsed();
+    final long newBytesUsed = lengths.ramBytesUsed() + bytes.ramBytesUsed() + docsWithFieldBytesUsed();
     iwBytesUsed.addAndGet(newBytesUsed - bytesUsed);
     bytesUsed = newBytesUsed;
   }
@@ -97,6 +113,7 @@ class BinaryDocValuesWriter extends DocValuesWriter {
   @Override
   public void flush(SegmentWriteState state, DocValuesConsumer dvConsumer) throws IOException {
     final int maxDoc = state.segmentInfo.getDocCount();
+    bytes.freeze(false);
     dvConsumer.addBinaryField(fieldInfo,
                               new Iterable<BytesRef>() {
                                 @Override
@@ -114,10 +131,10 @@ class BinaryDocValuesWriter extends DocValuesWriter {
   private class BytesIterator implements Iterator<BytesRef> {
     final BytesRef value = new BytesRef();
     final AppendingDeltaPackedLongBuffer.Iterator lengthsIterator = lengths.iterator();
+    final DataInput bytesIterator = bytes.getDataInput();
     final int size = (int) lengths.size();
     final int maxDoc;
     int upto;
-    long byteOffset;
 
     BytesIterator(int maxDoc) {
       this.maxDoc = maxDoc;
@@ -138,8 +155,12 @@ class BinaryDocValuesWriter extends DocValuesWriter {
       int length = (int) lengthsIterator.next();
       value.grow(length);
       value.length = length;
-      pool.readBytes(byteOffset, value.bytes, value.offset, value.length);
-      byteOffset += length;
+      try {
+        bytesIterator.readBytes(value.bytes, value.offset, value.length);
+      } catch (IOException ioe) {
+        // Should never happen!
+        throw new RuntimeException(ioe);
+      }
       if (docsWithField.get(upto)) {
         v = value;
       } else {
@@ -92,21 +92,22 @@ public final class FieldInfo {
      */
     NUMERIC,
     /**
-     * A per-document byte[].
+     * A per-document byte[].  Values may be larger than
+     * 32766 bytes, but different codecs may enforce their own limits.
      */
     BINARY,
     /**
      * A pre-sorted byte[]. Fields with this type only store distinct byte values
      * and store an additional offset pointer per document to dereference the shared
      * byte[]. The stored byte[] is presorted and allows access via document id,
-     * ordinal and by-value.
+     * ordinal and by-value.  Values must be <= 32766 bytes.
      */
     SORTED,
     /**
     * A pre-sorted Set<byte[]>. Fields with this type only store distinct byte values
     * and store additional offset pointers per document to dereference the shared
     * byte[]s. The stored byte[] is presorted and allows access via document id,
-    * ordinal and by-value.
+    * ordinal and by-value.  Values must be <= 32766 bytes.
     */
    SORTED_SET
  };
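As an aside, not from the patch: the updated javadoc above distinguishes BINARY (may exceed 32766 bytes, codec permitting) from SORTED/SORTED_SET (hard 32766-byte cap per value). A tiny sketch of that difference, assuming an IndexWriter set up as usual; the field names and array size are arbitrary:

    byte[] big = new byte[40000];                                    // > 32766 bytes
    Document doc = new Document();
    doc.add(new BinaryDocValuesField("binary", new BytesRef(big)));  // accepted or rejected depending on the codec
    doc.add(new SortedDocValuesField("sorted", new BytesRef(big)));  // rejected at addDocument() regardless of codec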
@@ -21,6 +21,8 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.IndexInput;
 
 /** Represents a logical byte[] as a series of pages.  You
@@ -34,6 +36,7 @@ import org.apache.lucene.store.IndexInput;
 // other "shift/mask big arrays". there are too many of these classes!
 public final class PagedBytes {
   private final List<byte[]> blocks = new ArrayList<byte[]>();
+  // TODO: these are unused?
   private final List<Integer> blockEnd = new ArrayList<Integer>();
   private final int blockSize;
   private final int blockBits;
@@ -42,6 +45,7 @@ public final class PagedBytes {
   private boolean frozen;
   private int upto;
   private byte[] currentBlock;
+  private final long bytesUsedPerBlock;
 
   private static final byte[] EMPTY_BYTES = new byte[0];
 
@@ -75,13 +79,13 @@ public final class PagedBytes {
    *  given length. Iff the slice spans across a block border this method will
    *  allocate sufficient resources and copy the paged data.
    *  <p>
-   *  Slices spanning more than one block are not supported.
+   *  Slices spanning more than two blocks are not supported.
    *  </p>
    *  @lucene.internal
    **/
   public void fillSlice(BytesRef b, long start, int length) {
     assert length >= 0: "length=" + length;
-    assert length <= blockSize+1;
+    assert length <= blockSize+1: "length=" + length;
     final int index = (int) (start >> blockBits);
     final int offset = (int) (start & blockMask);
     b.length = length;
@@ -132,6 +136,7 @@ public final class PagedBytes {
     this.blockBits = blockBits;
     blockMask = blockSize-1;
     upto = blockSize;
+    bytesUsedPerBlock = blockSize + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + RamUsageEstimator.NUM_BYTES_OBJECT_REF;
   }
 
   /** Read this many bytes from in */
@@ -216,6 +221,11 @@ public final class PagedBytes {
     }
   }
 
+  /** Return approx RAM usage in bytes. */
+  public long ramBytesUsed() {
+    return (blocks.size() + (currentBlock != null ? 1 : 0)) * bytesUsedPerBlock;
+  }
+
   /** Copy bytes in, writing the length as a 1 or 2 byte
    *  vInt prefix. */
   // TODO: this really needs to be refactored into fieldcacheimpl!
@@ -249,4 +259,148 @@ public final class PagedBytes {
 
     return pointer;
   }
+
+  public final class PagedBytesDataInput extends DataInput {
+    private int currentBlockIndex;
+    private int currentBlockUpto;
+    private byte[] currentBlock;
+
+    PagedBytesDataInput() {
+      currentBlock = blocks.get(0);
+    }
+
+    @Override
+    public PagedBytesDataInput clone() {
+      PagedBytesDataInput clone = getDataInput();
+      clone.setPosition(getPosition());
+      return clone;
+    }
+
+    /** Returns the current byte position. */
+    public long getPosition() {
+      return (long) currentBlockIndex * blockSize + currentBlockUpto;
+    }
+
+    /** Seek to a position previously obtained from
+     *  {@link #getPosition}. */
+    public void setPosition(long pos) {
+      currentBlockIndex = (int) (pos >> blockBits);
+      currentBlock = blocks.get(currentBlockIndex);
+      currentBlockUpto = (int) (pos & blockMask);
+    }
+
+    @Override
+    public byte readByte() {
+      if (currentBlockUpto == blockSize) {
+        nextBlock();
+      }
+      return currentBlock[currentBlockUpto++];
+    }
+
+    @Override
+    public void readBytes(byte[] b, int offset, int len) {
+      assert b.length >= offset + len;
+      final int offsetEnd = offset + len;
+      while (true) {
+        final int blockLeft = blockSize - currentBlockUpto;
+        final int left = offsetEnd - offset;
+        if (blockLeft < left) {
+          System.arraycopy(currentBlock, currentBlockUpto,
+                           b, offset,
+                           blockLeft);
+          nextBlock();
+          offset += blockLeft;
+        } else {
+          // Last block
+          System.arraycopy(currentBlock, currentBlockUpto,
+                           b, offset,
+                           left);
+          currentBlockUpto += left;
+          break;
+        }
+      }
+    }
+
+    private void nextBlock() {
+      currentBlockIndex++;
+      currentBlockUpto = 0;
+      currentBlock = blocks.get(currentBlockIndex);
+    }
+  }
+
+  public final class PagedBytesDataOutput extends DataOutput {
+    @Override
+    public void writeByte(byte b) {
+      if (upto == blockSize) {
+        if (currentBlock != null) {
+          blocks.add(currentBlock);
+          blockEnd.add(upto);
+        }
+        currentBlock = new byte[blockSize];
+        upto = 0;
+      }
+      currentBlock[upto++] = b;
+    }
+
+    @Override
+    public void writeBytes(byte[] b, int offset, int length) {
+      assert b.length >= offset + length;
+      if (length == 0) {
+        return;
+      }
+
+      if (upto == blockSize) {
+        if (currentBlock != null) {
+          blocks.add(currentBlock);
+          blockEnd.add(upto);
+        }
+        currentBlock = new byte[blockSize];
+        upto = 0;
+      }
+
+      final int offsetEnd = offset + length;
+      while(true) {
+        final int left = offsetEnd - offset;
+        final int blockLeft = blockSize - upto;
+        if (blockLeft < left) {
+          System.arraycopy(b, offset, currentBlock, upto, blockLeft);
+          blocks.add(currentBlock);
+          blockEnd.add(blockSize);
+          currentBlock = new byte[blockSize];
+          upto = 0;
+          offset += blockLeft;
+        } else {
+          // Last block
+          System.arraycopy(b, offset, currentBlock, upto, left);
+          upto += left;
+          break;
+        }
+      }
+    }
+
+    /** Return the current byte position. */
+    public long getPosition() {
+      return getPointer();
+    }
+  }
+
+  /** Returns a DataInput to read values from this
+   *  PagedBytes instance. */
+  public PagedBytesDataInput getDataInput() {
+    if (!frozen) {
+      throw new IllegalStateException("must call freeze() before getDataInput");
+    }
+    return new PagedBytesDataInput();
+  }
+
+  /** Returns a DataOutput that you may use to write into
+   *  this PagedBytes instance.  If you do this, you should
+   *  not call the other writing methods (eg, copy);
+   *  results are undefined. */
+  public PagedBytesDataOutput getDataOutput() {
+    if (frozen) {
+      throw new IllegalStateException("cannot get DataOutput after freeze()");
+    }
+    return new PagedBytesDataOutput();
+  }
 }
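As an aside, not part of the patch: the PagedBytesDataOutput/PagedBytesDataInput pair added above allows a simple in-memory round trip, which is the pattern BinaryDocValuesWriter and the new TestPagedBytes test rely on. A minimal sketch using only the API shown in this hunk; the class name is hypothetical:

    import org.apache.lucene.store.DataInput;
    import org.apache.lucene.store.DataOutput;
    import org.apache.lucene.util.PagedBytes;

    public class PagedBytesRoundTrip {                      // hypothetical class name
      public static void main(String[] args) throws Exception {
        PagedBytes bytes = new PagedBytes(15);              // 32 KB blocks (1 << 15)
        DataOutput out = bytes.getDataOutput();
        byte[] payload = new byte[100000];                  // spans several blocks
        out.writeBytes(payload, 0, payload.length);

        bytes.freeze(false);                                // required before getDataInput()
        DataInput in = bytes.getDataInput();
        byte[] copy = new byte[payload.length];
        in.readBytes(copy, 0, copy.length);                 // reads back across block boundaries

        System.out.println("approx RAM used: " + bytes.ramBytesUsed() + " bytes");
      }
    }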
@@ -31,4 +31,10 @@ public class TestLucene40DocValuesFormat extends BaseDocValuesFormatTestCase {
     return codec;
   }
+
+  // LUCENE-4583: This codec should throw IAE on huge binary values:
+  @Override
+  protected boolean codecAcceptsHugeBinaryValues(String field) {
+    return false;
+  }
 
 }
@@ -30,4 +30,9 @@ public class TestLucene42DocValuesFormat extends BaseCompressingDocValuesFormatT
   protected Codec getCodec() {
     return codec;
   }
+
+  @Override
+  protected boolean codecAcceptsHugeBinaryValues(String field) {
+    return false;
+  }
 }
@@ -46,6 +46,7 @@ import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util._TestUtil;
 
 /**
  * Basic tests of PerFieldDocValuesFormat
@@ -64,6 +65,11 @@ public class TestPerFieldDocValuesFormat extends BaseDocValuesFormatTestCase {
     return codec;
   }
+
+  @Override
+  protected boolean codecAcceptsHugeBinaryValues(String field) {
+    return _TestUtil.fieldSupportsHugeBinaryDocValues(field);
+  }
 
   // just a simple trivial test
   // TODO: we should come up with a test that somehow checks that segment suffix
   // is respected by all codec apis (not just docvalues and postings)
@@ -18,6 +18,7 @@ package org.apache.lucene.index;
  */
 
 import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.util._TestUtil;
 
 /** Tests the codec configuration defined by LuceneTestCase randomly
  *  (typically a mix across different fields).
@@ -28,4 +29,9 @@ public class TestDocValuesFormat extends BaseDocValuesFormatTestCase {
   protected Codec getCodec() {
     return Codec.getDefault();
   }
+
+  @Override
+  protected boolean codecAcceptsHugeBinaryValues(String field) {
+    return _TestUtil.fieldSupportsHugeBinaryDocValues(field);
+  }
 }
@@ -327,30 +327,6 @@ public class TestDocValuesIndexing extends LuceneTestCase {
     directory.close();
   }
 
-  public void testTooLargeBytes() throws IOException {
-    Analyzer analyzer = new MockAnalyzer(random());
-
-    Directory directory = newDirectory();
-    // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
-    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
-    iwc.setMergePolicy(newLogMergePolicy());
-    IndexWriter iwriter = new IndexWriter(directory, iwc);
-    Document doc = new Document();
-    byte bytes[] = new byte[100000];
-    BytesRef b = new BytesRef(bytes);
-    random().nextBytes(bytes);
-    doc.add(new BinaryDocValuesField("dv", b));
-    try {
-      iwriter.addDocument(doc);
-      fail("did not get expected exception");
-    } catch (IllegalArgumentException expected) {
-      // expected
-    }
-    iwriter.close();
-
-    directory.close();
-  }
-
   public void testTooLargeSortedBytes() throws IOException {
     Analyzer analyzer = new MockAnalyzer(random());
@@ -22,6 +22,7 @@ import java.util.*;
 
 import org.apache.lucene.store.BaseDirectoryWrapper;
 import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
@@ -30,6 +31,9 @@ import org.junit.Ignore;
 
 public class TestPagedBytes extends LuceneTestCase {
 
+  // Writes random byte/s to "normal" file in dir, then
+  // copies into PagedBytes and verifies with
+  // PagedBytes.Reader:
   public void testDataInputOutput() throws Exception {
     Random random = random();
     for(int iter=0;iter<5*RANDOM_MULTIPLIER;iter++) {
@@ -90,6 +94,60 @@ public class TestPagedBytes extends LuceneTestCase {
     }
   }
 
+  // Writes random byte/s into PagedBytes via
+  // .getDataOutput(), then verifies with
+  // PagedBytes.getDataInput():
+  public void testDataInputOutput2() throws Exception {
+    Random random = random();
+    for(int iter=0;iter<5*RANDOM_MULTIPLIER;iter++) {
+      final int blockBits = _TestUtil.nextInt(random, 1, 20);
+      final int blockSize = 1 << blockBits;
+      final PagedBytes p = new PagedBytes(blockBits);
+      final DataOutput out = p.getDataOutput();
+      final int numBytes = random().nextInt(10000000);
+
+      final byte[] answer = new byte[numBytes];
+      random().nextBytes(answer);
+      int written = 0;
+      while(written < numBytes) {
+        if (random().nextInt(10) == 7) {
+          out.writeByte(answer[written++]);
+        } else {
+          int chunk = Math.min(random().nextInt(1000), numBytes - written);
+          out.writeBytes(answer, written, chunk);
+          written += chunk;
+        }
+      }
+
+      final PagedBytes.Reader reader = p.freeze(random.nextBoolean());
+
+      final DataInput in = p.getDataInput();
+
+      final byte[] verify = new byte[numBytes];
+      int read = 0;
+      while(read < numBytes) {
+        if (random().nextInt(10) == 7) {
+          verify[read++] = in.readByte();
+        } else {
+          int chunk = Math.min(random().nextInt(1000), numBytes - read);
+          in.readBytes(verify, read, chunk);
+          read += chunk;
+        }
+      }
+      assertTrue(Arrays.equals(answer, verify));
+
+      final BytesRef slice = new BytesRef();
+      for(int iter2=0;iter2<100;iter2++) {
+        final int pos = random.nextInt(numBytes-1);
+        final int len = random.nextInt(Math.min(blockSize+1, numBytes - pos));
+        reader.fillSlice(slice, pos, len);
+        for(int byteUpto=0;byteUpto<len;byteUpto++) {
+          assertEquals(answer[pos + byteUpto], slice.bytes[slice.offset + byteUpto]);
+        }
+      }
+    }
+  }
+
   @Ignore // memory hole
   public void testOverflow() throws IOException {
     BaseDirectoryWrapper dir = newFSDirectory(_TestUtil.getTempDir("testOverflow"));
@@ -126,4 +184,5 @@ public class TestPagedBytes extends LuceneTestCase {
     in.close();
     dir.close();
   }
+
 }
@@ -22,13 +22,16 @@ import java.io.IOException;
 import java.io.PrintStream;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.facet.FacetTestCase;
 import org.apache.lucene.facet.FacetTestUtils;
+import org.apache.lucene.facet.codecs.facet42.Facet42Codec;
 import org.apache.lucene.facet.index.FacetFields;
 import org.apache.lucene.facet.params.CategoryListParams;
 import org.apache.lucene.facet.params.FacetIndexingParams;
@@ -48,6 +51,8 @@ import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
 import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util._TestUtil;
 
 public class TestDemoFacets extends FacetTestCase {
 
@@ -248,4 +253,60 @@ public class TestDemoFacets extends FacetTestCase {
     dir.close();
     taxoDir.close();
   }
+
+  // LUCENE-4583: make sure if we require > 32 KB for one
+  // document, we don't hit exc when using Facet42DocValuesFormat
+  public void testManyFacetsInOneDocument() throws Exception {
+    Directory dir = newDirectory();
+    Directory taxoDir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setCodec(new Facet42Codec());
+    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
+    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
+
+    FacetFields facetFields = new FacetFields(taxoWriter);
+
+    int numLabels = _TestUtil.nextInt(random(), 40000, 100000);
+
+    Document doc = new Document();
+    doc.add(newTextField("field", "text", Field.Store.NO));
+    List<CategoryPath> paths = new ArrayList<CategoryPath>();
+    for(int i=0;i<numLabels;i++) {
+      paths.add(new CategoryPath("dim", "" + i));
+    }
+    facetFields.addFields(doc, paths);
+    writer.addDocument(doc);
+
+    // NRT open
+    IndexSearcher searcher = newSearcher(writer.getReader());
+    writer.close();
+
+    // NRT open
+    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
+    taxoWriter.close();
+
+    FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("dim"), Integer.MAX_VALUE));
+
+    // Aggregate the facet counts:
+    FacetsCollector c = FacetsCollector.create(fsp, searcher.getIndexReader(), taxoReader);
+
+    // MatchAllDocsQuery is for "browsing" (counts facets
+    // for all non-deleted docs in the index); normally
+    // you'd use a "normal" query, and use MultiCollector to
+    // wrap collecting the "normal" hits and also facets:
+    searcher.search(new MatchAllDocsQuery(), c);
+    List<FacetResult> results = c.getFacetResults();
+    assertEquals(1, results.size());
+    FacetResultNode root = results.get(0).getFacetResultNode();
+    assertEquals(numLabels, root.subResults.size());
+    Set<String> allLabels = new HashSet<String>();
+    for(FacetResultNode childNode : root.subResults) {
+      assertEquals(2, childNode.label.length);
+      allLabels.add(childNode.label.components[1]);
+      assertEquals(1, (int) childNode.value);
+    }
+    assertEquals(numLabels, allLabels.size());
+
+    IOUtils.close(searcher.getIndexReader(), taxoReader, dir, taxoDir);
+  }
 }
@@ -162,6 +162,9 @@ class Lucene40DocValuesWriter extends DocValuesConsumer {
       if (b == null) {
         b = new BytesRef(); // 4.0 doesnt distinguish
       }
+      if (b.length > Lucene40DocValuesFormat.MAX_BINARY_FIELD_LENGTH) {
+        throw new IllegalArgumentException("DocValuesField \"" + field.name + "\" is too large, must be <= " + Lucene40DocValuesFormat.MAX_BINARY_FIELD_LENGTH);
+      }
       minLength = Math.min(minLength, b.length);
       maxLength = Math.max(maxLength, b.length);
       if (uniqueValues != null) {
@@ -17,21 +17,20 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
 
-import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
-
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 import java.util.Map.Entry;
+import java.util.Map;
 import java.util.Set;
 import java.util.TreeSet;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat;
 import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -58,6 +57,8 @@ import org.apache.lucene.util.BytesRefHash;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
 
+import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
+
 /**
  * Abstract class to do basic tests for a docvalues format.
  * NOTE: This test focuses on the docvalues impl, nothing else.
@@ -2401,4 +2402,172 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase {
     directory.close();
   }
+
+  // LUCENE-4853
+  public void testHugeBinaryValues() throws Exception {
+    Analyzer analyzer = new MockAnalyzer(random());
+    // FSDirectory because SimpleText will consume gobbs of
+    // space when storing big binary values:
+    Directory d = newFSDirectory(_TestUtil.getTempDir("hugeBinaryValues"));
+    boolean doFixed = random().nextBoolean();
+    int numDocs;
+    int fixedLength = 0;
+    if (doFixed) {
+      // Sometimes make all values fixed length since some
+      // codecs have different code paths for this:
+      numDocs = _TestUtil.nextInt(random(), 10, 20);
+      fixedLength = _TestUtil.nextInt(random(), 65537, 256*1024);
+    } else {
+      numDocs = _TestUtil.nextInt(random(), 100, 200);
+    }
+    IndexWriter w = new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
+    List<byte[]> docBytes = new ArrayList<byte[]>();
+    long totalBytes = 0;
+    for(int docID=0;docID<numDocs;docID++) {
+      // we don't use RandomIndexWriter because it might add
+      // more docvalues than we expect !!!!
+
+      // Must be > 64KB in size to ensure more than 2 pages in
+      // PagedBytes would be needed:
+      int numBytes;
+      if (doFixed) {
+        numBytes = fixedLength;
+      } else if (docID == 0 || random().nextInt(5) == 3) {
+        numBytes = _TestUtil.nextInt(random(), 65537, 3*1024*1024);
+      } else {
+        numBytes = _TestUtil.nextInt(random(), 1, 1024*1024);
+      }
+      totalBytes += numBytes;
+      if (totalBytes > 5 * 1024*1024) {
+        break;
+      }
+      byte[] bytes = new byte[numBytes];
+      random().nextBytes(bytes);
+      docBytes.add(bytes);
+      Document doc = new Document();
+      BytesRef b = new BytesRef(bytes);
+      b.length = bytes.length;
+      doc.add(new BinaryDocValuesField("field", b));
+      doc.add(new StringField("id", ""+docID, Field.Store.YES));
+      try {
+        w.addDocument(doc);
+      } catch (IllegalArgumentException iae) {
+        if (iae.getMessage().indexOf("is too large") == -1) {
+          throw iae;
+        } else {
+          // OK: some codecs can't handle binary DV > 32K
+          assertFalse(codecAcceptsHugeBinaryValues("field"));
+          w.rollback();
+          d.close();
+          return;
+        }
+      }
+    }
+
+    DirectoryReader r;
+    try {
+      r = w.getReader();
+    } catch (IllegalArgumentException iae) {
+      if (iae.getMessage().indexOf("is too large") == -1) {
+        throw iae;
+      } else {
+        assertFalse(codecAcceptsHugeBinaryValues("field"));
+
+        // OK: some codecs can't handle binary DV > 32K
+        w.rollback();
+        d.close();
+        return;
+      }
+    }
+    w.close();
+
+    AtomicReader ar = SlowCompositeReaderWrapper.wrap(r);
+
+    BinaryDocValues s = FieldCache.DEFAULT.getTerms(ar, "field");
+    for(int docID=0;docID<docBytes.size();docID++) {
+      StoredDocument doc = ar.document(docID);
+      BytesRef bytes = new BytesRef();
+      s.get(docID, bytes);
+      byte[] expected = docBytes.get(Integer.parseInt(doc.get("id")));
+      assertEquals(expected.length, bytes.length);
+      assertEquals(new BytesRef(expected), bytes);
+    }
+
+    assertTrue(codecAcceptsHugeBinaryValues("field"));
+
+    ar.close();
+    d.close();
+  }
+
+  public void testHugeBinaryValueLimit() throws Exception {
+    // We only test DVFormats that have a limit
+    assumeFalse("test requires codec with limits on max binary field length", codecAcceptsHugeBinaryValues("field"));
+    Analyzer analyzer = new MockAnalyzer(random());
+    // FSDirectory because SimpleText will consume gobbs of
+    // space when storing big binary values:
+    Directory d = newFSDirectory(_TestUtil.getTempDir("hugeBinaryValues"));
+    boolean doFixed = random().nextBoolean();
+    int numDocs;
+    int fixedLength = 0;
+    if (doFixed) {
+      // Sometimes make all values fixed length since some
+      // codecs have different code paths for this:
+      numDocs = _TestUtil.nextInt(random(), 10, 20);
+      fixedLength = Lucene42DocValuesFormat.MAX_BINARY_FIELD_LENGTH;
+    } else {
+      numDocs = _TestUtil.nextInt(random(), 100, 200);
+    }
+    IndexWriter w = new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
+    List<byte[]> docBytes = new ArrayList<byte[]>();
+    long totalBytes = 0;
+    for(int docID=0;docID<numDocs;docID++) {
+      // we don't use RandomIndexWriter because it might add
+      // more docvalues than we expect !!!!
+
+      // Must be > 64KB in size to ensure more than 2 pages in
+      // PagedBytes would be needed:
+      int numBytes;
+      if (doFixed) {
+        numBytes = fixedLength;
+      } else if (docID == 0 || random().nextInt(5) == 3) {
+        numBytes = Lucene42DocValuesFormat.MAX_BINARY_FIELD_LENGTH;
+      } else {
+        numBytes = _TestUtil.nextInt(random(), 1, Lucene42DocValuesFormat.MAX_BINARY_FIELD_LENGTH);
+      }
+      totalBytes += numBytes;
+      if (totalBytes > 5 * 1024*1024) {
+        break;
+      }
+      byte[] bytes = new byte[numBytes];
+      random().nextBytes(bytes);
+      docBytes.add(bytes);
+      Document doc = new Document();
+      BytesRef b = new BytesRef(bytes);
+      b.length = bytes.length;
+      doc.add(new BinaryDocValuesField("field", b));
+      doc.add(new StringField("id", ""+docID, Field.Store.YES));
+      w.addDocument(doc);
+    }
+
+    DirectoryReader r = w.getReader();
+    w.close();
+
+    AtomicReader ar = SlowCompositeReaderWrapper.wrap(r);
+
+    BinaryDocValues s = FieldCache.DEFAULT.getTerms(ar, "field");
+    for(int docID=0;docID<docBytes.size();docID++) {
+      StoredDocument doc = ar.document(docID);
+      BytesRef bytes = new BytesRef();
+      s.get(docID, bytes);
+      byte[] expected = docBytes.get(Integer.parseInt(doc.get("id")));
+      assertEquals(expected.length, bytes.length);
+      assertEquals(new BytesRef(expected), bytes);
+    }
+
+    ar.close();
+    d.close();
+  }
+
+  protected boolean codecAcceptsHugeBinaryValues(String field) {
+    return true;
+  }
 }
@@ -59,12 +59,12 @@ import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.SortedDocValuesField;
 import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.CheckIndex;
 import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus;
 import org.apache.lucene.index.CheckIndex.Status.FieldNormStatus;
 import org.apache.lucene.index.CheckIndex.Status.StoredFieldStatus;
 import org.apache.lucene.index.CheckIndex.Status.TermIndexStatus;
 import org.apache.lucene.index.CheckIndex.Status.TermVectorStatus;
+import org.apache.lucene.index.CheckIndex;
 import org.apache.lucene.index.ConcurrentMergeScheduler;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.DocsEnum;
@@ -744,13 +744,24 @@ public class _TestUtil {
     }
   }
 
-  public static String getDocValuesFormat(Codec codec, String field) {
-    DocValuesFormat d = codec.docValuesFormat();
-    if (d instanceof PerFieldDocValuesFormat) {
-      return ((PerFieldDocValuesFormat)d).getDocValuesFormatForField(field).getName();
-    } else {
-      return d.getName();
-    }
+  public static String getDocValuesFormat(String field) {
+    return getDocValuesFormat(Codec.getDefault(), field);
+  }
+
+  public static String getDocValuesFormat(Codec codec, String field) {
+    DocValuesFormat f = codec.docValuesFormat();
+    if (f instanceof PerFieldDocValuesFormat) {
+      return ((PerFieldDocValuesFormat) f).getDocValuesFormatForField(field).getName();
+    } else {
+      return f.getName();
+    }
+  }
+
+  public static boolean fieldSupportsHugeBinaryDocValues(String field) {
+    String dvFormat = getDocValuesFormat(field);
+    return dvFormat.equals("CheapBastard") ||
+           dvFormat.equals("Disk") ||
+           dvFormat.equals("SimpleText");
   }
 
   public static boolean anyFilesExceptWriteLock(Directory dir) throws IOException {
@@ -130,6 +130,8 @@ Bug Fixes
   of divide by zero, and makes estimated hit counts meaningful in non-optimized
   indexes. (hossman)
 
+* SOLR-5164: Can not create a collection via collections API (cloud mode) (Erick Erickson)
+
 Optimizations
 ----------------------
 
@@ -31,7 +31,6 @@ import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.OutputStreamWriter;
 import java.io.Writer;
-import java.util.Date;
 import java.util.List;
 import java.util.Properties;
 
@@ -78,6 +77,7 @@ public class CorePropertiesLocator implements CoresLocator {
     Properties p = buildCoreProperties(cd);
     Writer os = null;
     try {
+      propfile.getParentFile().mkdirs();
       os = new OutputStreamWriter(new FileOutputStream(propfile), Charsets.UTF_8);
       p.store(os, "Written by CorePropertiesLocator");
     }
@@ -409,8 +409,9 @@ public class CoreAdminHandler extends RequestHandlerBase {
     String name = checkNotEmpty(params.get(CoreAdminParams.NAME),
         "Missing parameter [" + CoreAdminParams.NAME + "]");
     String instancedir = params.get(CoreAdminParams.INSTANCE_DIR);
-    if (StringUtils.isEmpty(instancedir))
-      instancedir = container.getSolrHome() + File.separator + name;
+    if (StringUtils.isEmpty(instancedir)) {
+      instancedir = name; // Already relative to solrHome, we haven't been given an absolute path.
+    }
 
     Properties coreProps = new Properties();
     for (String param : paramToProp.keySet()) {
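For illustration, not part of the patch: the CoreAdminHandler change above means that when no instanceDir parameter is given, the value persisted for the new core is the bare core name, resolved against solrHome when the core is loaded, rather than an absolute path fixed at creation time. A small fragment with made-up values:

    String solrHome = "/var/solr";                        // hypothetical solr home
    String name = "core1";                                // core name from the request
    String before = solrHome + File.separator + name;     // old behaviour: "/var/solr/core1" stored in core.properties
    String after  = name;                                 // new behaviour: "core1", resolved against solrHome on load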