mirror of https://github.com/apache/lucene.git
LUCENE-6668: Added table encoding to sorted set/numeric doc values.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1692058 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
983d9efe76
commit
6a481c0a58
|
@ -352,6 +352,10 @@ Optimizations
|
||||||
and TermsQuery. This should especially help when there are lots of small
|
and TermsQuery. This should especially help when there are lots of small
|
||||||
postings lists. (Adrien Grand, Mike McCandless)
|
postings lists. (Adrien Grand, Mike McCandless)
|
||||||
|
|
||||||
|
* LUCENE-6668: Optimized storage for sorted set and sorted numeric doc values
|
||||||
|
in the case that there are few unique sets of values.
|
||||||
|
(Adrien Grand, Robert Muir)
|
||||||
|
|
||||||
Build
|
Build
|
||||||
|
|
||||||
* LUCENE-6518: Don't report false thread leaks from IBM J9
|
* LUCENE-6518: Don't report false thread leaks from IBM J9
|
||||||
|
|
|
@ -23,6 +23,11 @@ import java.util.Arrays;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.SortedSet;
|
||||||
|
import java.util.TreeSet;
|
||||||
|
|
||||||
import org.apache.lucene.codecs.CodecUtil;
|
import org.apache.lucene.codecs.CodecUtil;
|
||||||
import org.apache.lucene.codecs.DocValuesConsumer;
|
import org.apache.lucene.codecs.DocValuesConsumer;
|
||||||
|
@ -34,6 +39,7 @@ import org.apache.lucene.store.RAMOutputStream;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.BytesRefBuilder;
|
import org.apache.lucene.util.BytesRefBuilder;
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
import org.apache.lucene.util.LongsRef;
|
||||||
import org.apache.lucene.util.MathUtil;
|
import org.apache.lucene.util.MathUtil;
|
||||||
import org.apache.lucene.util.PagedBytes;
|
import org.apache.lucene.util.PagedBytes;
|
||||||
import org.apache.lucene.util.PagedBytes.PagedBytesDataInput;
|
import org.apache.lucene.util.PagedBytes.PagedBytesDataInput;
|
||||||
|
@ -463,11 +469,22 @@ class Lucene50DocValuesConsumer extends DocValuesConsumer implements Closeable {
|
||||||
// The field is single-valued, we can encode it as NUMERIC
|
// The field is single-valued, we can encode it as NUMERIC
|
||||||
addNumericField(field, singletonView(docToValueCount, values, null));
|
addNumericField(field, singletonView(docToValueCount, values, null));
|
||||||
} else {
|
} else {
|
||||||
meta.writeVInt(SORTED_WITH_ADDRESSES);
|
final SortedSet<LongsRef> uniqueValueSets = uniqueValueSets(docToValueCount, values);
|
||||||
// write the stream of values as a numeric field
|
if (uniqueValueSets != null) {
|
||||||
addNumericField(field, values, true);
|
meta.writeVInt(SORTED_SET_TABLE);
|
||||||
// write the doc -> ord count as a absolute index to the stream
|
|
||||||
addAddresses(field, docToValueCount);
|
// write the set_id -> values mapping
|
||||||
|
writeDictionary(uniqueValueSets);
|
||||||
|
|
||||||
|
// write the doc -> set_id as a numeric field
|
||||||
|
addNumericField(field, docToSetId(uniqueValueSets, docToValueCount, values), false);
|
||||||
|
} else {
|
||||||
|
meta.writeVInt(SORTED_WITH_ADDRESSES);
|
||||||
|
// write the stream of values as a numeric field
|
||||||
|
addNumericField(field, values, true);
|
||||||
|
// write the doc -> ord count as an absolute index to the stream
|
||||||
|
addAddresses(field, docToValueCount);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -481,20 +498,120 @@ class Lucene50DocValuesConsumer extends DocValuesConsumer implements Closeable {
|
||||||
// The field is single-valued, we can encode it as SORTED
|
// The field is single-valued, we can encode it as SORTED
|
||||||
addSortedField(field, values, singletonView(docToOrdCount, ords, -1L));
|
addSortedField(field, values, singletonView(docToOrdCount, ords, -1L));
|
||||||
} else {
|
} else {
|
||||||
meta.writeVInt(SORTED_WITH_ADDRESSES);
|
final SortedSet<LongsRef> uniqueValueSets = uniqueValueSets(docToOrdCount, ords);
|
||||||
|
if (uniqueValueSets != null) {
|
||||||
|
meta.writeVInt(SORTED_SET_TABLE);
|
||||||
|
|
||||||
// write the ord -> byte[] as a binary field
|
// write the set_id -> ords mapping
|
||||||
addTermsDict(field, values);
|
writeDictionary(uniqueValueSets);
|
||||||
|
|
||||||
// write the stream of ords as a numeric field
|
// write the ord -> byte[] as a binary field
|
||||||
// NOTE: we could return an iterator that delta-encodes these within a doc
|
addTermsDict(field, values);
|
||||||
addNumericField(field, ords, false);
|
|
||||||
|
|
||||||
// write the doc -> ord count as a absolute index to the stream
|
// write the doc -> set_id as a numeric field
|
||||||
addAddresses(field, docToOrdCount);
|
addNumericField(field, docToSetId(uniqueValueSets, docToOrdCount, ords), false);
|
||||||
|
} else {
|
||||||
|
meta.writeVInt(SORTED_WITH_ADDRESSES);
|
||||||
|
|
||||||
|
// write the ord -> byte[] as a binary field
|
||||||
|
addTermsDict(field, values);
|
||||||
|
|
||||||
|
// write the stream of ords as a numeric field
|
||||||
|
// NOTE: we could return an iterator that delta-encodes these within a doc
|
||||||
|
addNumericField(field, ords, false);
|
||||||
|
|
||||||
|
// write the doc -> ord count as an absolute index to the stream
|
||||||
|
addAddresses(field, docToOrdCount);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private SortedSet<LongsRef> uniqueValueSets(Iterable<Number> docToValueCount, Iterable<Number> values) {
|
||||||
|
Set<LongsRef> uniqueValueSet = new HashSet<>();
|
||||||
|
LongsRef docValues = new LongsRef(256);
|
||||||
|
|
||||||
|
Iterator<Number> valueCountIterator = docToValueCount.iterator();
|
||||||
|
Iterator<Number> valueIterator = values.iterator();
|
||||||
|
int totalDictSize = 0;
|
||||||
|
while (valueCountIterator.hasNext()) {
|
||||||
|
docValues.length = valueCountIterator.next().intValue();
|
||||||
|
if (docValues.length > 256) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
for (int i = 0; i < docValues.length; ++i) {
|
||||||
|
docValues.longs[i] = valueIterator.next().longValue();
|
||||||
|
}
|
||||||
|
if (uniqueValueSet.contains(docValues)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
totalDictSize += docValues.length;
|
||||||
|
if (totalDictSize > 256) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
uniqueValueSet.add(new LongsRef(Arrays.copyOf(docValues.longs, docValues.length), 0, docValues.length));
|
||||||
|
}
|
||||||
|
assert valueIterator.hasNext() == false;
|
||||||
|
return new TreeSet<>(uniqueValueSet);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void writeDictionary(SortedSet<LongsRef> uniqueValueSets) throws IOException {
|
||||||
|
int lengthSum = 0;
|
||||||
|
for (LongsRef longs : uniqueValueSets) {
|
||||||
|
lengthSum += longs.length;
|
||||||
|
}
|
||||||
|
|
||||||
|
meta.writeInt(lengthSum);
|
||||||
|
for (LongsRef valueSet : uniqueValueSets) {
|
||||||
|
for (int i = 0; i < valueSet.length; ++i) {
|
||||||
|
meta.writeLong(valueSet.longs[valueSet.offset + i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
meta.writeInt(uniqueValueSets.size());
|
||||||
|
for (LongsRef valueSet : uniqueValueSets) {
|
||||||
|
meta.writeInt(valueSet.length);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Iterable<Number> docToSetId(SortedSet<LongsRef> uniqueValueSets, Iterable<Number> docToValueCount, Iterable<Number> values) {
|
||||||
|
final Map<LongsRef, Integer> setIds = new HashMap<>();
|
||||||
|
int i = 0;
|
||||||
|
for (LongsRef set : uniqueValueSets) {
|
||||||
|
setIds.put(set, i++);
|
||||||
|
}
|
||||||
|
assert i == uniqueValueSets.size();
|
||||||
|
|
||||||
|
return new Iterable<Number>() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Iterator<Number> iterator() {
|
||||||
|
final Iterator<Number> valueCountIterator = docToValueCount.iterator();
|
||||||
|
final Iterator<Number> valueIterator = values.iterator();
|
||||||
|
final LongsRef docValues = new LongsRef(256);
|
||||||
|
return new Iterator<Number>() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasNext() {
|
||||||
|
return valueCountIterator.hasNext();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Number next() {
|
||||||
|
docValues.length = valueCountIterator.next().intValue();
|
||||||
|
for (int i = 0; i < docValues.length; ++i) {
|
||||||
|
docValues.longs[i] = valueIterator.next().longValue();
|
||||||
|
}
|
||||||
|
final Integer id = setIds.get(docValues);
|
||||||
|
assert id != null;
|
||||||
|
return id;
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
// writes addressing information as MONOTONIC_COMPRESSED integer
|
// writes addressing information as MONOTONIC_COMPRESSED integer
|
||||||
private void addAddresses(FieldInfo field, Iterable<Number> values) throws IOException {
|
private void addAddresses(FieldInfo field, Iterable<Number> values) throws IOException {
|
||||||
meta.writeVInt(field.number);
|
meta.writeVInt(field.number);
|
||||||
|
|
|
@ -72,13 +72,21 @@ import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
|
||||||
* <p>
|
* <p>
|
||||||
* {@link DocValuesType#SORTED_SET SORTED_SET}:
|
* {@link DocValuesType#SORTED_SET SORTED_SET}:
|
||||||
* <ul>
|
* <ul>
|
||||||
|
* <li>Single: if all documents have 0 or 1 value, then data are written like SORTED.
|
||||||
|
* <li>SortedSet table: when the unique sets of values hold at most 256 values in total, each set is assigned
|
||||||
|
* an id, a lookup table is written and the mapping from document to set id is written using the
|
||||||
|
* numeric strategies above.
|
||||||
* <li>SortedSet: a mapping of ordinals to deduplicated terms is written as Binary,
|
* <li>SortedSet: a mapping of ordinals to deduplicated terms is written as Binary,
|
||||||
* an ordinal list and per-document index into this list are written using the numeric strategies
|
* an ordinal list and per-document index into this list are written using the numeric strategies
|
||||||
* above.
|
* above.
|
||||||
* </ul>
|
* </ul>
|
||||||
* <p>
|
* <p>
|
||||||
* {@link DocValuesType#SORTED_NUMERIC SORTED_NUMERIC}:
|
* {@link DocValuesType#SORTED_NUMERIC SORTED_NUMERIC}:
|
||||||
* <ul>
|
* <ul>
|
||||||
|
* <li>Single: if all documents have 0 or 1 value, then data are written like NUMERIC.
|
||||||
|
* <li>SortedSet table: when the unique sets of values hold at most 256 values in total, each set is assigned
|
||||||
|
* an id, a lookup table is written and the mapping from document to set id is written using the
|
||||||
|
* numeric strategies above.
|
||||||
* <li>SortedNumeric: a value list and per-document index into this list are written using the numeric
|
* <li>SortedNumeric: a value list and per-document index into this list are written using the numeric
|
||||||
* strategies above.
|
* strategies above.
|
||||||
* </ul>
|
* </ul>
|
||||||
|
@ -108,21 +116,24 @@ import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
|
||||||
* <li>PrefixBinaryEntry --> BinaryHeader,AddressInterval,AddressOffset,PackedVersion,BlockSize</li>
|
* <li>PrefixBinaryEntry --> BinaryHeader,AddressInterval,AddressOffset,PackedVersion,BlockSize</li>
|
||||||
* <li>BinaryHeader --> FieldNumber,EntryType,BinaryType,MissingOffset,MinLength,MaxLength,DataOffset</li>
|
* <li>BinaryHeader --> FieldNumber,EntryType,BinaryType,MissingOffset,MinLength,MaxLength,DataOffset</li>
|
||||||
* <li>SortedEntry --> FieldNumber,EntryType,BinaryEntry,NumericEntry</li>
|
* <li>SortedEntry --> FieldNumber,EntryType,BinaryEntry,NumericEntry</li>
|
||||||
* <li>SortedSetEntry --> EntryType,BinaryEntry,NumericEntry,NumericEntry</li>
|
* <li>SortedSetEntry --> SingleSortedSetEntry | AddressesSortedSetEntry | TableSortedSetEntry</li>
|
||||||
* <li>SortedNumericEntry --> EntryType,NumericEntry,NumericEntry</li>
|
* <li>SingleSortedSetEntry --> SetHeader,SortedEntry</li>
|
||||||
|
* <li>AddressesSortedSetEntry --> SetHeader,BinaryEntry,NumericEntry,NumericEntry</li>
|
||||||
|
* <li>TableSortedSetEntry --> SetHeader,TotalTableLength,{@link DataOutput#writeLong Int64}<sup>TotalTableLength</sup>,TableSize,{@link DataOutput#writeInt Int32}<sup>TableSize</sup>,BinaryEntry,NumericEntry</li>
|
||||||
|
* <li>SetHeader --> FieldNumber,EntryType,SetType</li>
|
||||||
|
* <li>SortedNumericEntry --> SingleSortedNumericEntry | AddressesSortedNumericEntry | TableSortedNumericEntry</li>
|
||||||
|
* <li>SingleSortedNumericEntry --> SetHeader,NumericEntry</li>
|
||||||
|
* <li>AddressesSortedNumericEntry --> SetHeader,NumericEntry,NumericEntry</li>
|
||||||
|
* <li>TableSortedNumericEntry --> SetHeader,TotalTableLength,{@link DataOutput#writeLong Int64}<sup>TotalTableLength</sup>,TableSize,{@link DataOutput#writeInt Int32}<sup>TableSize</sup>,NumericEntry</li>
|
||||||
* <li>FieldNumber,PackedVersion,MinLength,MaxLength,BlockSize,ValueCount --> {@link DataOutput#writeVInt VInt}</li>
|
* <li>FieldNumber,PackedVersion,MinLength,MaxLength,BlockSize,ValueCount --> {@link DataOutput#writeVInt VInt}</li>
|
||||||
* <li>EntryType,CompressionType --> {@link DataOutput#writeByte Byte}</li>
|
* <li>EntryType,CompressionType --> {@link DataOutput#writeByte Byte}</li>
|
||||||
* <li>Header --> {@link CodecUtil#writeIndexHeader IndexHeader}</li>
|
* <li>Header --> {@link CodecUtil#writeIndexHeader IndexHeader}</li>
|
||||||
* <li>MinValue,GCD,MissingOffset,AddressOffset,DataOffset,EndOffset --> {@link DataOutput#writeLong Int64}</li>
|
* <li>MinValue,GCD,MissingOffset,AddressOffset,DataOffset,EndOffset --> {@link DataOutput#writeLong Int64}</li>
|
||||||
* <li>TableSize,BitsPerValue --> {@link DataOutput#writeVInt vInt}</li>
|
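* <li>TableSize,BitsPerValue --> {@link DataOutput#writeVInt vInt} (in a TableSortedSetEntry or TableSortedNumericEntry, TotalTableLength and TableSize are written as {@link DataOutput#writeInt Int32})</li>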
|
||||||
* <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
|
* <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
|
||||||
* </ul>
|
* </ul>
|
||||||
* <p>Sorted fields have two entries: a BinaryEntry with the value metadata,
|
* <p>Sorted fields have two entries: a BinaryEntry with the value metadata,
|
||||||
* and an ordinary NumericEntry for the document-to-ord metadata.</p>
|
* and an ordinary NumericEntry for the document-to-ord metadata.</p>
|
||||||
* <p>SortedSet fields have three entries: a BinaryEntry with the value metadata,
|
|
||||||
* and two NumericEntries for the document-to-ord-index and ordinal list metadata.</p>
|
|
||||||
* <p>SortedNumeric fields have two entries: A NumericEntry with the value metadata,
|
|
||||||
* and a numeric entry with the document-to-value index.</p>
|
|
||||||
* <p>FieldNumber of -1 indicates the end of metadata.</p>
|
* <p>FieldNumber of -1 indicates the end of metadata.</p>
|
||||||
* <p>EntryType is a 0 (NumericEntry) or 1 (BinaryEntry)</p>
|
* <p>EntryType is a 0 (NumericEntry) or 1 (BinaryEntry)</p>
|
||||||
* <p>DataOffset is the pointer to the start of the data in the DocValues data (.dvd)</p>
|
* <p>DataOffset is the pointer to the start of the data in the DocValues data (.dvd)</p>
|
||||||
|
@ -144,6 +155,15 @@ import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
|
||||||
* <li>1 --> variable-width. An address for each value is stored.
|
* <li>1 --> variable-width. An address for each value is stored.
|
||||||
* <li>2 --> prefix-compressed. An address to the start of every interval'th value is stored.
|
* <li>2 --> prefix-compressed. An address to the start of every interval'th value is stored.
|
||||||
* </ul>
|
* </ul>
|
||||||
|
* <p>SetType indicates how SortedSet and SortedNumeric values will be stored:
|
||||||
|
* <ul>
|
||||||
|
* <li>0 --> with addresses. There are two numeric entries: a first one from document to start
|
||||||
|
* offset, and a second one from offset to ord/value.
|
||||||
|
* <li>1 --> single-valued. Used when all documents have at most one value and is encoded like
|
||||||
|
* a regular Sorted/Numeric entry.
|
||||||
|
* <li>2 --> table-encoded. A lookup table of unique sets of values is written, followed by a
|
||||||
|
* numeric entry that maps each document to an ordinal in this table.
|
||||||
|
* </ul>
|
||||||
* <p>MinLength and MaxLength represent the min and max byte[] value lengths for Binary values.
|
* <p>MinLength and MaxLength represent the min and max byte[] value lengths for Binary values.
|
||||||
* If they are equal, then all values are of a fixed size, and can be addressed as DataOffset + (docID * length).
|
* If they are equal, then all values are of a fixed size, and can be addressed as DataOffset + (docID * length).
|
||||||
* Otherwise, the binary values are of variable size, and packed integer metadata (PackedVersion,BlockSize)
|
* Otherwise, the binary values are of variable size, and packed integer metadata (PackedVersion,BlockSize)
|
||||||
|
@ -187,7 +207,8 @@ public final class Lucene50DocValuesFormat extends DocValuesFormat {
|
||||||
static final String META_CODEC = "Lucene50DocValuesMetadata";
|
static final String META_CODEC = "Lucene50DocValuesMetadata";
|
||||||
static final String META_EXTENSION = "dvm";
|
static final String META_EXTENSION = "dvm";
|
||||||
static final int VERSION_START = 0;
|
static final int VERSION_START = 0;
|
||||||
static final int VERSION_CURRENT = VERSION_START;
|
static final int VERSION_SORTEDSET_TABLE = 1;
|
||||||
|
static final int VERSION_CURRENT = VERSION_SORTEDSET_TABLE;
|
||||||
|
|
||||||
// indicates docvalues type
|
// indicates docvalues type
|
||||||
static final byte NUMERIC = 0;
|
static final byte NUMERIC = 0;
|
||||||
|
@ -235,6 +256,9 @@ public final class Lucene50DocValuesFormat extends DocValuesFormat {
|
||||||
/** Single-valued sorted set values, encoded as sorted values, so no level
|
/** Single-valued sorted set values, encoded as sorted values, so no level
|
||||||
* of indirection: {@code docId -> ord}. */
|
* of indirection: {@code docId -> ord}. */
|
||||||
static final int SORTED_SINGLE_VALUED = 1;
|
static final int SORTED_SINGLE_VALUED = 1;
|
||||||
|
/** Compressed by giving IDs to unique sets of values:
|
||||||
|
* {@code docId -> setId -> ords} */
|
||||||
|
static final int SORTED_SET_TABLE = 2;
|
||||||
|
|
||||||
/** placeholder for missing offset that means there are no missing values */
|
/** placeholder for missing offset that means there are no missing values */
|
||||||
static final int ALL_LIVE = -1;
|
static final int ALL_LIVE = -1;
|
||||||
|
|
|
@ -206,6 +206,28 @@ class Lucene50DocValuesProducer extends DocValuesProducer implements Closeable {
|
||||||
ordIndexes.put(info.name, n2);
|
ordIndexes.put(info.name, n2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void readSortedSetFieldWithTable(FieldInfo info, IndexInput meta) throws IOException {
|
||||||
|
// sortedset table = binary + ordset table + ordset index
|
||||||
|
if (meta.readVInt() != info.number) {
|
||||||
|
throw new CorruptIndexException("sortedset entry for field: " + info.name + " is corrupt", meta);
|
||||||
|
}
|
||||||
|
if (meta.readByte() != Lucene50DocValuesFormat.BINARY) {
|
||||||
|
throw new CorruptIndexException("sortedset entry for field: " + info.name + " is corrupt", meta);
|
||||||
|
}
|
||||||
|
|
||||||
|
BinaryEntry b = readBinaryEntry(meta);
|
||||||
|
binaries.put(info.name, b);
|
||||||
|
|
||||||
|
if (meta.readVInt() != info.number) {
|
||||||
|
throw new CorruptIndexException("sortedset entry for field: " + info.name + " is corrupt", meta);
|
||||||
|
}
|
||||||
|
if (meta.readByte() != Lucene50DocValuesFormat.NUMERIC) {
|
||||||
|
throw new CorruptIndexException("sortedset entry for field: " + info.name + " is corrupt", meta);
|
||||||
|
}
|
||||||
|
NumericEntry n = readNumericEntry(meta);
|
||||||
|
ords.put(info.name, n);
|
||||||
|
}
|
||||||
|
|
||||||
private int readFields(IndexInput meta, FieldInfos infos) throws IOException {
|
private int readFields(IndexInput meta, FieldInfos infos) throws IOException {
|
||||||
int numFields = 0;
|
int numFields = 0;
|
||||||
int fieldNumber = meta.readVInt();
|
int fieldNumber = meta.readVInt();
|
||||||
|
@ -229,6 +251,8 @@ class Lucene50DocValuesProducer extends DocValuesProducer implements Closeable {
|
||||||
sortedSets.put(info.name, ss);
|
sortedSets.put(info.name, ss);
|
||||||
if (ss.format == SORTED_WITH_ADDRESSES) {
|
if (ss.format == SORTED_WITH_ADDRESSES) {
|
||||||
readSortedSetFieldWithAddresses(info, meta);
|
readSortedSetFieldWithAddresses(info, meta);
|
||||||
|
} else if (ss.format == SORTED_SET_TABLE) {
|
||||||
|
readSortedSetFieldWithTable(info, meta);
|
||||||
} else if (ss.format == SORTED_SINGLE_VALUED) {
|
} else if (ss.format == SORTED_SINGLE_VALUED) {
|
||||||
if (meta.readVInt() != fieldNumber) {
|
if (meta.readVInt() != fieldNumber) {
|
||||||
throw new CorruptIndexException("sortedset entry for field: " + info.name + " is corrupt", meta);
|
throw new CorruptIndexException("sortedset entry for field: " + info.name + " is corrupt", meta);
|
||||||
|
@ -243,14 +267,14 @@ class Lucene50DocValuesProducer extends DocValuesProducer implements Closeable {
|
||||||
} else if (type == Lucene50DocValuesFormat.SORTED_NUMERIC) {
|
} else if (type == Lucene50DocValuesFormat.SORTED_NUMERIC) {
|
||||||
SortedSetEntry ss = readSortedSetEntry(meta);
|
SortedSetEntry ss = readSortedSetEntry(meta);
|
||||||
sortedNumerics.put(info.name, ss);
|
sortedNumerics.put(info.name, ss);
|
||||||
if (meta.readVInt() != fieldNumber) {
|
|
||||||
throw new CorruptIndexException("sortednumeric entry for field: " + info.name + " is corrupt", meta);
|
|
||||||
}
|
|
||||||
if (meta.readByte() != Lucene50DocValuesFormat.NUMERIC) {
|
|
||||||
throw new CorruptIndexException("sortednumeric entry for field: " + info.name + " is corrupt", meta);
|
|
||||||
}
|
|
||||||
numerics.put(info.name, readNumericEntry(meta));
|
|
||||||
if (ss.format == SORTED_WITH_ADDRESSES) {
|
if (ss.format == SORTED_WITH_ADDRESSES) {
|
||||||
|
if (meta.readVInt() != fieldNumber) {
|
||||||
|
throw new CorruptIndexException("sortednumeric entry for field: " + info.name + " is corrupt", meta);
|
||||||
|
}
|
||||||
|
if (meta.readByte() != Lucene50DocValuesFormat.NUMERIC) {
|
||||||
|
throw new CorruptIndexException("sortednumeric entry for field: " + info.name + " is corrupt", meta);
|
||||||
|
}
|
||||||
|
numerics.put(info.name, readNumericEntry(meta));
|
||||||
if (meta.readVInt() != fieldNumber) {
|
if (meta.readVInt() != fieldNumber) {
|
||||||
throw new CorruptIndexException("sortednumeric entry for field: " + info.name + " is corrupt", meta);
|
throw new CorruptIndexException("sortednumeric entry for field: " + info.name + " is corrupt", meta);
|
||||||
}
|
}
|
||||||
|
@ -259,7 +283,24 @@ class Lucene50DocValuesProducer extends DocValuesProducer implements Closeable {
|
||||||
}
|
}
|
||||||
NumericEntry ordIndex = readNumericEntry(meta);
|
NumericEntry ordIndex = readNumericEntry(meta);
|
||||||
ordIndexes.put(info.name, ordIndex);
|
ordIndexes.put(info.name, ordIndex);
|
||||||
} else if (ss.format != SORTED_SINGLE_VALUED) {
|
} else if (ss.format == SORTED_SET_TABLE) {
|
||||||
|
if (meta.readVInt() != info.number) {
|
||||||
|
throw new CorruptIndexException("sortednumeric entry for field: " + info.name + " is corrupt", meta);
|
||||||
|
}
|
||||||
|
if (meta.readByte() != Lucene50DocValuesFormat.NUMERIC) {
|
||||||
|
throw new CorruptIndexException("sortednumeric entry for field: " + info.name + " is corrupt", meta);
|
||||||
|
}
|
||||||
|
NumericEntry n = readNumericEntry(meta);
|
||||||
|
ords.put(info.name, n);
|
||||||
|
} else if (ss.format == SORTED_SINGLE_VALUED) {
|
||||||
|
if (meta.readVInt() != fieldNumber) {
|
||||||
|
throw new CorruptIndexException("sortednumeric entry for field: " + info.name + " is corrupt", meta);
|
||||||
|
}
|
||||||
|
if (meta.readByte() != Lucene50DocValuesFormat.NUMERIC) {
|
||||||
|
throw new CorruptIndexException("sortednumeric entry for field: " + info.name + " is corrupt", meta);
|
||||||
|
}
|
||||||
|
numerics.put(info.name, readNumericEntry(meta));
|
||||||
|
} else {
|
||||||
throw new AssertionError();
|
throw new AssertionError();
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -346,7 +387,24 @@ class Lucene50DocValuesProducer extends DocValuesProducer implements Closeable {
|
||||||
SortedSetEntry readSortedSetEntry(IndexInput meta) throws IOException {
|
SortedSetEntry readSortedSetEntry(IndexInput meta) throws IOException {
|
||||||
SortedSetEntry entry = new SortedSetEntry();
|
SortedSetEntry entry = new SortedSetEntry();
|
||||||
entry.format = meta.readVInt();
|
entry.format = meta.readVInt();
|
||||||
if (entry.format != SORTED_SINGLE_VALUED && entry.format != SORTED_WITH_ADDRESSES) {
|
if (entry.format == SORTED_SET_TABLE) {
|
||||||
|
final int totalTableLength = meta.readInt();
|
||||||
|
if (totalTableLength > 256) {
|
||||||
|
throw new CorruptIndexException("SORTED_SET_TABLE cannot have more than 256 values in its dictionary, got=" + totalTableLength, meta);
|
||||||
|
}
|
||||||
|
entry.table = new long[totalTableLength];
|
||||||
|
for (int i = 0; i < totalTableLength; ++i) {
|
||||||
|
entry.table[i] = meta.readLong();
|
||||||
|
}
|
||||||
|
final int tableSize = meta.readInt();
|
||||||
|
if (tableSize > totalTableLength + 1) { // +1 because of the empty set
|
||||||
|
throw new CorruptIndexException("SORTED_SET_TABLE cannot have more set ids than ords in its dictionary, got " + totalTableLength + " ords and " + tableSize + " sets", meta);
|
||||||
|
}
|
||||||
|
entry.tableOffsets = new int[tableSize + 1];
|
||||||
|
for (int i = 1; i < entry.tableOffsets.length; ++i) {
|
||||||
|
entry.tableOffsets[i] = entry.tableOffsets[i - 1] + meta.readInt();
|
||||||
|
}
|
||||||
|
} else if (entry.format != SORTED_SINGLE_VALUED && entry.format != SORTED_WITH_ADDRESSES) {
|
||||||
throw new CorruptIndexException("Unknown format: " + entry.format, meta);
|
throw new CorruptIndexException("Unknown format: " + entry.format, meta);
|
||||||
}
|
}
|
||||||
return entry;
|
return entry;
|
||||||
|
@ -611,12 +669,14 @@ class Lucene50DocValuesProducer extends DocValuesProducer implements Closeable {
|
||||||
@Override
|
@Override
|
||||||
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
|
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
|
||||||
SortedSetEntry ss = sortedNumerics.get(field.name);
|
SortedSetEntry ss = sortedNumerics.get(field.name);
|
||||||
NumericEntry numericEntry = numerics.get(field.name);
|
|
||||||
final LongValues values = getNumeric(numericEntry);
|
|
||||||
if (ss.format == SORTED_SINGLE_VALUED) {
|
if (ss.format == SORTED_SINGLE_VALUED) {
|
||||||
|
NumericEntry numericEntry = numerics.get(field.name);
|
||||||
|
final LongValues values = getNumeric(numericEntry);
|
||||||
final Bits docsWithField = getLiveBits(numericEntry.missingOffset, maxDoc);
|
final Bits docsWithField = getLiveBits(numericEntry.missingOffset, maxDoc);
|
||||||
return DocValues.singleton(values, docsWithField);
|
return DocValues.singleton(values, docsWithField);
|
||||||
} else if (ss.format == SORTED_WITH_ADDRESSES) {
|
} else if (ss.format == SORTED_WITH_ADDRESSES) {
|
||||||
|
NumericEntry numericEntry = numerics.get(field.name);
|
||||||
|
final LongValues values = getNumeric(numericEntry);
|
||||||
final MonotonicBlockPackedReader ordIndex = getOrdIndexInstance(field, ordIndexes.get(field.name));
|
final MonotonicBlockPackedReader ordIndex = getOrdIndexInstance(field, ordIndexes.get(field.name));
|
||||||
|
|
||||||
return new SortedNumericDocValues() {
|
return new SortedNumericDocValues() {
|
||||||
|
@ -639,6 +699,33 @@ class Lucene50DocValuesProducer extends DocValuesProducer implements Closeable {
|
||||||
return (int) (endOffset - startOffset);
|
return (int) (endOffset - startOffset);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
} else if (ss.format == SORTED_SET_TABLE) {
|
||||||
|
NumericEntry entry = ords.get(field.name);
|
||||||
|
final LongValues ordinals = getNumeric(entry);
|
||||||
|
|
||||||
|
final long[] table = ss.table;
|
||||||
|
final int[] offsets = ss.tableOffsets;
|
||||||
|
return new SortedNumericDocValues() {
|
||||||
|
int startOffset;
|
||||||
|
int endOffset;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setDocument(int doc) {
|
||||||
|
final int ord = (int) ordinals.get(doc);
|
||||||
|
startOffset = offsets[ord];
|
||||||
|
endOffset = offsets[ord + 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long valueAt(int index) {
|
||||||
|
return table[startOffset + index];
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int count() {
|
||||||
|
return endOffset - startOffset;
|
||||||
|
}
|
||||||
|
};
|
||||||
} else {
|
} else {
|
||||||
throw new AssertionError();
|
throw new AssertionError();
|
||||||
}
|
}
|
||||||
|
@ -647,13 +734,20 @@ class Lucene50DocValuesProducer extends DocValuesProducer implements Closeable {
|
||||||
@Override
|
@Override
|
||||||
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
|
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
|
||||||
SortedSetEntry ss = sortedSets.get(field.name);
|
SortedSetEntry ss = sortedSets.get(field.name);
|
||||||
if (ss.format == SORTED_SINGLE_VALUED) {
|
switch (ss.format) {
|
||||||
final SortedDocValues values = getSorted(field);
|
case SORTED_SINGLE_VALUED:
|
||||||
return DocValues.singleton(values);
|
final SortedDocValues values = getSorted(field);
|
||||||
} else if (ss.format != SORTED_WITH_ADDRESSES) {
|
return DocValues.singleton(values);
|
||||||
throw new AssertionError();
|
case SORTED_WITH_ADDRESSES:
|
||||||
|
return getSortedSetWithAddresses(field);
|
||||||
|
case SORTED_SET_TABLE:
|
||||||
|
return getSortedSetTable(field, ss);
|
||||||
|
default:
|
||||||
|
throw new AssertionError();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private SortedSetDocValues getSortedSetWithAddresses(FieldInfo field) throws IOException {
|
||||||
final long valueCount = binaries.get(field.name).count;
|
final long valueCount = binaries.get(field.name).count;
|
||||||
// we keep the byte[]s and list of ords on disk, these could be large
|
// we keep the byte[]s and list of ords on disk, these could be large
|
||||||
final LongBinaryDocValues binary = (LongBinaryDocValues) getBinary(field);
|
final LongBinaryDocValues binary = (LongBinaryDocValues) getBinary(field);
|
||||||
|
@ -722,7 +816,76 @@ class Lucene50DocValuesProducer extends DocValuesProducer implements Closeable {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private SortedSetDocValues getSortedSetTable(FieldInfo field, SortedSetEntry ss) throws IOException {
|
||||||
|
final long valueCount = binaries.get(field.name).count;
|
||||||
|
final LongBinaryDocValues binary = (LongBinaryDocValues) getBinary(field);
|
||||||
|
final LongValues ordinals = getNumeric(ords.get(field.name));
|
||||||
|
|
||||||
|
final long[] table = ss.table;
|
||||||
|
final int[] offsets = ss.tableOffsets;
|
||||||
|
|
||||||
|
return new RandomAccessOrds() {
|
||||||
|
|
||||||
|
int offset, startOffset, endOffset;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setDocument(int docID) {
|
||||||
|
final int ord = (int) ordinals.get(docID);
|
||||||
|
offset = startOffset = offsets[ord];
|
||||||
|
endOffset = offsets[ord + 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long ordAt(int index) {
|
||||||
|
return table[startOffset + index];
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long nextOrd() {
|
||||||
|
if (offset == endOffset) {
|
||||||
|
return NO_MORE_ORDS;
|
||||||
|
} else {
|
||||||
|
return table[offset++];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int cardinality() {
|
||||||
|
return endOffset - startOffset;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public BytesRef lookupOrd(long ord) {
|
||||||
|
return binary.get(ord);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getValueCount() {
|
||||||
|
return valueCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long lookupTerm(BytesRef key) {
|
||||||
|
if (binary instanceof CompressedBinaryDocValues) {
|
||||||
|
return ((CompressedBinaryDocValues) binary).lookupTerm(key);
|
||||||
|
} else {
|
||||||
|
return super.lookupTerm(key);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TermsEnum termsEnum() {
|
||||||
|
if (binary instanceof CompressedBinaryDocValues) {
|
||||||
|
return ((CompressedBinaryDocValues) binary).getTermsEnum();
|
||||||
|
} else {
|
||||||
|
return super.termsEnum();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
private Bits getLiveBits(final long offset, final int count) throws IOException {
|
private Bits getLiveBits(final long offset, final int count) throws IOException {
|
||||||
if (offset == ALL_MISSING) {
|
if (offset == ALL_MISSING) {
|
||||||
return new Bits.MatchNoBits(count);
|
return new Bits.MatchNoBits(count);
|
||||||
|
@ -831,6 +994,9 @@ class Lucene50DocValuesProducer extends DocValuesProducer implements Closeable {
|
||||||
static class SortedSetEntry {
|
static class SortedSetEntry {
|
||||||
private SortedSetEntry() {}
|
private SortedSetEntry() {}
|
||||||
int format;
|
int format;
|
||||||
|
|
||||||
|
long[] table;
|
||||||
|
int[] tableOffsets;
|
||||||
}
|
}
|
||||||
|
|
||||||
// internally we compose complex dv (sorted/sortedset) from other ones
|
// internally we compose complex dv (sorted/sortedset) from other ones
|
||||||
|
|
|
@ -64,7 +64,7 @@ public class TestLucene50DocValuesFormat extends BaseCompressingDocValuesFormatT
|
||||||
public void testSortedSetVariableLengthBigVsStoredFields() throws Exception {
|
public void testSortedSetVariableLengthBigVsStoredFields() throws Exception {
|
||||||
int numIterations = atLeast(1);
|
int numIterations = atLeast(1);
|
||||||
for (int i = 0; i < numIterations; i++) {
|
for (int i = 0; i < numIterations; i++) {
|
||||||
doTestSortedSetVsStoredFields(atLeast(300), 1, 32766, 16);
|
doTestSortedSetVsStoredFields(atLeast(300), 1, 32766, 16, 100);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -72,7 +72,7 @@ public class TestLucene50DocValuesFormat extends BaseCompressingDocValuesFormatT
|
||||||
public void testSortedSetVariableLengthManyVsStoredFields() throws Exception {
|
public void testSortedSetVariableLengthManyVsStoredFields() throws Exception {
|
||||||
int numIterations = atLeast(1);
|
int numIterations = atLeast(1);
|
||||||
for (int i = 0; i < numIterations; i++) {
|
for (int i = 0; i < numIterations; i++) {
|
||||||
doTestSortedSetVsStoredFields(TestUtil.nextInt(random(), 1024, 2049), 1, 500, 16);
|
doTestSortedSetVsStoredFields(TestUtil.nextInt(random(), 1024, 2049), 1, 500, 16, 100);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -24,6 +24,7 @@ import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Map.Entry;
|
import java.util.Map.Entry;
|
||||||
|
@ -62,6 +63,8 @@ import org.apache.lucene.util.BytesRefHash;
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
import org.apache.lucene.util.TestUtil;
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
|
||||||
|
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
|
||||||
|
|
||||||
import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
|
import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1940,29 +1943,30 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
|
||||||
directory.close();
|
directory.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void doTestSortedSetVsStoredFields(int numDocs, int minLength, int maxLength, int maxValuesPerDoc) throws Exception {
|
protected void doTestSortedSetVsStoredFields(int numDocs, int minLength, int maxLength, int maxValuesPerDoc, int maxUniqueValues) throws Exception {
|
||||||
Directory dir = newFSDirectory(createTempDir("dvduel"));
|
Directory dir = newFSDirectory(createTempDir("dvduel"));
|
||||||
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
|
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
|
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
|
||||||
|
|
||||||
|
Set<String> valueSet = new HashSet<String>();
|
||||||
|
for (int i = 0; i < 10000 && valueSet.size() < maxUniqueValues; ++i) {
|
||||||
|
final int length = TestUtil.nextInt(random(), minLength, maxLength);
|
||||||
|
valueSet.add(TestUtil.randomSimpleString(random(), length));
|
||||||
|
}
|
||||||
|
String[] uniqueValues = valueSet.toArray(new String[0]);
|
||||||
|
|
||||||
// index some docs
|
// index some docs
|
||||||
for (int i = 0; i < numDocs; i++) {
|
for (int i = 0; i < numDocs; i++) {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
Field idField = new StringField("id", Integer.toString(i), Field.Store.NO);
|
Field idField = new StringField("id", Integer.toString(i), Field.Store.NO);
|
||||||
doc.add(idField);
|
doc.add(idField);
|
||||||
final int length;
|
|
||||||
if (minLength == maxLength) {
|
|
||||||
length = minLength; // fixed length
|
|
||||||
} else {
|
|
||||||
length = TestUtil.nextInt(random(), minLength, maxLength);
|
|
||||||
}
|
|
||||||
int numValues = TestUtil.nextInt(random(), 0, maxValuesPerDoc);
|
int numValues = TestUtil.nextInt(random(), 0, maxValuesPerDoc);
|
||||||
// create a random set of strings
|
// create a random set of strings
|
||||||
Set<String> values = new TreeSet<>();
|
Set<String> values = new TreeSet<>();
|
||||||
for (int v = 0; v < numValues; v++) {
|
for (int v = 0; v < numValues; v++) {
|
||||||
values.add(TestUtil.randomSimpleString(random(), length));
|
values.add(RandomPicks.randomFrom(random(), uniqueValues));
|
||||||
}
|
}
|
||||||
|
|
||||||
// add ordered to the stored field
|
// add ordered to the stored field
|
||||||
for (String v : values) {
|
for (String v : values) {
|
||||||
doc.add(new StoredField("stored", v));
|
doc.add(new StoredField("stored", v));
|
||||||
|
@ -2041,7 +2045,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
|
||||||
int numIterations = atLeast(1);
|
int numIterations = atLeast(1);
|
||||||
for (int i = 0; i < numIterations; i++) {
|
for (int i = 0; i < numIterations; i++) {
|
||||||
int fixedLength = TestUtil.nextInt(random(), 1, 10);
|
int fixedLength = TestUtil.nextInt(random(), 1, 10);
|
||||||
doTestSortedSetVsStoredFields(atLeast(300), fixedLength, fixedLength, 16);
|
doTestSortedSetVsStoredFields(atLeast(300), fixedLength, fixedLength, 16, 100);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2107,12 +2111,37 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testSortedNumericsFewUniqueSetsVsStoredFields() throws Exception {
|
||||||
|
assumeTrue("Codec does not support SORTED_NUMERIC", codecSupportsSortedNumeric());
|
||||||
|
final long[] values = new long[TestUtil.nextInt(random(), 2, 6)];
|
||||||
|
for (int i = 0; i < values.length; ++i) {
|
||||||
|
values[i] = random().nextLong();
|
||||||
|
}
|
||||||
|
int numIterations = atLeast(1);
|
||||||
|
for (int i = 0; i < numIterations; i++) {
|
||||||
|
doTestSortedNumericsVsStoredFields(
|
||||||
|
new LongProducer() {
|
||||||
|
@Override
|
||||||
|
long next() {
|
||||||
|
return TestUtil.nextLong(random(), 0, 6);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
new LongProducer() {
|
||||||
|
@Override
|
||||||
|
long next() {
|
||||||
|
return values[random().nextInt(values.length)];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public void testSortedSetVariableLengthVsStoredFields() throws Exception {
|
public void testSortedSetVariableLengthVsStoredFields() throws Exception {
|
||||||
assumeTrue("Codec does not support SORTED_SET", codecSupportsSortedSet());
|
assumeTrue("Codec does not support SORTED_SET", codecSupportsSortedSet());
|
||||||
int numIterations = atLeast(1);
|
int numIterations = atLeast(1);
|
||||||
for (int i = 0; i < numIterations; i++) {
|
for (int i = 0; i < numIterations; i++) {
|
||||||
doTestSortedSetVsStoredFields(atLeast(300), 1, 10, 16);
|
doTestSortedSetVsStoredFields(atLeast(300), 1, 10, 16, 100);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2121,7 +2150,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
|
||||||
int numIterations = atLeast(1);
|
int numIterations = atLeast(1);
|
||||||
for (int i = 0; i < numIterations; i++) {
|
for (int i = 0; i < numIterations; i++) {
|
||||||
int fixedLength = TestUtil.nextInt(random(), 1, 10);
|
int fixedLength = TestUtil.nextInt(random(), 1, 10);
|
||||||
doTestSortedSetVsStoredFields(atLeast(300), fixedLength, fixedLength, 1);
|
doTestSortedSetVsStoredFields(atLeast(300), fixedLength, fixedLength, 1, 100);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2129,7 +2158,39 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
|
||||||
assumeTrue("Codec does not support SORTED_SET", codecSupportsSortedSet());
|
assumeTrue("Codec does not support SORTED_SET", codecSupportsSortedSet());
|
||||||
int numIterations = atLeast(1);
|
int numIterations = atLeast(1);
|
||||||
for (int i = 0; i < numIterations; i++) {
|
for (int i = 0; i < numIterations; i++) {
|
||||||
doTestSortedSetVsStoredFields(atLeast(300), 1, 10, 1);
|
doTestSortedSetVsStoredFields(atLeast(300), 1, 10, 1, 100);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSortedSetFixedLengthFewUniqueSetsVsStoredFields() throws Exception {
|
||||||
|
assumeTrue("Codec does not support SORTED_SET", codecSupportsSortedSet());
|
||||||
|
int numIterations = atLeast(1);
|
||||||
|
for (int i = 0; i < numIterations; i++) {
|
||||||
|
doTestSortedSetVsStoredFields(atLeast(300), 10, 10, 6, 6);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSortedSetVariableLengthFewUniqueSetsVsStoredFields() throws Exception {
|
||||||
|
assumeTrue("Codec does not support SORTED_SET", codecSupportsSortedSet());
|
||||||
|
int numIterations = atLeast(1);
|
||||||
|
for (int i = 0; i < numIterations; i++) {
|
||||||
|
doTestSortedSetVsStoredFields(atLeast(300), 1, 10, 6, 6);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSortedSetVariableLengthManyValuesPerDocVsStoredFields() throws Exception {
|
||||||
|
assumeTrue("Codec does not support SORTED_SET", codecSupportsSortedSet());
|
||||||
|
int numIterations = atLeast(1);
|
||||||
|
for (int i = 0; i < numIterations; i++) {
|
||||||
|
doTestSortedSetVsStoredFields(atLeast(20), 1, 10, 500, 1000);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSortedSetFixedLengthManyValuesPerDocVsStoredFields() throws Exception {
|
||||||
|
assumeTrue("Codec does not support SORTED_SET", codecSupportsSortedSet());
|
||||||
|
int numIterations = atLeast(1);
|
||||||
|
for (int i = 0; i < numIterations; i++) {
|
||||||
|
doTestSortedSetVsStoredFields(atLeast(20), 10, 10, 500, 1000);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue