mirror of https://github.com/apache/lucene.git
Merge remote-tracking branch 'origin/master'
commit 08667208ba
@@ -443,8 +443,7 @@ final class Lucene54DocValuesProducer extends DocValuesProducer implements Close
     Bits docsWithField;
     if (entry.format == SPARSE_COMPRESSED) {
-      // TODO: make a real iterator in this case!
-      docsWithField = getSparseLiveBits(entry);
+      return getSparseNumericDocValues(entry);
     } else {
       if (entry.missingOffset == ALL_MISSING) {
         return DocValues.emptyNumeric();
@@ -566,8 +565,7 @@ final class Lucene54DocValuesProducer extends DocValuesProducer implements Close
       };
     }
     case SPARSE_COMPRESSED:
-      final SparseBits docsWithField = getSparseLiveBits(entry);
-      final LongValues values = getNumeric(entry.nonMissingValues);
+      final SparseNumericDocValues values = getSparseNumericDocValues(entry);
       final long missingValue;
       switch (entry.numberType) {
         case ORDINAL:
@@ -579,141 +577,125 @@ final class Lucene54DocValuesProducer extends DocValuesProducer implements Close
         default:
           throw new AssertionError();
       }
-      return new SparseLongValues(docsWithField, values, missingValue);
+      return new SparseNumericDocValuesRandomAccessWrapper(values, missingValue);
     default:
       throw new AssertionError();
     }
   }

-  static final class SparseBits implements Bits {
+  static final class SparseNumericDocValues extends NumericDocValues {

-    final long maxDoc, docIDsLength, firstDocId;
-    final LongValues docIds;
+    final int docIDsLength;
+    final LongValues docIds, values;

-    long index;     // index of docId in docIds
-    long docId;     // doc ID at index
-    long nextDocId; // doc ID at (index+1)
+    int index, doc;

-    SparseBits(long maxDoc, long docIDsLength, LongValues docIDs) {
-      if (docIDsLength > 0 && maxDoc <= docIDs.get(docIDsLength - 1)) {
-        throw new IllegalArgumentException("maxDoc must be > the last element of docIDs");
-      }
-      this.maxDoc = maxDoc;
+    SparseNumericDocValues(int docIDsLength, LongValues docIDs, LongValues values) {
       this.docIDsLength = docIDsLength;
       this.docIds = docIDs;
-      this.firstDocId = docIDsLength == 0 ? maxDoc : docIDs.get(0);
+      this.values = values;
       reset();
     }

-    private void reset() {
+    void reset() {
       index = -1;
-      this.docId = -1;
-      this.nextDocId = firstDocId;
+      doc = -1;
     }

-    /** Gallop forward and stop as soon as an index is found that is greater than
-     *  the given docId. {@code index} will store an index that stores a value
-     *  that is <= {@code docId} while the return value will give an index
-     *  that stores a value that is > {@code docId}. These indices can then be
-     *  used to binary search. */
-    private long gallop(long docId) {
-      index++;
-      this.docId = nextDocId;
-      long hiIndex = index + 1;
-
-      while (true) {
-        if (hiIndex >= docIDsLength) {
-          hiIndex = docIDsLength;
-          nextDocId = maxDoc;
-          break;
-        }
-
-        final long hiDocId = docIds.get(hiIndex);
-        if (hiDocId > docId) {
-          nextDocId = hiDocId;
-          break;
-        }
-
-        final long delta = hiIndex - index;
-        index = hiIndex;
-        this.docId = hiDocId;
-        hiIndex += delta << 1; // double the step each time
-      }
-      return hiIndex;
+    @Override
+    public int docID() {
+      return doc;
     }

-    private void binarySearch(long hiIndex, long docId) {
-      while (index + 1 < hiIndex) {
-        final long midIndex = (index + hiIndex) >>> 1;
-        final long midDocId = docIds.get(midIndex);
-        if (midDocId > docId) {
-          hiIndex = midIndex;
-          nextDocId = midDocId;
-        } else {
-          index = midIndex;
-          this.docId = midDocId;
-        }
-      }
+    @Override
+    public int nextDoc() throws IOException {
+      if (index >= docIDsLength - 1) {
+        index = docIDsLength;
+        return doc = NO_MORE_DOCS;
+      }
+      return doc = (int) docIds.get(++index);
     }

-    private boolean checkInvariants(long nextIndex, long docId) {
-      assert this.docId <= docId;
-      assert this.nextDocId > docId;
-      assert (index == -1 && this.docId == -1) || this.docId == docIds.get(index);
-      assert (nextIndex == docIDsLength && nextDocId == maxDoc) || nextDocId == docIds.get(nextIndex);
-      return true;
-    }
-
-    private void exponentialSearch(long docId) {
-      // seek forward by doubling the interval on each iteration
-      final long hiIndex = gallop(docId);
-      assert checkInvariants(hiIndex, docId);
-
-      // now perform the actual binary search
-      binarySearch(hiIndex, docId);
-    }
-
-    boolean get(final long docId) {
-      if (docId < this.docId) {
-        // reading doc IDs backward, go back to the start
-        reset();
-      }
-
-      if (docId >= nextDocId) {
-        exponentialSearch(docId);
-      }
-
-      assert checkInvariants(index + 1, docId);
-      return docId == this.docId;
+    @Override
+    public int advance(int target) throws IOException {
+      long loIndex = index;
+      long step = 1;
+      long hiIndex;
+      int hiDoc;
+
+      // gallop forward by exponentially growing the interval
+      // in order to find an interval so that the target doc
+      // is in ]lo, hi]. Compared to a regular binary search,
+      // this optimizes the case that the caller performs many
+      // advance calls by small deltas
+      do {
+        hiIndex = index + step;
+        if (hiIndex >= docIDsLength) {
+          hiIndex = docIDsLength;
+          hiDoc = NO_MORE_DOCS;
+          break;
+        }
+
+        hiDoc = (int) docIds.get(hiIndex);
+        if (hiDoc >= target) {
+          break;
+        }
+        step <<= 1;
+      } while (true);
+
+      // now binary search
+      while (loIndex + 1 < hiIndex) {
+        final long midIndex = (loIndex + 1 + hiIndex) >>> 1;
+        final int midDoc = (int) docIds.get(midIndex);
+        if (midDoc >= target) {
+          hiIndex = midIndex;
+          hiDoc = midDoc;
+        } else {
+          loIndex = midIndex;
+        }
+      }
+
+      index = (int) hiIndex;
+      return doc = hiDoc;
     }

     @Override
-    public boolean get(int index) {
-      return get((long) index);
+    public long longValue() {
+      assert index >= 0;
+      assert index < docIDsLength;
+      return values.get(index);
     }

     @Override
-    public int length() {
-      return Math.toIntExact(maxDoc);
+    public long cost() {
+      return docIDsLength;
     }
   }

-  static class SparseLongValues extends LongValues {
+  static class SparseNumericDocValuesRandomAccessWrapper extends LongValues {

-    final SparseBits docsWithField;
-    final LongValues values;
+    final SparseNumericDocValues values;
     final long missingValue;

-    SparseLongValues(SparseBits docsWithField, LongValues values, long missingValue) {
-      this.docsWithField = docsWithField;
+    SparseNumericDocValuesRandomAccessWrapper(SparseNumericDocValues values, long missingValue) {
       this.values = values;
       this.missingValue = missingValue;
     }

     @Override
-    public long get(long docId) {
-      if (docsWithField.get(docId)) {
-        return values.get(docsWithField.index);
+    public long get(long longIndex) {
+      final int index = Math.toIntExact(longIndex);
+      int doc = values.docID();
+      if (doc >= index) {
+        values.reset();
+      }
+      assert values.docID() < index;
+      try {
+        doc = values.advance(index);
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+      if (doc == index) {
+        return values.longValue();
       } else {
         return missingValue;
       }
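
The advance(int target) implementation above pairs a galloping (exponential-step) probe with a binary search over the sorted doc-ID list, which keeps small forward jumps cheap. Below is a minimal, self-contained sketch of that pattern on a plain sorted int[]; the class and method names are invented for illustration and the code is not part of the patch.

import java.util.Arrays;

/** Standalone sketch of the "gallop then binary search" advance used above.
 *  Names (GallopAdvanceDemo, advanceIndex) are illustrative only. */
public class GallopAdvanceDemo {

  /**
   * Returns the smallest index in [from, docs.length) such that docs[index] >= target,
   * or docs.length if there is none. Assumes docs is sorted ascending and that every
   * entry before 'from' is already known to be < target (forward-only advancing).
   */
  static int advanceIndex(int[] docs, int from, int target) {
    int lo = from - 1;   // exclusive lower bound: docs[lo] < target by assumption
    int hi;              // candidate upper bound: docs[hi] >= target, or hi == docs.length
    int step = 1;

    // Galloping phase: grow the probe distance exponentially. This is cheap when
    // callers advance by small deltas, which is the common case for doc IDs.
    while (true) {
      hi = lo + step;
      if (hi >= docs.length) {
        hi = docs.length;
        break;
      }
      if (docs[hi] >= target) {
        break;
      }
      step <<= 1;
    }

    // Binary search in (lo, hi]: the boundary is inside because docs is sorted.
    while (lo + 1 < hi) {
      final int mid = (lo + 1 + hi) >>> 1;
      if (docs[mid] >= target) {
        hi = mid;
      } else {
        lo = mid;
      }
    }
    return hi;
  }

  public static void main(String[] args) {
    int[] docs = {3, 7, 8, 20, 42, 57};
    System.out.println(advanceIndex(docs, 0, 9));   // 3 (docs[3] == 20)
    System.out.println(advanceIndex(docs, 3, 42));  // 4
    System.out.println(advanceIndex(docs, 5, 99));  // 6 (exhausted)
    System.out.println(Arrays.toString(docs));
  }
}
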
@@ -837,6 +819,47 @@ final class Lucene54DocValuesProducer extends DocValuesProducer implements Close
     final LegacyBinaryDocValues binary = getLegacyBinary(field);
     NumericEntry entry = ords.get(field.name);
     final LongValues ordinals = getNumeric(entry);
+    if (entry.format == SPARSE_COMPRESSED) {
+      final SparseNumericDocValues sparseValues = ((SparseNumericDocValuesRandomAccessWrapper) ordinals).values;
+      return new SortedDocValues() {
+
+        @Override
+        public int ordValue() {
+          return (int) sparseValues.longValue();
+        }
+
+        @Override
+        public BytesRef lookupOrd(int ord) {
+          return binary.get(ord);
+        }
+
+        @Override
+        public int getValueCount() {
+          return valueCount;
+        }
+
+        @Override
+        public int docID() {
+          return sparseValues.docID();
+        }
+
+        @Override
+        public int nextDoc() throws IOException {
+          return sparseValues.nextDoc();
+        }
+
+        @Override
+        public int advance(int target) throws IOException {
+          return sparseValues.advance(target);
+        }
+
+        @Override
+        public long cost() {
+          return sparseValues.cost();
+        }
+
+      };
+    }
     return new SortedDocValues() {
       private int docID = -1;
       private int ord;
@@ -927,12 +950,43 @@ final class Lucene54DocValuesProducer extends DocValuesProducer implements Close
     if (ss.format == SORTED_SINGLE_VALUED) {
       NumericEntry numericEntry = numerics.get(field.name);
       final LongValues values = getNumeric(numericEntry);
-      final Bits docsWithField;
       if (numericEntry.format == SPARSE_COMPRESSED) {
-        docsWithField = ((SparseLongValues) values).docsWithField;
-      } else {
-        docsWithField = getLiveBits(numericEntry.missingOffset, maxDoc);
+        SparseNumericDocValues sparseValues = ((SparseNumericDocValuesRandomAccessWrapper) values).values;
+        return new SortedNumericDocValues() {
+
+          @Override
+          public long nextValue() throws IOException {
+            return sparseValues.longValue();
+          }
+
+          @Override
+          public int docValueCount() {
+            return 1;
+          }
+
+          @Override
+          public int docID() {
+            return sparseValues.docID();
+          }
+
+          @Override
+          public int nextDoc() throws IOException {
+            return sparseValues.nextDoc();
+          }
+
+          @Override
+          public int advance(int target) throws IOException {
+            return sparseValues.advance(target);
+          }
+
+          @Override
+          public long cost() {
+            return sparseValues.cost();
+          }
+
+        };
       }
+      final Bits docsWithField = getLiveBits(numericEntry.missingOffset, maxDoc);
       return new SortedNumericDocValues() {
         int docID = -1;

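
Both wrappers above expose the forward-only iterator API (docID/nextDoc/advance plus per-document value accessors). As an illustration of how a consumer drives that API, assuming a lucene-core jar from around this change on the classpath, a helper like the following could sum every value of every document that has the field; the class name is invented for this sketch.

import java.io.IOException;

import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.DocIdSetIterator;

/** Illustrative consumer of the iterator-style doc values API; not part of the patch. */
public final class SortedNumericConsumerDemo {

  /** Sums every value of every document that has the field. */
  static long sumAllValues(SortedNumericDocValues values) throws IOException {
    long sum = 0;
    // Forward-only iteration: nextDoc() positions the iterator on the next
    // document that has a value; per-document values are then read in order.
    for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
      final int count = values.docValueCount();
      for (int i = 0; i < count; ++i) {
        sum += values.nextValue();
      }
    }
    return sum;
  }

  private SortedNumericConsumerDemo() {}
}

In the sparse case above, cost() reports the number of documents that actually carry a value, so callers can size their work accordingly.
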
@@ -949,7 +1003,7 @@ final class Lucene54DocValuesProducer extends DocValuesProducer implements Close
             docID = NO_MORE_DOCS;
             break;
           }

           if (docsWithField.get(docID)) {
             // TODO: use .nextSetBit here, at least!!
             break;
@@ -1192,7 +1246,8 @@ final class Lucene54DocValuesProducer extends DocValuesProducer implements Close
   private SortedSetDocValues getSortedSetTable(FieldInfo field, SortedSetEntry ss) throws IOException {
     final long valueCount = binaries.get(field.name).count;
     final LongBinaryDocValues binary = (LongBinaryDocValues) getLegacyBinary(field);
-    final LongValues ordinals = getNumeric(ords.get(field.name));
+    final NumericEntry ordinalsEntry = ords.get(field.name);
+    final LongValues ordinals = getNumeric(ordinalsEntry);

     final long[] table = ss.table;
     final int[] offsets = ss.tableOffsets;
@@ -1273,10 +1328,11 @@ final class Lucene54DocValuesProducer extends DocValuesProducer implements Close
     }
   }

-  private SparseBits getSparseLiveBits(NumericEntry entry) throws IOException {
+  private SparseNumericDocValues getSparseNumericDocValues(NumericEntry entry) throws IOException {
     final RandomAccessInput docIdsData = this.data.randomAccessSlice(entry.missingOffset, entry.offset - entry.missingOffset);
     final LongValues docIDs = DirectMonotonicReader.getInstance(entry.monotonicMeta, docIdsData);
-    return new SparseBits(maxDoc, entry.numDocsWithValue, docIDs);
+    final LongValues values = getNumeric(entry.nonMissingValues); // cannot be sparse
+    return new SparseNumericDocValues(Math.toIntExact(entry.numDocsWithValue), docIDs, values);
   }

   @Override
@@ -30,6 +30,7 @@ public final class LegacyNumericDocValuesWrapper extends NumericDocValues {
   private final LegacyNumericDocValues values;
   private final int maxDoc;
   private int docID = -1;
+  private long value;

   public LegacyNumericDocValuesWrapper(Bits docsWithField, LegacyNumericDocValues values) {
     this.docsWithField = docsWithField;
@@ -51,7 +52,8 @@ public final class LegacyNumericDocValuesWrapper extends NumericDocValues {
   public int nextDoc() {
     docID++;
     while (docID < maxDoc) {
-      if (docsWithField.get(docID)) {
+      value = values.get(docID);
+      if (value != 0 || docsWithField.get(docID)) {
         return docID;
       }
       docID++;
@@ -82,7 +84,7 @@ public final class LegacyNumericDocValuesWrapper extends NumericDocValues {

   @Override
   public long longValue() {
-    return values.get(docID);
+    return value;
   }

   @Override
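
The wrapper change above reads the value eagerly in nextDoc(), falls back to the docsWithField bits only when that value is 0 (the one ambiguous case, since the legacy random-access API reports 0 for missing documents), and lets longValue() return the cached value without a second lookup. A toy, self-contained sketch of the same caching pattern follows, with plain arrays standing in for LegacyNumericDocValues and Bits; none of these names come from the patch.

/** Toy illustration of the skip-and-cache pattern used by the wrapper above. */
public class CachingIteratorDemo {

  static final int NO_MORE_DOCS = Integer.MAX_VALUE;

  private final long[] values;       // dense random-access values; missing docs read as 0
  private final boolean[] hasValue;  // which docs really have a value
  private int docID = -1;
  private long value;

  CachingIteratorDemo(long[] values, boolean[] hasValue) {
    // invariant assumed here, as in the legacy API: values[d] == 0 whenever hasValue[d] is false
    this.values = values;
    this.hasValue = hasValue;
  }

  public int nextDoc() {
    docID++;
    while (docID < values.length) {
      value = values[docID];
      // A non-zero value can only come from a document that has the field,
      // so the hasValue lookup is only needed to disambiguate zeros.
      if (value != 0 || hasValue[docID]) {
        return docID;
      }
      docID++;
    }
    docID = NO_MORE_DOCS;
    return docID;
  }

  public long longValue() {
    return value; // cached by nextDoc(), no second random access
  }

  public static void main(String[] args) {
    long[] vals = {0, 5, 0, -2};
    boolean[] has = {false, true, true, true};
    CachingIteratorDemo it = new CachingIteratorDemo(vals, has);
    for (int doc = it.nextDoc(); doc != NO_MORE_DOCS; doc = it.nextDoc()) {
      System.out.println(doc + " -> " + it.longValue());
    }
    // prints: 1 -> 5, 2 -> 0, 3 -> -2
  }
}
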
@@ -31,8 +31,8 @@ import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.asserting.AssertingCodec;
-import org.apache.lucene.codecs.lucene54.Lucene54DocValuesProducer.SparseBits;
-import org.apache.lucene.codecs.lucene54.Lucene54DocValuesProducer.SparseLongValues;
+import org.apache.lucene.codecs.lucene54.Lucene54DocValuesProducer.SparseNumericDocValues;
+import org.apache.lucene.codecs.lucene54.Lucene54DocValuesProducer.SparseNumericDocValuesRandomAccessWrapper;
 import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -61,6 +61,7 @@ import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum.SeekStatus;
+import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMFile;
@@ -427,13 +428,13 @@ public class TestLucene54DocValuesFormat extends BaseCompressingDocValuesFormatT
     }
   }

-  public void testSparseLongValues() {
+  public void testSparseLongValues() throws IOException {
     final int iters = atLeast(5);
     for (int iter = 0; iter < iters; ++iter) {
       final int numDocs = TestUtil.nextInt(random(), 0, 100);
-      final long[] docIds = new long[numDocs];
+      final int[] docIds = new int[numDocs];
       final long[] values = new long[numDocs];
-      final long maxDoc;
+      final int maxDoc;
       if (numDocs == 0) {
         maxDoc = 1 + random().nextInt(10);
       } else {
@@ -459,35 +460,51 @@ public class TestLucene54DocValuesFormat extends BaseCompressingDocValuesFormatT
           return values[Math.toIntExact(index)];
         }
       };
-      final SparseBits liveBits = new SparseBits(maxDoc, numDocs, docIdsValues);
-      // random-access
-      for (int i = 0; i < 2000; ++i) {
-        final long docId = TestUtil.nextLong(random(), 0, maxDoc - 1);
-        final boolean exists = liveBits.get(Math.toIntExact(docId));
-        assertEquals(Arrays.binarySearch(docIds, docId) >= 0, exists);
-      }
+      final SparseNumericDocValues sparseValues = new SparseNumericDocValues(numDocs, docIdsValues, valuesValues);

       // sequential access
-      for (int docId = 0; docId < maxDoc; docId += random().nextInt(3)) {
-        final boolean exists = liveBits.get(Math.toIntExact(docId));
-        assertEquals(Arrays.binarySearch(docIds, docId) >= 0, exists);
+      assertEquals(-1, sparseValues.docID());
+      for (int i = 0; i < docIds.length; ++i) {
+        assertEquals(docIds[i], sparseValues.nextDoc());
       }
+      assertEquals(DocIdSetIterator.NO_MORE_DOCS, sparseValues.nextDoc());
+
+      // advance
+      for (int i = 0; i < 2000; ++i) {
+        final int target = TestUtil.nextInt(random(), 0, (int) maxDoc);
+        int index = Arrays.binarySearch(docIds, target);
+        if (index < 0) {
+          index = -1 - index;
+        }
+        sparseValues.reset();
+        if (index > 0) {
+          assertEquals(docIds[index - 1], sparseValues.advance(Math.toIntExact(docIds[index - 1])));
+        }
+        if (index == docIds.length) {
+          assertEquals(DocIdSetIterator.NO_MORE_DOCS, sparseValues.advance(target));
+        } else {
+          assertEquals(docIds[index], sparseValues.advance(target));
+        }
+      }

-      final SparseLongValues sparseValues = new SparseLongValues(liveBits, valuesValues, missingValue);
+      final SparseNumericDocValuesRandomAccessWrapper raWrapper = new SparseNumericDocValuesRandomAccessWrapper(sparseValues, missingValue);

       // random-access
       for (int i = 0; i < 2000; ++i) {
-        final long docId = TestUtil.nextLong(random(), 0, maxDoc - 1);
+        final int docId = TestUtil.nextInt(random(), 0, maxDoc - 1);
         final int idx = Arrays.binarySearch(docIds, docId);
-        final long value = sparseValues.get(docId);
+        final long value = raWrapper.get(docId);
         if (idx >= 0) {
           assertEquals(values[idx], value);
         } else {
           assertEquals(missingValue, value);
         }
       }

       // sequential access
       for (int docId = 0; docId < maxDoc; docId += random().nextInt(3)) {
         final int idx = Arrays.binarySearch(docIds, docId);
-        final long value = sparseValues.get(docId);
+        final long value = raWrapper.get(docId);
         if (idx >= 0) {
           assertEquals(values[idx], value);
         } else {
@@ -18,6 +18,10 @@ package org.apache.lucene.search;


 import java.io.IOException;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;

 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -411,11 +415,16 @@ public class TestBooleanRewrites extends LuceneTestCase {
   private void assertEquals(TopDocs td1, TopDocs td2) {
     assertEquals(td1.totalHits, td2.totalHits);
-    assertEquals(td1.scoreDocs.length, td2.scoreDocs.length);
-    for (int i = 0; i < td1.scoreDocs.length; ++i) {
-      ScoreDoc sd1 = td1.scoreDocs[i];
-      ScoreDoc sd2 = td2.scoreDocs[i];
-      assertEquals(sd1.doc, sd2.doc);
-      assertEquals(sd1.score, sd2.score, 0.01f);
+    Map<Integer, Float> expectedScores = Arrays.stream(td1.scoreDocs).collect(Collectors.toMap(sd -> sd.doc, sd -> sd.score));
+    Set<Integer> actualResultSet = Arrays.stream(td2.scoreDocs).map(sd -> sd.doc).collect(Collectors.toSet());
+
+    assertEquals("Set of matching documents differs",
+        expectedScores.keySet(), actualResultSet);
+
+    for (ScoreDoc scoreDoc : td2.scoreDocs) {
+      final float expectedScore = expectedScores.get(scoreDoc.doc);
+      final float actualScore = scoreDoc.score;
+      assertEquals(expectedScore, actualScore, 10e-5);
     }
   }
 }
@@ -96,6 +96,8 @@ public class TestManagedSchemaThreadSafety extends SolrTestCaseJ4 {
   @AfterClass
   public static void stopZkServer() throws Exception {
     zkServer.shutdown();
+    zkServer = null;
+    loaderPath = null;
   }

   @Test