Merge remote-tracking branch 'origin/master'

This commit is contained in:
Noble Paul 2016-10-03 13:06:47 +05:30
commit 08667208ba
5 changed files with 213 additions and 127 deletions

View File

@ -443,8 +443,7 @@ final class Lucene54DocValuesProducer extends DocValuesProducer implements Close
Bits docsWithField;
if (entry.format == SPARSE_COMPRESSED) {
// TODO: make a real iterator in this case!
docsWithField = getSparseLiveBits(entry);
return getSparseNumericDocValues(entry);
} else {
if (entry.missingOffset == ALL_MISSING) {
return DocValues.emptyNumeric();
@ -566,8 +565,7 @@ final class Lucene54DocValuesProducer extends DocValuesProducer implements Close
};
}
case SPARSE_COMPRESSED:
final SparseBits docsWithField = getSparseLiveBits(entry);
final LongValues values = getNumeric(entry.nonMissingValues);
final SparseNumericDocValues values = getSparseNumericDocValues(entry);
final long missingValue;
switch (entry.numberType) {
case ORDINAL:
@ -579,141 +577,125 @@ final class Lucene54DocValuesProducer extends DocValuesProducer implements Close
default:
throw new AssertionError();
}
return new SparseLongValues(docsWithField, values, missingValue);
return new SparseNumericDocValuesRandomAccessWrapper(values, missingValue);
default:
throw new AssertionError();
}
}
static final class SparseBits implements Bits {
static final class SparseNumericDocValues extends NumericDocValues {
final long maxDoc, docIDsLength, firstDocId;
final LongValues docIds;
final int docIDsLength;
final LongValues docIds, values;
long index; // index of docId in docIds
long docId; // doc ID at index
long nextDocId; // doc ID at (index+1)
int index, doc;
SparseBits(long maxDoc, long docIDsLength, LongValues docIDs) {
if (docIDsLength > 0 && maxDoc <= docIDs.get(docIDsLength - 1)) {
throw new IllegalArgumentException("maxDoc must be > the last element of docIDs");
}
this.maxDoc = maxDoc;
SparseNumericDocValues(int docIDsLength, LongValues docIDs, LongValues values) {
this.docIDsLength = docIDsLength;
this.docIds = docIDs;
this.firstDocId = docIDsLength == 0 ? maxDoc : docIDs.get(0);
this.values = values;
reset();
}
private void reset() {
void reset() {
index = -1;
this.docId = -1;
this.nextDocId = firstDocId;
doc = -1;
}
/** Gallop forward and stop as soon as an index is found that is greater than
* the given docId. {@code index} will store an index that stores a value
* that is &lt;= {@code docId} while the return value will give an index
* that stores a value that is &gt; {@code docId}. These indices can then be
* used to binary search. */
private long gallop(long docId) {
index++;
this.docId = nextDocId;
long hiIndex = index + 1;
@Override
public int docID() {
return doc;
}
while (true) {
@Override
public int nextDoc() throws IOException {
if (index >= docIDsLength - 1) {
index = docIDsLength;
return doc = NO_MORE_DOCS;
}
return doc = (int) docIds.get(++index);
}
@Override
public int advance(int target) throws IOException {
long loIndex = index;
long step = 1;
long hiIndex;
int hiDoc;
// gallop forward by exponentially growing the interval
// in order to find an interval so that the target doc
// is in ]lo, hi]. Compared to a regular binary search,
// this optimizes the case that the caller performs many
// advance calls by small deltas
do {
hiIndex = index + step;
if (hiIndex >= docIDsLength) {
hiIndex = docIDsLength;
nextDocId = maxDoc;
hiDoc = NO_MORE_DOCS;
break;
}
final long hiDocId = docIds.get(hiIndex);
if (hiDocId > docId) {
nextDocId = hiDocId;
hiDoc = (int) docIds.get(hiIndex);
if (hiDoc >= target) {
break;
}
step <<= 1;
} while (true);
final long delta = hiIndex - index;
index = hiIndex;
this.docId = hiDocId;
hiIndex += delta << 1; // double the step each time
}
return hiIndex;
}
private void binarySearch(long hiIndex, long docId) {
while (index + 1 < hiIndex) {
final long midIndex = (index + hiIndex) >>> 1;
final long midDocId = docIds.get(midIndex);
if (midDocId > docId) {
// now binary search
while (loIndex + 1 < hiIndex) {
final long midIndex = (loIndex + 1 + hiIndex) >>> 1;
final int midDoc = (int) docIds.get(midIndex);
if (midDoc >= target) {
hiIndex = midIndex;
nextDocId = midDocId;
hiDoc = midDoc;
} else {
index = midIndex;
this.docId = midDocId;
}
loIndex = midIndex;
}
}
private boolean checkInvariants(long nextIndex, long docId) {
assert this.docId <= docId;
assert this.nextDocId > docId;
assert (index == -1 && this.docId == -1) || this.docId == docIds.get(index);
assert (nextIndex == docIDsLength && nextDocId == maxDoc) || nextDocId == docIds.get(nextIndex);
return true;
}
private void exponentialSearch(long docId) {
// seek forward by doubling the interval on each iteration
final long hiIndex = gallop(docId);
assert checkInvariants(hiIndex, docId);
// now perform the actual binary search
binarySearch(hiIndex, docId);
}
boolean get(final long docId) {
if (docId < this.docId) {
// reading doc IDs backward, go back to the start
reset();
}
if (docId >= nextDocId) {
exponentialSearch(docId);
}
assert checkInvariants(index + 1, docId);
return docId == this.docId;
index = (int) hiIndex;
return doc = hiDoc;
}
@Override
public boolean get(int index) {
return get((long) index);
public long longValue() {
assert index >= 0;
assert index < docIDsLength;
return values.get(index);
}
@Override
public int length() {
return Math.toIntExact(maxDoc);
public long cost() {
return docIDsLength;
}
}
static class SparseLongValues extends LongValues {
static class SparseNumericDocValuesRandomAccessWrapper extends LongValues {
final SparseBits docsWithField;
final LongValues values;
final SparseNumericDocValues values;
final long missingValue;
SparseLongValues(SparseBits docsWithField, LongValues values, long missingValue) {
this.docsWithField = docsWithField;
SparseNumericDocValuesRandomAccessWrapper(SparseNumericDocValues values, long missingValue) {
this.values = values;
this.missingValue = missingValue;
}
@Override
public long get(long docId) {
if (docsWithField.get(docId)) {
return values.get(docsWithField.index);
public long get(long longIndex) {
final int index = Math.toIntExact(longIndex);
int doc = values.docID();
if (doc >= index) {
values.reset();
}
assert values.docID() < index;
try {
doc = values.advance(index);
} catch (IOException e) {
throw new RuntimeException(e);
}
if (doc == index) {
return values.longValue();
} else {
return missingValue;
}
@ -837,6 +819,47 @@ final class Lucene54DocValuesProducer extends DocValuesProducer implements Close
final LegacyBinaryDocValues binary = getLegacyBinary(field);
NumericEntry entry = ords.get(field.name);
final LongValues ordinals = getNumeric(entry);
if (entry.format == SPARSE_COMPRESSED) {
final SparseNumericDocValues sparseValues = ((SparseNumericDocValuesRandomAccessWrapper) ordinals).values;
return new SortedDocValues() {
@Override
public int ordValue() {
return (int) sparseValues.longValue();
}
@Override
public BytesRef lookupOrd(int ord) {
return binary.get(ord);
}
@Override
public int getValueCount() {
return valueCount;
}
@Override
public int docID() {
return sparseValues.docID();
}
@Override
public int nextDoc() throws IOException {
return sparseValues.nextDoc();
}
@Override
public int advance(int target) throws IOException {
return sparseValues.advance(target);
}
@Override
public long cost() {
return sparseValues.cost();
}
};
}
return new SortedDocValues() {
private int docID = -1;
private int ord;
@ -927,12 +950,43 @@ final class Lucene54DocValuesProducer extends DocValuesProducer implements Close
if (ss.format == SORTED_SINGLE_VALUED) {
NumericEntry numericEntry = numerics.get(field.name);
final LongValues values = getNumeric(numericEntry);
final Bits docsWithField;
if (numericEntry.format == SPARSE_COMPRESSED) {
docsWithField = ((SparseLongValues) values).docsWithField;
} else {
docsWithField = getLiveBits(numericEntry.missingOffset, maxDoc);
SparseNumericDocValues sparseValues = ((SparseNumericDocValuesRandomAccessWrapper) values).values;
return new SortedNumericDocValues() {
@Override
public long nextValue() throws IOException {
return sparseValues.longValue();
}
@Override
public int docValueCount() {
return 1;
}
@Override
public int docID() {
return sparseValues.docID();
}
@Override
public int nextDoc() throws IOException {
return sparseValues.nextDoc();
}
@Override
public int advance(int target) throws IOException {
return sparseValues.advance(target);
}
@Override
public long cost() {
return sparseValues.cost();
}
};
}
final Bits docsWithField = getLiveBits(numericEntry.missingOffset, maxDoc);
return new SortedNumericDocValues() {
int docID = -1;
@ -1192,7 +1246,8 @@ final class Lucene54DocValuesProducer extends DocValuesProducer implements Close
private SortedSetDocValues getSortedSetTable(FieldInfo field, SortedSetEntry ss) throws IOException {
final long valueCount = binaries.get(field.name).count;
final LongBinaryDocValues binary = (LongBinaryDocValues) getLegacyBinary(field);
final LongValues ordinals = getNumeric(ords.get(field.name));
final NumericEntry ordinalsEntry = ords.get(field.name);
final LongValues ordinals = getNumeric(ordinalsEntry);
final long[] table = ss.table;
final int[] offsets = ss.tableOffsets;
@ -1273,10 +1328,11 @@ final class Lucene54DocValuesProducer extends DocValuesProducer implements Close
}
}
private SparseBits getSparseLiveBits(NumericEntry entry) throws IOException {
private SparseNumericDocValues getSparseNumericDocValues(NumericEntry entry) throws IOException {
final RandomAccessInput docIdsData = this.data.randomAccessSlice(entry.missingOffset, entry.offset - entry.missingOffset);
final LongValues docIDs = DirectMonotonicReader.getInstance(entry.monotonicMeta, docIdsData);
return new SparseBits(maxDoc, entry.numDocsWithValue, docIDs);
final LongValues values = getNumeric(entry.nonMissingValues); // cannot be sparse
return new SparseNumericDocValues(Math.toIntExact(entry.numDocsWithValue), docIDs, values);
}
@Override

View File

@ -30,6 +30,7 @@ public final class LegacyNumericDocValuesWrapper extends NumericDocValues {
private final LegacyNumericDocValues values;
private final int maxDoc;
private int docID = -1;
private long value;
public LegacyNumericDocValuesWrapper(Bits docsWithField, LegacyNumericDocValues values) {
this.docsWithField = docsWithField;
@ -51,7 +52,8 @@ public final class LegacyNumericDocValuesWrapper extends NumericDocValues {
public int nextDoc() {
docID++;
while (docID < maxDoc) {
if (docsWithField.get(docID)) {
value = values.get(docID);
if (value != 0 || docsWithField.get(docID)) {
return docID;
}
docID++;
@ -82,7 +84,7 @@ public final class LegacyNumericDocValuesWrapper extends NumericDocValues {
@Override
public long longValue() {
return values.get(docID);
return value;
}
@Override

View File

@ -31,8 +31,8 @@ import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.asserting.AssertingCodec;
import org.apache.lucene.codecs.lucene54.Lucene54DocValuesProducer.SparseBits;
import org.apache.lucene.codecs.lucene54.Lucene54DocValuesProducer.SparseLongValues;
import org.apache.lucene.codecs.lucene54.Lucene54DocValuesProducer.SparseNumericDocValues;
import org.apache.lucene.codecs.lucene54.Lucene54DocValuesProducer.SparseNumericDocValuesRandomAccessWrapper;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@ -61,6 +61,7 @@ import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMFile;
@ -427,13 +428,13 @@ public class TestLucene54DocValuesFormat extends BaseCompressingDocValuesFormatT
}
}
public void testSparseLongValues() {
public void testSparseLongValues() throws IOException {
final int iters = atLeast(5);
for (int iter = 0; iter < iters; ++iter) {
final int numDocs = TestUtil.nextInt(random(), 0, 100);
final long[] docIds = new long[numDocs];
final int[] docIds = new int[numDocs];
final long[] values = new long[numDocs];
final long maxDoc;
final int maxDoc;
if (numDocs == 0) {
maxDoc = 1 + random().nextInt(10);
} else {
@ -459,35 +460,51 @@ public class TestLucene54DocValuesFormat extends BaseCompressingDocValuesFormatT
return values[Math.toIntExact(index)];
}
};
final SparseBits liveBits = new SparseBits(maxDoc, numDocs, docIdsValues);
// random-access
for (int i = 0; i < 2000; ++i) {
final long docId = TestUtil.nextLong(random(), 0, maxDoc - 1);
final boolean exists = liveBits.get(Math.toIntExact(docId));
assertEquals(Arrays.binarySearch(docIds, docId) >= 0, exists);
}
final SparseNumericDocValues sparseValues = new SparseNumericDocValues(numDocs, docIdsValues, valuesValues);
// sequential access
for (int docId = 0; docId < maxDoc; docId += random().nextInt(3)) {
final boolean exists = liveBits.get(Math.toIntExact(docId));
assertEquals(Arrays.binarySearch(docIds, docId) >= 0, exists);
assertEquals(-1, sparseValues.docID());
for (int i = 0; i < docIds.length; ++i) {
assertEquals(docIds[i], sparseValues.nextDoc());
}
assertEquals(DocIdSetIterator.NO_MORE_DOCS, sparseValues.nextDoc());
// advance
for (int i = 0; i < 2000; ++i) {
final int target = TestUtil.nextInt(random(), 0, (int) maxDoc);
int index = Arrays.binarySearch(docIds, target);
if (index < 0) {
index = -1 - index;
}
sparseValues.reset();
if (index > 0) {
assertEquals(docIds[index - 1], sparseValues.advance(Math.toIntExact(docIds[index - 1])));
}
if (index == docIds.length) {
assertEquals(DocIdSetIterator.NO_MORE_DOCS, sparseValues.advance(target));
} else {
assertEquals(docIds[index], sparseValues.advance(target));
}
}
final SparseLongValues sparseValues = new SparseLongValues(liveBits, valuesValues, missingValue);
final SparseNumericDocValuesRandomAccessWrapper raWrapper = new SparseNumericDocValuesRandomAccessWrapper(sparseValues, missingValue);
// random-access
for (int i = 0; i < 2000; ++i) {
final long docId = TestUtil.nextLong(random(), 0, maxDoc - 1);
final int docId = TestUtil.nextInt(random(), 0, maxDoc - 1);
final int idx = Arrays.binarySearch(docIds, docId);
final long value = sparseValues.get(docId);
final long value = raWrapper.get(docId);
if (idx >= 0) {
assertEquals(values[idx], value);
} else {
assertEquals(missingValue, value);
}
}
// sequential access
for (int docId = 0; docId < maxDoc; docId += random().nextInt(3)) {
final int idx = Arrays.binarySearch(docIds, docId);
final long value = sparseValues.get(docId);
final long value = raWrapper.get(docId);
if (idx >= 0) {
assertEquals(values[idx], value);
} else {

View File

@ -18,6 +18,10 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Arrays;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@ -411,11 +415,16 @@ public class TestBooleanRewrites extends LuceneTestCase {
private void assertEquals(TopDocs td1, TopDocs td2) {
assertEquals(td1.totalHits, td2.totalHits);
assertEquals(td1.scoreDocs.length, td2.scoreDocs.length);
for (int i = 0; i < td1.scoreDocs.length; ++i) {
ScoreDoc sd1 = td1.scoreDocs[i];
ScoreDoc sd2 = td2.scoreDocs[i];
assertEquals(sd1.doc, sd2.doc);
assertEquals(sd1.score, sd2.score, 0.01f);
Map<Integer, Float> expectedScores = Arrays.stream(td1.scoreDocs).collect(Collectors.toMap(sd -> sd.doc, sd -> sd.score));
Set<Integer> actualResultSet = Arrays.stream(td2.scoreDocs).map(sd -> sd.doc).collect(Collectors.toSet());
assertEquals("Set of matching documents differs",
expectedScores.keySet(), actualResultSet);
for (ScoreDoc scoreDoc : td2.scoreDocs) {
final float expectedScore = expectedScores.get(scoreDoc.doc);
final float actualScore = scoreDoc.score;
assertEquals(expectedScore, actualScore, 10e-5);
}
}
}

View File

@ -96,6 +96,8 @@ public class TestManagedSchemaThreadSafety extends SolrTestCaseJ4 {
@AfterClass
public static void stopZkServer() throws Exception {
zkServer.shutdown();
zkServer = null;
loaderPath = null;
}
@Test