From 632d129871ab35a341b9215d3bc99043cb4cc041 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Mon, 3 Oct 2016 09:07:23 +0200 Subject: [PATCH 1/4] LUCENE-7469: Avoid TestBooleanRewrites test failures due to floating-point inaccuracy issues. --- .../lucene/search/TestBooleanRewrites.java | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java index 3ec2dd386f9..5e6590667aa 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java @@ -18,6 +18,10 @@ package org.apache.lucene.search; import java.io.IOException; +import java.util.Arrays; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -411,11 +415,16 @@ public class TestBooleanRewrites extends LuceneTestCase { private void assertEquals(TopDocs td1, TopDocs td2) { assertEquals(td1.totalHits, td2.totalHits); assertEquals(td1.scoreDocs.length, td2.scoreDocs.length); - for (int i = 0; i < td1.scoreDocs.length; ++i) { - ScoreDoc sd1 = td1.scoreDocs[i]; - ScoreDoc sd2 = td2.scoreDocs[i]; - assertEquals(sd1.doc, sd2.doc); - assertEquals(sd1.score, sd2.score, 0.01f); + Map expectedScores = Arrays.stream(td1.scoreDocs).collect(Collectors.toMap(sd -> sd.doc, sd -> sd.score)); + Set actualResultSet = Arrays.stream(td2.scoreDocs).map(sd -> sd.doc).collect(Collectors.toSet()); + + assertEquals("Set of matching documents differs", + expectedScores.keySet(), actualResultSet); + + for (ScoreDoc scoreDoc : td2.scoreDocs) { + final float expectedScore = expectedScores.get(scoreDoc.doc); + final float actualScore = scoreDoc.score; + assertEquals(expectedScore, actualScore, 10e-5); } } } From cc4c780227e999339e083cababff96912c4fbb53 Mon Sep 17 00:00:00 2001 From: Mikhail Khludnev Date: Mon, 3 Oct 2016 10:18:45 +0300 Subject: [PATCH 2/4] SOLR-9554: clear statics to fix the test failure --- .../org/apache/solr/schema/TestManagedSchemaThreadSafety.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/solr/core/src/test/org/apache/solr/schema/TestManagedSchemaThreadSafety.java b/solr/core/src/test/org/apache/solr/schema/TestManagedSchemaThreadSafety.java index 043632e81ed..e4a5c482eef 100644 --- a/solr/core/src/test/org/apache/solr/schema/TestManagedSchemaThreadSafety.java +++ b/solr/core/src/test/org/apache/solr/schema/TestManagedSchemaThreadSafety.java @@ -96,6 +96,8 @@ public class TestManagedSchemaThreadSafety extends SolrTestCaseJ4 { @AfterClass public static void stopZkServer() throws Exception { zkServer.shutdown(); + zkServer = null; + loaderPath = null; } @Test From d0ff2d2735170b226e1fead100b9a4c0c0dcb50a Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Mon, 3 Oct 2016 09:14:06 +0200 Subject: [PATCH 3/4] LUCENE-7459: LegacyNumericDocValuesWrapper should check the value before the bits for docs that have a value. --- .../apache/lucene/index/LegacyNumericDocValuesWrapper.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/LegacyNumericDocValuesWrapper.java b/lucene/core/src/java/org/apache/lucene/index/LegacyNumericDocValuesWrapper.java index d9c997c3c1b..64108e178c2 100644 --- a/lucene/core/src/java/org/apache/lucene/index/LegacyNumericDocValuesWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/index/LegacyNumericDocValuesWrapper.java @@ -30,6 +30,7 @@ public final class LegacyNumericDocValuesWrapper extends NumericDocValues { private final LegacyNumericDocValues values; private final int maxDoc; private int docID = -1; + private long value; public LegacyNumericDocValuesWrapper(Bits docsWithField, LegacyNumericDocValues values) { this.docsWithField = docsWithField; @@ -51,7 +52,8 @@ public final class LegacyNumericDocValuesWrapper extends NumericDocValues { public int nextDoc() { docID++; while (docID < maxDoc) { - if (docsWithField.get(docID)) { + value = values.get(docID); + if (value != 0 || docsWithField.get(docID)) { return docID; } docID++; @@ -82,7 +84,7 @@ public final class LegacyNumericDocValuesWrapper extends NumericDocValues { @Override public long longValue() { - return values.get(docID); + return value; } @Override From 2f88bc80c2c1afed975199adb3f340fcec8179aa Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Mon, 3 Oct 2016 09:20:29 +0200 Subject: [PATCH 4/4] LUCENE-7457: Make Lucene54DocValuesFormat's sparse case actually implement an iterator. --- .../lucene54/Lucene54DocValuesProducer.java | 258 +++++++++++------- .../lucene54/TestLucene54DocValuesFormat.java | 55 ++-- 2 files changed, 193 insertions(+), 120 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene54/Lucene54DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene54/Lucene54DocValuesProducer.java index 62e631c474c..9356aedf973 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene54/Lucene54DocValuesProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene54/Lucene54DocValuesProducer.java @@ -443,8 +443,7 @@ final class Lucene54DocValuesProducer extends DocValuesProducer implements Close Bits docsWithField; if (entry.format == SPARSE_COMPRESSED) { - // TODO: make a real iterator in this case! - docsWithField = getSparseLiveBits(entry); + return getSparseNumericDocValues(entry); } else { if (entry.missingOffset == ALL_MISSING) { return DocValues.emptyNumeric(); @@ -566,8 +565,7 @@ final class Lucene54DocValuesProducer extends DocValuesProducer implements Close }; } case SPARSE_COMPRESSED: - final SparseBits docsWithField = getSparseLiveBits(entry); - final LongValues values = getNumeric(entry.nonMissingValues); + final SparseNumericDocValues values = getSparseNumericDocValues(entry); final long missingValue; switch (entry.numberType) { case ORDINAL: @@ -579,141 +577,125 @@ final class Lucene54DocValuesProducer extends DocValuesProducer implements Close default: throw new AssertionError(); } - return new SparseLongValues(docsWithField, values, missingValue); + return new SparseNumericDocValuesRandomAccessWrapper(values, missingValue); default: throw new AssertionError(); } } - static final class SparseBits implements Bits { + static final class SparseNumericDocValues extends NumericDocValues { - final long maxDoc, docIDsLength, firstDocId; - final LongValues docIds; + final int docIDsLength; + final LongValues docIds, values; - long index; // index of docId in docIds - long docId; // doc ID at index - long nextDocId; // doc ID at (index+1) + int index, doc; - SparseBits(long maxDoc, long docIDsLength, LongValues docIDs) { - if (docIDsLength > 0 && maxDoc <= docIDs.get(docIDsLength - 1)) { - throw new IllegalArgumentException("maxDoc must be > the last element of docIDs"); - } - this.maxDoc = maxDoc; + SparseNumericDocValues(int docIDsLength, LongValues docIDs, LongValues values) { this.docIDsLength = docIDsLength; this.docIds = docIDs; - this.firstDocId = docIDsLength == 0 ? maxDoc : docIDs.get(0); + this.values = values; reset(); } - private void reset() { + void reset() { index = -1; - this.docId = -1; - this.nextDocId = firstDocId; + doc = -1; } - /** Gallop forward and stop as soon as an index is found that is greater than - * the given docId. {@code index} will store an index that stores a value - * that is <= {@code docId} while the return value will give an index - * that stores a value that is > {@code docId}. These indices can then be - * used to binary search. */ - private long gallop(long docId) { - index++; - this.docId = nextDocId; - long hiIndex = index + 1; + @Override + public int docID() { + return doc; + } - while (true) { + @Override + public int nextDoc() throws IOException { + if (index >= docIDsLength - 1) { + index = docIDsLength; + return doc = NO_MORE_DOCS; + } + return doc = (int) docIds.get(++index); + } + + @Override + public int advance(int target) throws IOException { + long loIndex = index; + long step = 1; + long hiIndex; + int hiDoc; + + // gallop forward by exponentially growing the interval + // in order to find an interval so that the target doc + // is in ]lo, hi]. Compared to a regular binary search, + // this optimizes the case that the caller performs many + // advance calls by small deltas + do { + hiIndex = index + step; if (hiIndex >= docIDsLength) { hiIndex = docIDsLength; - nextDocId = maxDoc; + hiDoc = NO_MORE_DOCS; break; } - - final long hiDocId = docIds.get(hiIndex); - if (hiDocId > docId) { - nextDocId = hiDocId; + hiDoc = (int) docIds.get(hiIndex); + if (hiDoc >= target) { break; } + step <<= 1; + } while (true); - final long delta = hiIndex - index; - index = hiIndex; - this.docId = hiDocId; - hiIndex += delta << 1; // double the step each time - } - return hiIndex; - } - - private void binarySearch(long hiIndex, long docId) { - while (index + 1 < hiIndex) { - final long midIndex = (index + hiIndex) >>> 1; - final long midDocId = docIds.get(midIndex); - if (midDocId > docId) { + // now binary search + while (loIndex + 1 < hiIndex) { + final long midIndex = (loIndex + 1 + hiIndex) >>> 1; + final int midDoc = (int) docIds.get(midIndex); + if (midDoc >= target) { hiIndex = midIndex; - nextDocId = midDocId; + hiDoc = midDoc; } else { - index = midIndex; - this.docId = midDocId; + loIndex = midIndex; } } - } - private boolean checkInvariants(long nextIndex, long docId) { - assert this.docId <= docId; - assert this.nextDocId > docId; - assert (index == -1 && this.docId == -1) || this.docId == docIds.get(index); - assert (nextIndex == docIDsLength && nextDocId == maxDoc) || nextDocId == docIds.get(nextIndex); - return true; - } - - private void exponentialSearch(long docId) { - // seek forward by doubling the interval on each iteration - final long hiIndex = gallop(docId); - assert checkInvariants(hiIndex, docId); - - // now perform the actual binary search - binarySearch(hiIndex, docId); - } - - boolean get(final long docId) { - if (docId < this.docId) { - // reading doc IDs backward, go back to the start - reset(); - } - - if (docId >= nextDocId) { - exponentialSearch(docId); - } - - assert checkInvariants(index + 1, docId); - return docId == this.docId; + index = (int) hiIndex; + return doc = hiDoc; } @Override - public boolean get(int index) { - return get((long) index); + public long longValue() { + assert index >= 0; + assert index < docIDsLength; + return values.get(index); } @Override - public int length() { - return Math.toIntExact(maxDoc); + public long cost() { + return docIDsLength; } } - static class SparseLongValues extends LongValues { + static class SparseNumericDocValuesRandomAccessWrapper extends LongValues { - final SparseBits docsWithField; - final LongValues values; + final SparseNumericDocValues values; final long missingValue; - SparseLongValues(SparseBits docsWithField, LongValues values, long missingValue) { - this.docsWithField = docsWithField; + SparseNumericDocValuesRandomAccessWrapper(SparseNumericDocValues values, long missingValue) { this.values = values; this.missingValue = missingValue; } @Override - public long get(long docId) { - if (docsWithField.get(docId)) { - return values.get(docsWithField.index); + public long get(long longIndex) { + final int index = Math.toIntExact(longIndex); + int doc = values.docID(); + if (doc >= index) { + values.reset(); + } + assert values.docID() < index; + try { + doc = values.advance(index); + } catch (IOException e) { + throw new RuntimeException(e); + } + if (doc == index) { + return values.longValue(); } else { return missingValue; } @@ -837,6 +819,47 @@ final class Lucene54DocValuesProducer extends DocValuesProducer implements Close final LegacyBinaryDocValues binary = getLegacyBinary(field); NumericEntry entry = ords.get(field.name); final LongValues ordinals = getNumeric(entry); + if (entry.format == SPARSE_COMPRESSED) { + final SparseNumericDocValues sparseValues = ((SparseNumericDocValuesRandomAccessWrapper) ordinals).values; + return new SortedDocValues() { + + @Override + public int ordValue() { + return (int) sparseValues.longValue(); + } + + @Override + public BytesRef lookupOrd(int ord) { + return binary.get(ord); + } + + @Override + public int getValueCount() { + return valueCount; + } + + @Override + public int docID() { + return sparseValues.docID(); + } + + @Override + public int nextDoc() throws IOException { + return sparseValues.nextDoc(); + } + + @Override + public int advance(int target) throws IOException { + return sparseValues.advance(target); + } + + @Override + public long cost() { + return sparseValues.cost(); + } + + }; + } return new SortedDocValues() { private int docID = -1; private int ord; @@ -927,12 +950,43 @@ final class Lucene54DocValuesProducer extends DocValuesProducer implements Close if (ss.format == SORTED_SINGLE_VALUED) { NumericEntry numericEntry = numerics.get(field.name); final LongValues values = getNumeric(numericEntry); - final Bits docsWithField; if (numericEntry.format == SPARSE_COMPRESSED) { - docsWithField = ((SparseLongValues) values).docsWithField; - } else { - docsWithField = getLiveBits(numericEntry.missingOffset, maxDoc); + SparseNumericDocValues sparseValues = ((SparseNumericDocValuesRandomAccessWrapper) values).values; + return new SortedNumericDocValues() { + + @Override + public long nextValue() throws IOException { + return sparseValues.longValue(); + } + + @Override + public int docValueCount() { + return 1; + } + + @Override + public int docID() { + return sparseValues.docID(); + } + + @Override + public int nextDoc() throws IOException { + return sparseValues.nextDoc(); + } + + @Override + public int advance(int target) throws IOException { + return sparseValues.advance(target); + } + + @Override + public long cost() { + return sparseValues.cost(); + } + + }; } + final Bits docsWithField = getLiveBits(numericEntry.missingOffset, maxDoc); return new SortedNumericDocValues() { int docID = -1; @@ -949,7 +1003,7 @@ final class Lucene54DocValuesProducer extends DocValuesProducer implements Close docID = NO_MORE_DOCS; break; } - + if (docsWithField.get(docID)) { // TODO: use .nextSetBit here, at least!! break; @@ -1192,7 +1246,8 @@ final class Lucene54DocValuesProducer extends DocValuesProducer implements Close private SortedSetDocValues getSortedSetTable(FieldInfo field, SortedSetEntry ss) throws IOException { final long valueCount = binaries.get(field.name).count; final LongBinaryDocValues binary = (LongBinaryDocValues) getLegacyBinary(field); - final LongValues ordinals = getNumeric(ords.get(field.name)); + final NumericEntry ordinalsEntry = ords.get(field.name); + final LongValues ordinals = getNumeric(ordinalsEntry); final long[] table = ss.table; final int[] offsets = ss.tableOffsets; @@ -1273,10 +1328,11 @@ final class Lucene54DocValuesProducer extends DocValuesProducer implements Close } } - private SparseBits getSparseLiveBits(NumericEntry entry) throws IOException { + private SparseNumericDocValues getSparseNumericDocValues(NumericEntry entry) throws IOException { final RandomAccessInput docIdsData = this.data.randomAccessSlice(entry.missingOffset, entry.offset - entry.missingOffset); final LongValues docIDs = DirectMonotonicReader.getInstance(entry.monotonicMeta, docIdsData); - return new SparseBits(maxDoc, entry.numDocsWithValue, docIDs); + final LongValues values = getNumeric(entry.nonMissingValues); // cannot be sparse + return new SparseNumericDocValues(Math.toIntExact(entry.numDocsWithValue), docIDs, values); } @Override diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene54/TestLucene54DocValuesFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene54/TestLucene54DocValuesFormat.java index f798148f23a..dd7cdccc542 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene54/TestLucene54DocValuesFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene54/TestLucene54DocValuesFormat.java @@ -31,8 +31,8 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.asserting.AssertingCodec; -import org.apache.lucene.codecs.lucene54.Lucene54DocValuesProducer.SparseBits; -import org.apache.lucene.codecs.lucene54.Lucene54DocValuesProducer.SparseLongValues; +import org.apache.lucene.codecs.lucene54.Lucene54DocValuesProducer.SparseNumericDocValues; +import org.apache.lucene.codecs.lucene54.Lucene54DocValuesProducer.SparseNumericDocValuesRandomAccessWrapper; import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -61,6 +61,7 @@ import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum.SeekStatus; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMFile; @@ -427,13 +428,13 @@ public class TestLucene54DocValuesFormat extends BaseCompressingDocValuesFormatT } } - public void testSparseLongValues() { + public void testSparseLongValues() throws IOException { final int iters = atLeast(5); for (int iter = 0; iter < iters; ++iter) { final int numDocs = TestUtil.nextInt(random(), 0, 100); - final long[] docIds = new long[numDocs]; + final int[] docIds = new int[numDocs]; final long[] values = new long[numDocs]; - final long maxDoc; + final int maxDoc; if (numDocs == 0) { maxDoc = 1 + random().nextInt(10); } else { @@ -459,35 +460,51 @@ public class TestLucene54DocValuesFormat extends BaseCompressingDocValuesFormatT return values[Math.toIntExact(index)]; } }; - final SparseBits liveBits = new SparseBits(maxDoc, numDocs, docIdsValues); - // random-access - for (int i = 0; i < 2000; ++i) { - final long docId = TestUtil.nextLong(random(), 0, maxDoc - 1); - final boolean exists = liveBits.get(Math.toIntExact(docId)); - assertEquals(Arrays.binarySearch(docIds, docId) >= 0, exists); - } + final SparseNumericDocValues sparseValues = new SparseNumericDocValues(numDocs, docIdsValues, valuesValues); + // sequential access - for (int docId = 0; docId < maxDoc; docId += random().nextInt(3)) { - final boolean exists = liveBits.get(Math.toIntExact(docId)); - assertEquals(Arrays.binarySearch(docIds, docId) >= 0, exists); + assertEquals(-1, sparseValues.docID()); + for (int i = 0; i < docIds.length; ++i) { + assertEquals(docIds[i], sparseValues.nextDoc()); + } + assertEquals(DocIdSetIterator.NO_MORE_DOCS, sparseValues.nextDoc()); + + // advance + for (int i = 0; i < 2000; ++i) { + final int target = TestUtil.nextInt(random(), 0, (int) maxDoc); + int index = Arrays.binarySearch(docIds, target); + if (index < 0) { + index = -1 - index; + } + sparseValues.reset(); + if (index > 0) { + assertEquals(docIds[index - 1], sparseValues.advance(Math.toIntExact(docIds[index - 1]))); + } + if (index == docIds.length) { + assertEquals(DocIdSetIterator.NO_MORE_DOCS, sparseValues.advance(target)); + } else { + assertEquals(docIds[index], sparseValues.advance(target)); + } } - final SparseLongValues sparseValues = new SparseLongValues(liveBits, valuesValues, missingValue); + final SparseNumericDocValuesRandomAccessWrapper raWrapper = new SparseNumericDocValuesRandomAccessWrapper(sparseValues, missingValue); + // random-access for (int i = 0; i < 2000; ++i) { - final long docId = TestUtil.nextLong(random(), 0, maxDoc - 1); + final int docId = TestUtil.nextInt(random(), 0, maxDoc - 1); final int idx = Arrays.binarySearch(docIds, docId); - final long value = sparseValues.get(docId); + final long value = raWrapper.get(docId); if (idx >= 0) { assertEquals(values[idx], value); } else { assertEquals(missingValue, value); } } + // sequential access for (int docId = 0; docId < maxDoc; docId += random().nextInt(3)) { final int idx = Arrays.binarySearch(docIds, docId); - final long value = sparseValues.get(docId); + final long value = raWrapper.get(docId); if (idx >= 0) { assertEquals(values[idx], value); } else {