mirror of https://github.com/apache/lucene.git
LUCENE-10623: Error implementation of docValueCount for SortingSortedSetDocValues (#967)
This commit is contained in:
parent
7b58088bd5
commit
d8fb47b674
|
@ -804,12 +804,10 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
|
||||||
public int nextDoc() throws IOException {
|
public int nextDoc() throws IOException {
|
||||||
int doc = values.nextDoc();
|
int doc = values.nextDoc();
|
||||||
if (doc != NO_MORE_DOCS) {
|
if (doc != NO_MORE_DOCS) {
|
||||||
docValueCount = 0;
|
docValueCount = values.docValueCount();
|
||||||
for (long ord = values.nextOrd();
|
ords = ArrayUtil.grow(ords, docValueCount);
|
||||||
ord != SortedSetDocValues.NO_MORE_ORDS;
|
for (int j = 0; j < docValueCount; j++) {
|
||||||
ord = values.nextOrd()) {
|
ords[j] = values.nextOrd();
|
||||||
ords = ArrayUtil.grow(ords, docValueCount + 1);
|
|
||||||
ords[docValueCount++] = ord;
|
|
||||||
}
|
}
|
||||||
i = 0;
|
i = 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,6 +32,7 @@ import org.apache.lucene.util.BytesRefHash;
|
||||||
import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
|
import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
|
||||||
import org.apache.lucene.util.Counter;
|
import org.apache.lucene.util.Counter;
|
||||||
import org.apache.lucene.util.RamUsageEstimator;
|
import org.apache.lucene.util.RamUsageEstimator;
|
||||||
|
import org.apache.lucene.util.packed.GrowableWriter;
|
||||||
import org.apache.lucene.util.packed.PackedInts;
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
import org.apache.lucene.util.packed.PackedLongValues;
|
import org.apache.lucene.util.packed.PackedLongValues;
|
||||||
|
|
||||||
|
@ -228,7 +229,8 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
|
||||||
state.segmentInfo.maxDoc(),
|
state.segmentInfo.maxDoc(),
|
||||||
sortMap,
|
sortMap,
|
||||||
getValues(sortedValues, ordMap, hash, ords, ordCounts, maxCount, docsWithField),
|
getValues(sortedValues, ordMap, hash, ords, ordCounts, maxCount, docsWithField),
|
||||||
PackedInts.FASTEST);
|
PackedInts.FASTEST,
|
||||||
|
PackedInts.bitsRequired(maxCount));
|
||||||
} else {
|
} else {
|
||||||
docOrds = null;
|
docOrds = null;
|
||||||
}
|
}
|
||||||
|
@ -350,6 +352,8 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
|
||||||
private final DocOrds ords;
|
private final DocOrds ords;
|
||||||
private int docID = -1;
|
private int docID = -1;
|
||||||
private long ordUpto;
|
private long ordUpto;
|
||||||
|
private long limit;
|
||||||
|
private int count;
|
||||||
|
|
||||||
SortingSortedSetDocValues(SortedSetDocValues in, DocOrds ords) {
|
SortingSortedSetDocValues(SortedSetDocValues in, DocOrds ords) {
|
||||||
this.in = in;
|
this.in = in;
|
||||||
|
@ -369,7 +373,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
|
||||||
return docID = NO_MORE_DOCS;
|
return docID = NO_MORE_DOCS;
|
||||||
}
|
}
|
||||||
} while (ords.offsets[docID] <= 0);
|
} while (ords.offsets[docID] <= 0);
|
||||||
ordUpto = ords.offsets[docID] - 1;
|
initCount();
|
||||||
return docID;
|
return docID;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -382,23 +386,23 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
|
||||||
public boolean advanceExact(int target) throws IOException {
|
public boolean advanceExact(int target) throws IOException {
|
||||||
// needed in IndexSorter#StringSorter
|
// needed in IndexSorter#StringSorter
|
||||||
docID = target;
|
docID = target;
|
||||||
ordUpto = ords.offsets[docID] - 1;
|
initCount();
|
||||||
return ords.offsets[docID] > 0;
|
return ords.offsets[docID] > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long nextOrd() {
|
public long nextOrd() {
|
||||||
long ord = ords.ords.get(ordUpto++);
|
if (limit == ordUpto) {
|
||||||
if (ord == 0) {
|
|
||||||
return NO_MORE_ORDS;
|
return NO_MORE_ORDS;
|
||||||
} else {
|
} else {
|
||||||
return ord - 1;
|
return ords.ords.get(ordUpto++);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int docValueCount() {
|
public int docValueCount() {
|
||||||
return (int) ords.ords.size();
|
assert docID >= 0;
|
||||||
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -415,34 +419,45 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
|
||||||
public long getValueCount() {
|
public long getValueCount() {
|
||||||
return in.getValueCount();
|
return in.getValueCount();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void initCount() {
|
||||||
|
assert docID >= 0;
|
||||||
|
ordUpto = ords.offsets[docID] - 1;
|
||||||
|
count = (int) ords.docValueCounts.get(docID);
|
||||||
|
limit = ordUpto + count;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static final class DocOrds {
|
static final class DocOrds {
|
||||||
final long[] offsets;
|
final long[] offsets;
|
||||||
final PackedLongValues ords;
|
final PackedLongValues ords;
|
||||||
|
final GrowableWriter docValueCounts;
|
||||||
|
|
||||||
|
public static final int START_BITS_PER_VALUE = 2;
|
||||||
|
|
||||||
DocOrds(
|
DocOrds(
|
||||||
int maxDoc,
|
int maxDoc,
|
||||||
Sorter.DocMap sortMap,
|
Sorter.DocMap sortMap,
|
||||||
SortedSetDocValues oldValues,
|
SortedSetDocValues oldValues,
|
||||||
float acceptableOverheadRatio)
|
float acceptableOverheadRatio,
|
||||||
|
int bitsPerValue)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
offsets = new long[maxDoc];
|
offsets = new long[maxDoc];
|
||||||
PackedLongValues.Builder builder = PackedLongValues.packedBuilder(acceptableOverheadRatio);
|
PackedLongValues.Builder builder = PackedLongValues.packedBuilder(acceptableOverheadRatio);
|
||||||
long ordOffset = 1; // 0 marks docs with no values
|
docValueCounts = new GrowableWriter(bitsPerValue, maxDoc, acceptableOverheadRatio);
|
||||||
|
long ordOffset = 1;
|
||||||
int docID;
|
int docID;
|
||||||
while ((docID = oldValues.nextDoc()) != NO_MORE_DOCS) {
|
while ((docID = oldValues.nextDoc()) != NO_MORE_DOCS) {
|
||||||
int newDocID = sortMap.oldToNew(docID);
|
int newDocID = sortMap.oldToNew(docID);
|
||||||
long startOffset = ordOffset;
|
long startOffset = ordOffset;
|
||||||
long ord;
|
long ord;
|
||||||
while ((ord = oldValues.nextOrd()) != NO_MORE_ORDS) {
|
while ((ord = oldValues.nextOrd()) != NO_MORE_ORDS) {
|
||||||
builder.add(ord + 1);
|
builder.add(ord);
|
||||||
ordOffset++;
|
ordOffset++;
|
||||||
}
|
}
|
||||||
|
docValueCounts.set(newDocID, ordOffset - startOffset);
|
||||||
if (startOffset != ordOffset) { // do we have any values?
|
if (startOffset != ordOffset) { // do we have any values?
|
||||||
offsets[newDocID] = startOffset;
|
offsets[newDocID] = startOffset;
|
||||||
builder.add(0); // 0 ord marks next value
|
|
||||||
ordOffset++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ords = builder.build();
|
ords = builder.build();
|
||||||
|
|
|
@ -483,7 +483,11 @@ public final class SortingCodecReader extends FilterCodecReader {
|
||||||
field.name,
|
field.name,
|
||||||
() ->
|
() ->
|
||||||
new SortedSetDocValuesWriter.DocOrds(
|
new SortedSetDocValuesWriter.DocOrds(
|
||||||
maxDoc(), docMap, oldDocValues, PackedInts.FAST)));
|
maxDoc(),
|
||||||
|
docMap,
|
||||||
|
oldDocValues,
|
||||||
|
PackedInts.FAST,
|
||||||
|
SortedSetDocValuesWriter.DocOrds.START_BITS_PER_VALUE)));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -43,6 +43,7 @@ import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.Sort;
|
import org.apache.lucene.search.Sort;
|
||||||
import org.apache.lucene.search.SortField;
|
import org.apache.lucene.search.SortField;
|
||||||
import org.apache.lucene.search.SortedNumericSortField;
|
import org.apache.lucene.search.SortedNumericSortField;
|
||||||
|
import org.apache.lucene.search.SortedSetSelector;
|
||||||
import org.apache.lucene.search.SortedSetSortField;
|
import org.apache.lucene.search.SortedSetSortField;
|
||||||
import org.apache.lucene.search.TermQuery;
|
import org.apache.lucene.search.TermQuery;
|
||||||
import org.apache.lucene.search.TopDocs;
|
import org.apache.lucene.search.TopDocs;
|
||||||
|
@ -55,6 +56,45 @@ import org.apache.lucene.util.IOUtils;
|
||||||
|
|
||||||
public class TestSortingCodecReader extends LuceneTestCase {
|
public class TestSortingCodecReader extends LuceneTestCase {
|
||||||
|
|
||||||
|
public void testSortOnAddIndicesOrd() throws IOException {
|
||||||
|
Directory tmpDir = newDirectory();
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
|
||||||
|
IndexWriter w = new IndexWriter(tmpDir, iwc);
|
||||||
|
|
||||||
|
Document doc;
|
||||||
|
doc = new Document();
|
||||||
|
doc.add(new SortedSetDocValuesField("foo", new BytesRef("b")));
|
||||||
|
w.addDocument(doc);
|
||||||
|
|
||||||
|
doc.add(new SortedSetDocValuesField("foo", new BytesRef("a")));
|
||||||
|
doc.add(new SortedSetDocValuesField("foo", new BytesRef("b")));
|
||||||
|
doc.add(new SortedSetDocValuesField("foo", new BytesRef("b")));
|
||||||
|
w.addDocument(doc);
|
||||||
|
|
||||||
|
w.commit();
|
||||||
|
|
||||||
|
Sort indexSort = new Sort(new SortedSetSortField("foo", false, SortedSetSelector.Type.MIN));
|
||||||
|
try (DirectoryReader reader = DirectoryReader.open(tmpDir)) {
|
||||||
|
for (LeafReaderContext ctx : reader.leaves()) {
|
||||||
|
CodecReader wrap =
|
||||||
|
SortingCodecReader.wrap(SlowCodecReaderWrapper.wrap(ctx.reader()), indexSort);
|
||||||
|
assertTrue(wrap.toString(), wrap.toString().startsWith("SortingCodecReader("));
|
||||||
|
SortingCodecReader sortingCodecReader = (SortingCodecReader) wrap;
|
||||||
|
SortedSetDocValues sortedSetDocValues =
|
||||||
|
sortingCodecReader
|
||||||
|
.getDocValuesReader()
|
||||||
|
.getSortedSet(ctx.reader().getFieldInfos().fieldInfo("foo"));
|
||||||
|
sortedSetDocValues.nextDoc();
|
||||||
|
assertEquals(sortedSetDocValues.docValueCount(), 2);
|
||||||
|
sortedSetDocValues.nextDoc();
|
||||||
|
assertEquals(sortedSetDocValues.docValueCount(), 1);
|
||||||
|
assertEquals(sortedSetDocValues.nextDoc(), DocIdSetIterator.NO_MORE_DOCS);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
IOUtils.close(w, dir, tmpDir);
|
||||||
|
}
|
||||||
|
|
||||||
public void testSortOnAddIndicesInt() throws IOException {
|
public void testSortOnAddIndicesInt() throws IOException {
|
||||||
Directory tmpDir = newDirectory();
|
Directory tmpDir = newDirectory();
|
||||||
Directory dir = newDirectory();
|
Directory dir = newDirectory();
|
||||||
|
|
|
@ -189,11 +189,8 @@ public class AssertingDocValuesFormat extends DocValuesFormat {
|
||||||
}
|
}
|
||||||
|
|
||||||
long lastOrd = -1;
|
long lastOrd = -1;
|
||||||
while (true) {
|
for (int i = 0; i < values.docValueCount(); i++) {
|
||||||
long ord = values.nextOrd();
|
long ord = values.nextOrd();
|
||||||
if (ord == SortedSetDocValues.NO_MORE_ORDS) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
assert ord >= 0 && ord < valueCount
|
assert ord >= 0 && ord < valueCount
|
||||||
: "ord=" + ord + " is not in bounds 0 .." + (valueCount - 1);
|
: "ord=" + ord + " is not in bounds 0 .." + (valueCount - 1);
|
||||||
assert ord > lastOrd : "ord=" + ord + ",lastOrd=" + lastOrd;
|
assert ord > lastOrd : "ord=" + ord + ",lastOrd=" + lastOrd;
|
||||||
|
|
Loading…
Reference in New Issue