mirror of https://github.com/apache/lucene.git
LUCENE-10623: Fix incorrect implementation of docValueCount for SortingSortedSetDocValues (#967)
This commit is contained in:
parent
7b58088bd5
commit
d8fb47b674
|
@ -804,12 +804,10 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
|
|||
public int nextDoc() throws IOException {
|
||||
int doc = values.nextDoc();
|
||||
if (doc != NO_MORE_DOCS) {
|
||||
docValueCount = 0;
|
||||
for (long ord = values.nextOrd();
|
||||
ord != SortedSetDocValues.NO_MORE_ORDS;
|
||||
ord = values.nextOrd()) {
|
||||
ords = ArrayUtil.grow(ords, docValueCount + 1);
|
||||
ords[docValueCount++] = ord;
|
||||
docValueCount = values.docValueCount();
|
||||
ords = ArrayUtil.grow(ords, docValueCount);
|
||||
for (int j = 0; j < docValueCount; j++) {
|
||||
ords[j] = values.nextOrd();
|
||||
}
|
||||
i = 0;
|
||||
}
|
||||
|
|
|
@ -32,6 +32,7 @@ import org.apache.lucene.util.BytesRefHash;
|
|||
import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.packed.GrowableWriter;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
import org.apache.lucene.util.packed.PackedLongValues;
|
||||
|
||||
|
@ -228,7 +229,8 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
|
|||
state.segmentInfo.maxDoc(),
|
||||
sortMap,
|
||||
getValues(sortedValues, ordMap, hash, ords, ordCounts, maxCount, docsWithField),
|
||||
PackedInts.FASTEST);
|
||||
PackedInts.FASTEST,
|
||||
PackedInts.bitsRequired(maxCount));
|
||||
} else {
|
||||
docOrds = null;
|
||||
}
|
||||
|
@ -350,6 +352,8 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
|
|||
private final DocOrds ords;
|
||||
private int docID = -1;
|
||||
private long ordUpto;
|
||||
private long limit;
|
||||
private int count;
|
||||
|
||||
SortingSortedSetDocValues(SortedSetDocValues in, DocOrds ords) {
|
||||
this.in = in;
|
||||
|
@ -369,7 +373,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
|
|||
return docID = NO_MORE_DOCS;
|
||||
}
|
||||
} while (ords.offsets[docID] <= 0);
|
||||
ordUpto = ords.offsets[docID] - 1;
|
||||
initCount();
|
||||
return docID;
|
||||
}
|
||||
|
||||
|
@ -382,23 +386,23 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
|
|||
public boolean advanceExact(int target) throws IOException {
|
||||
// needed in IndexSorter#StringSorter
|
||||
docID = target;
|
||||
ordUpto = ords.offsets[docID] - 1;
|
||||
initCount();
|
||||
return ords.offsets[docID] > 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long nextOrd() {
|
||||
long ord = ords.ords.get(ordUpto++);
|
||||
if (ord == 0) {
|
||||
if (limit == ordUpto) {
|
||||
return NO_MORE_ORDS;
|
||||
} else {
|
||||
return ord - 1;
|
||||
return ords.ords.get(ordUpto++);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docValueCount() {
|
||||
return (int) ords.ords.size();
|
||||
assert docID >= 0;
|
||||
return count;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -415,34 +419,45 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
|
|||
public long getValueCount() {
|
||||
return in.getValueCount();
|
||||
}
|
||||
|
||||
private void initCount() {
|
||||
assert docID >= 0;
|
||||
ordUpto = ords.offsets[docID] - 1;
|
||||
count = (int) ords.docValueCounts.get(docID);
|
||||
limit = ordUpto + count;
|
||||
}
|
||||
}
|
||||
|
||||
static final class DocOrds {
|
||||
final long[] offsets;
|
||||
final PackedLongValues ords;
|
||||
final GrowableWriter docValueCounts;
|
||||
|
||||
public static final int START_BITS_PER_VALUE = 2;
|
||||
|
||||
DocOrds(
|
||||
int maxDoc,
|
||||
Sorter.DocMap sortMap,
|
||||
SortedSetDocValues oldValues,
|
||||
float acceptableOverheadRatio)
|
||||
float acceptableOverheadRatio,
|
||||
int bitsPerValue)
|
||||
throws IOException {
|
||||
offsets = new long[maxDoc];
|
||||
PackedLongValues.Builder builder = PackedLongValues.packedBuilder(acceptableOverheadRatio);
|
||||
long ordOffset = 1; // 0 marks docs with no values
|
||||
docValueCounts = new GrowableWriter(bitsPerValue, maxDoc, acceptableOverheadRatio);
|
||||
long ordOffset = 1;
|
||||
int docID;
|
||||
while ((docID = oldValues.nextDoc()) != NO_MORE_DOCS) {
|
||||
int newDocID = sortMap.oldToNew(docID);
|
||||
long startOffset = ordOffset;
|
||||
long ord;
|
||||
while ((ord = oldValues.nextOrd()) != NO_MORE_ORDS) {
|
||||
builder.add(ord + 1);
|
||||
builder.add(ord);
|
||||
ordOffset++;
|
||||
}
|
||||
docValueCounts.set(newDocID, ordOffset - startOffset);
|
||||
if (startOffset != ordOffset) { // do we have any values?
|
||||
offsets[newDocID] = startOffset;
|
||||
builder.add(0); // 0 ord marks next value
|
||||
ordOffset++;
|
||||
}
|
||||
}
|
||||
ords = builder.build();
|
||||
|
|
|
@ -483,7 +483,11 @@ public final class SortingCodecReader extends FilterCodecReader {
|
|||
field.name,
|
||||
() ->
|
||||
new SortedSetDocValuesWriter.DocOrds(
|
||||
maxDoc(), docMap, oldDocValues, PackedInts.FAST)));
|
||||
maxDoc(),
|
||||
docMap,
|
||||
oldDocValues,
|
||||
PackedInts.FAST,
|
||||
SortedSetDocValuesWriter.DocOrds.START_BITS_PER_VALUE)));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -43,6 +43,7 @@ import org.apache.lucene.search.IndexSearcher;
|
|||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.SortedNumericSortField;
|
||||
import org.apache.lucene.search.SortedSetSelector;
|
||||
import org.apache.lucene.search.SortedSetSortField;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
|
@ -55,6 +56,45 @@ import org.apache.lucene.util.IOUtils;
|
|||
|
||||
public class TestSortingCodecReader extends LuceneTestCase {
|
||||
|
||||
public void testSortOnAddIndicesOrd() throws IOException {
|
||||
Directory tmpDir = newDirectory();
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
|
||||
IndexWriter w = new IndexWriter(tmpDir, iwc);
|
||||
|
||||
Document doc;
|
||||
doc = new Document();
|
||||
doc.add(new SortedSetDocValuesField("foo", new BytesRef("b")));
|
||||
w.addDocument(doc);
|
||||
|
||||
doc.add(new SortedSetDocValuesField("foo", new BytesRef("a")));
|
||||
doc.add(new SortedSetDocValuesField("foo", new BytesRef("b")));
|
||||
doc.add(new SortedSetDocValuesField("foo", new BytesRef("b")));
|
||||
w.addDocument(doc);
|
||||
|
||||
w.commit();
|
||||
|
||||
Sort indexSort = new Sort(new SortedSetSortField("foo", false, SortedSetSelector.Type.MIN));
|
||||
try (DirectoryReader reader = DirectoryReader.open(tmpDir)) {
|
||||
for (LeafReaderContext ctx : reader.leaves()) {
|
||||
CodecReader wrap =
|
||||
SortingCodecReader.wrap(SlowCodecReaderWrapper.wrap(ctx.reader()), indexSort);
|
||||
assertTrue(wrap.toString(), wrap.toString().startsWith("SortingCodecReader("));
|
||||
SortingCodecReader sortingCodecReader = (SortingCodecReader) wrap;
|
||||
SortedSetDocValues sortedSetDocValues =
|
||||
sortingCodecReader
|
||||
.getDocValuesReader()
|
||||
.getSortedSet(ctx.reader().getFieldInfos().fieldInfo("foo"));
|
||||
sortedSetDocValues.nextDoc();
|
||||
assertEquals(sortedSetDocValues.docValueCount(), 2);
|
||||
sortedSetDocValues.nextDoc();
|
||||
assertEquals(sortedSetDocValues.docValueCount(), 1);
|
||||
assertEquals(sortedSetDocValues.nextDoc(), DocIdSetIterator.NO_MORE_DOCS);
|
||||
}
|
||||
}
|
||||
IOUtils.close(w, dir, tmpDir);
|
||||
}
|
||||
|
||||
public void testSortOnAddIndicesInt() throws IOException {
|
||||
Directory tmpDir = newDirectory();
|
||||
Directory dir = newDirectory();
|
||||
|
|
|
@ -189,11 +189,8 @@ public class AssertingDocValuesFormat extends DocValuesFormat {
|
|||
}
|
||||
|
||||
long lastOrd = -1;
|
||||
while (true) {
|
||||
for (int i = 0; i < values.docValueCount(); i++) {
|
||||
long ord = values.nextOrd();
|
||||
if (ord == SortedSetDocValues.NO_MORE_ORDS) {
|
||||
break;
|
||||
}
|
||||
assert ord >= 0 && ord < valueCount
|
||||
: "ord=" + ord + " is not in bounds 0 .." + (valueCount - 1);
|
||||
assert ord > lastOrd : "ord=" + ord + ",lastOrd=" + lastOrd;
|
||||
|
|
Loading…
Reference in New Issue