LUCENE-10123: Handling of singletons in DocValuesConsumer. (#320)

This avoids double wrapping of doc values in `Lucene90DocValuesConsumer`.
This commit is contained in:
Adrien Grand 2021-09-28 08:54:46 +02:00 committed by GitHub
parent 1ebd193fbe
commit 7357bdc272
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 188 additions and 219 deletions

View File

@ -191,7 +191,6 @@ public abstract class DocValuesConsumer implements Closeable {
List<NumericDocValuesSub> subs = new ArrayList<>();
assert mergeState.docMaps.length == mergeState.docValuesProducers.length;
long cost = 0;
for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
NumericDocValues values = null;
DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
@ -203,60 +202,67 @@ public abstract class DocValuesConsumer implements Closeable {
}
}
if (values != null) {
cost += values.cost();
subs.add(new NumericDocValuesSub(mergeState.docMaps[i], values));
}
}
final DocIDMerger<NumericDocValuesSub> docIDMerger =
DocIDMerger.of(subs, mergeState.needsIndexSort);
final long finalCost = cost;
return new NumericDocValues() {
private int docID = -1;
private NumericDocValuesSub current;
@Override
public int docID() {
return docID;
}
@Override
public int nextDoc() throws IOException {
current = docIDMerger.next();
if (current == null) {
docID = NO_MORE_DOCS;
} else {
docID = current.mappedDocID;
}
return docID;
}
@Override
public int advance(int target) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public boolean advanceExact(int target) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public long cost() {
return finalCost;
}
@Override
public long longValue() throws IOException {
return current.values.longValue();
}
};
return mergeNumericValues(subs, mergeState.needsIndexSort);
}
});
}
/**
 * Builds a single forward-only {@link NumericDocValues} view over the given per-segment subs.
 *
 * <p>Documents are pulled in merged order from a {@link DocIDMerger}; the exposed doc ID is the
 * mapped doc ID of the sub that the merger returned last, and the value is read from that same
 * sub. Only {@code nextDoc()} iteration is supported: {@code advance} and {@code advanceExact}
 * throw {@link UnsupportedOperationException}.
 *
 * @param subs one sub per segment being merged
 * @param indexIsSorted forwarded to {@link DocIDMerger#of} as its second argument
 * @return the merged view, whose {@code cost()} is the sum of the costs of all subs
 * @throws IOException declared by the method; within this body only {@code DocIDMerger.of} could raise it
 */
private static NumericDocValues mergeNumericValues(
    List<NumericDocValuesSub> subs, boolean indexIsSorted) throws IOException {
  // Sum up the costs first so the anonymous class can capture one immutable value.
  long summedCost = 0;
  for (NumericDocValuesSub each : subs) {
    summedCost += each.values.cost();
  }
  final long totalCost = summedCost;

  final DocIDMerger<NumericDocValuesSub> merger = DocIDMerger.of(subs, indexIsSorted);

  return new NumericDocValues() {
    /** The sub positioned on the current document, or null once exhausted. */
    private NumericDocValuesSub currentSub;

    private int doc = -1;

    @Override
    public int docID() {
      return doc;
    }

    @Override
    public int nextDoc() throws IOException {
      currentSub = merger.next();
      doc = (currentSub == null) ? NO_MORE_DOCS : currentSub.mappedDocID;
      return doc;
    }

    @Override
    public int advance(int target) throws IOException {
      throw new UnsupportedOperationException();
    }

    @Override
    public boolean advanceExact(int target) throws IOException {
      throw new UnsupportedOperationException();
    }

    @Override
    public long cost() {
      return totalCost;
    }

    @Override
    public long longValue() throws IOException {
      return currentSub.values.longValue();
    }
  };
}
/** Tracks state of one binary sub-reader that we are merging */
private static class BinaryDocValuesSub extends DocIDMerger.Sub {
@ -396,6 +402,7 @@ public abstract class DocValuesConsumer implements Closeable {
// We must make new iterators + DocIDMerger for each iterator:
List<SortedNumericDocValuesSub> subs = new ArrayList<>();
long cost = 0;
boolean allSingletons = true;
for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
SortedNumericDocValues values = null;
@ -410,9 +417,26 @@ public abstract class DocValuesConsumer implements Closeable {
values = DocValues.emptySortedNumeric();
}
cost += values.cost();
if (allSingletons && DocValues.unwrapSingleton(values) == null) {
allSingletons = false;
}
subs.add(new SortedNumericDocValuesSub(mergeState.docMaps[i], values));
}
if (allSingletons) {
// All subs are single-valued.
// We specialize for that case since it makes it easier for codecs to optimize
// for single-valued fields.
List<NumericDocValuesSub> singleValuedSubs = new ArrayList<>();
for (SortedNumericDocValuesSub sub : subs) {
final NumericDocValues singleValuedValues = DocValues.unwrapSingleton(sub.values);
assert singleValuedValues != null;
singleValuedSubs.add(new NumericDocValuesSub(sub.docMap, singleValuedValues));
}
return DocValues.singleton(
mergeNumericValues(singleValuedSubs, mergeState.needsIndexSort));
}
final long finalCost = cost;
final DocIDMerger<SortedNumericDocValuesSub> docIDMerger =
@ -651,7 +675,6 @@ public abstract class DocValuesConsumer implements Closeable {
// We must make new iterators + DocIDMerger for each iterator:
List<SortedDocValuesSub> subs = new ArrayList<>();
long cost = 0;
for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
SortedDocValues values = null;
DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
@ -665,83 +688,90 @@ public abstract class DocValuesConsumer implements Closeable {
if (values == null) {
values = DocValues.emptySorted();
}
cost += values.cost();
subs.add(new SortedDocValuesSub(mergeState.docMaps[i], values, map.getGlobalOrds(i)));
}
final long finalCost = cost;
final DocIDMerger<SortedDocValuesSub> docIDMerger =
DocIDMerger.of(subs, mergeState.needsIndexSort);
return new SortedDocValues() {
private int docID = -1;
private int ord;
@Override
public int docID() {
return docID;
}
@Override
public int nextDoc() throws IOException {
SortedDocValuesSub sub = docIDMerger.next();
if (sub == null) {
return docID = NO_MORE_DOCS;
}
int subOrd = sub.values.ordValue();
assert subOrd != -1;
ord = (int) sub.map.get(subOrd);
docID = sub.mappedDocID;
return docID;
}
@Override
public int ordValue() {
return ord;
}
@Override
public int advance(int target) {
throw new UnsupportedOperationException();
}
@Override
public boolean advanceExact(int target) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public long cost() {
return finalCost;
}
@Override
public int getValueCount() {
return (int) map.getValueCount();
}
@Override
public BytesRef lookupOrd(int ord) throws IOException {
int segmentNumber = map.getFirstSegmentNumber(ord);
int segmentOrd = (int) map.getFirstSegmentOrd(ord);
return dvs[segmentNumber].lookupOrd(segmentOrd);
}
@Override
public TermsEnum termsEnum() throws IOException {
TermsEnum[] subs = new TermsEnum[toMerge.size()];
for (int sub = 0; sub < subs.length; ++sub) {
subs[sub] = toMerge.get(sub).termsEnum();
}
return new MergedTermsEnum(map, subs);
}
};
return mergeSortedValues(subs, mergeState.needsIndexSort, map);
}
});
}
/**
 * Builds a single forward-only {@link SortedDocValues} view over the given per-segment subs.
 *
 * <p>Documents are pulled in merged order from a {@link DocIDMerger}. For each document the
 * segment-local ordinal is translated through the sub's own {@code map} before being exposed by
 * {@code ordValue()}. Value count, ordinal lookup and the terms enum are all resolved through
 * the supplied {@link OrdinalMap}. Only {@code nextDoc()} iteration is supported: {@code advance}
 * and {@code advanceExact} throw {@link UnsupportedOperationException}.
 *
 * @param subs one sub per segment being merged; also indexed by segment number for lookups
 * @param indexIsSorted forwarded to {@link DocIDMerger#of} as its second argument
 * @param map ordinal map used for value count, ordinal lookup and the merged terms enum
 * @return the merged view, whose {@code cost()} is the sum of the costs of all subs
 * @throws IOException declared by the method; within this body only {@code DocIDMerger.of} could raise it
 */
private static SortedDocValues mergeSortedValues(
    List<SortedDocValuesSub> subs, boolean indexIsSorted, OrdinalMap map) throws IOException {
  // Sum up the costs first so the anonymous class can capture one immutable value.
  long summedCost = 0;
  for (SortedDocValuesSub each : subs) {
    summedCost += each.values.cost();
  }
  final long totalCost = summedCost;

  final DocIDMerger<SortedDocValuesSub> merger = DocIDMerger.of(subs, indexIsSorted);

  return new SortedDocValues() {
    /** Ordinal of the current document, already translated through the sub's map. */
    private int currentOrd;

    private int doc = -1;

    @Override
    public int docID() {
      return doc;
    }

    @Override
    public int nextDoc() throws IOException {
      final SortedDocValuesSub next = merger.next();
      if (next == null) {
        doc = NO_MORE_DOCS;
        return doc;
      }
      final int segmentOrd = next.values.ordValue();
      assert segmentOrd != -1;
      currentOrd = (int) next.map.get(segmentOrd);
      doc = next.mappedDocID;
      return doc;
    }

    @Override
    public int ordValue() {
      return currentOrd;
    }

    @Override
    public int advance(int target) {
      throw new UnsupportedOperationException();
    }

    @Override
    public boolean advanceExact(int target) throws IOException {
      throw new UnsupportedOperationException();
    }

    @Override
    public long cost() {
      return totalCost;
    }

    @Override
    public int getValueCount() {
      return (int) map.getValueCount();
    }

    @Override
    public BytesRef lookupOrd(int ord) throws IOException {
      // Resolve the ordinal via the first segment the map reports for it.
      final int segment = map.getFirstSegmentNumber(ord);
      final int ordInSegment = (int) map.getFirstSegmentOrd(ord);
      return subs.get(segment).values.lookupOrd(ordInSegment);
    }

    @Override
    public TermsEnum termsEnum() throws IOException {
      final int count = subs.size();
      final TermsEnum[] perSegmentEnums = new TermsEnum[count];
      for (int i = 0; i < count; i++) {
        perSegmentEnums[i] = subs.get(i).values.termsEnum();
      }
      return new MergedTermsEnum(map, perSegmentEnums);
    }
  };
}
/** Tracks state of one sorted set sub-reader that we are merging */
private static class SortedSetDocValuesSub extends DocIDMerger.Sub {
@ -834,6 +864,7 @@ public abstract class DocValuesConsumer implements Closeable {
List<SortedSetDocValuesSub> subs = new ArrayList<>();
long cost = 0;
boolean allSingletons = true;
for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
SortedSetDocValues values = null;
@ -849,10 +880,28 @@ public abstract class DocValuesConsumer implements Closeable {
values = DocValues.emptySortedSet();
}
cost += values.cost();
if (allSingletons && DocValues.unwrapSingleton(values) == null) {
allSingletons = false;
}
subs.add(
new SortedSetDocValuesSub(mergeState.docMaps[i], values, map.getGlobalOrds(i)));
}
if (allSingletons) {
// All subs are single-valued.
// We specialize for that case since it makes it easier for codecs to optimize
// for single-valued fields.
List<SortedDocValuesSub> singleValuedSubs = new ArrayList<>();
for (SortedSetDocValuesSub sub : subs) {
final SortedDocValues singleValuedValues = DocValues.unwrapSingleton(sub.values);
assert singleValuedValues != null;
singleValuedSubs.add(
new SortedDocValuesSub(sub.docMap, singleValuedValues, sub.map));
}
return DocValues.singleton(
mergeSortedValues(singleValuedSubs, mergeState.needsIndexSort, map));
}
final DocIDMerger<SortedSetDocValuesSub> docIDMerger =
DocIDMerger.of(subs, mergeState.needsIndexSort);

View File

@ -711,25 +711,29 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
}
}
/**
 * Tells whether no document in {@code values} carries more than one ordinal.
 *
 * <p>If {@link DocValues#unwrapSingleton} returns a non-null result the answer is {@code true}
 * without iterating. Otherwise every document is visited and its second {@code nextOrd()} call
 * is checked; iteration stops at the first document that yields a second ordinal.
 *
 * @param values a fresh, unpositioned iterator (asserted via {@code docID() == -1}); it is
 *     consumed, fully or partially, unless the singleton shortcut applies
 * @return {@code true} if every visited document has exactly one ordinal
 * @throws IOException if iterating the values fails
 */
private static boolean isSingleValued(SortedSetDocValues values) throws IOException {
  final boolean wrapsSingleton = DocValues.unwrapSingleton(values) != null;
  if (wrapsSingleton) {
    return true;
  }

  assert values.docID() == -1;
  while (values.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
    // The first ordinal must always be read, even when assertions are disabled.
    final long first = values.nextOrd();
    assert first != SortedSetDocValues.NO_MORE_ORDS;
    final long second = values.nextOrd();
    if (second != SortedSetDocValues.NO_MORE_ORDS) {
      return false;
    }
  }
  return true;
}
@Override
public void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer)
throws IOException {
meta.writeInt(field.number);
meta.writeByte(Lucene90DocValuesFormat.SORTED_SET);
SortedSetDocValues values = valuesProducer.getSortedSet(field);
int numDocsWithField = 0;
long numOrds = 0;
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
numDocsWithField++;
for (long ord = values.nextOrd();
ord != SortedSetDocValues.NO_MORE_ORDS;
ord = values.nextOrd()) {
numOrds++;
}
}
if (numDocsWithField == numOrds) {
if (isSingleValued(valuesProducer.getSortedSet(field))) {
meta.writeByte((byte) 0); // multiValued (0 = singleValued)
doAddSortedField(
field,
@ -804,6 +808,6 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
}
});
addTermsDict(values);
addTermsDict(valuesProducer.getSortedSet(field));
}
}

View File

@ -36,7 +36,8 @@ public abstract class DocIDMerger<T extends DocIDMerger.Sub> {
/** Mapped doc ID */
public int mappedDocID;
final MergeState.DocMap docMap;
/** Map from old to new doc IDs */
public final MergeState.DocMap docMap;
/** Sole constructor */
protected Sub(MergeState.DocMap docMap) {

View File

@ -157,97 +157,12 @@ public final class DocValues {
/**
 * An empty SortedNumericDocValues which returns zero values for every document.
 *
 * <p>The instance is produced by wrapping {@code emptyNumeric()} with {@code singleton(...)}
 * instead of a hand-written anonymous class. The merge code in {@code DocValuesConsumer} only
 * takes its single-valued path when {@code DocValues.unwrapSingleton(values)} is non-null for
 * every segment, including segments that fall back to this empty instance.
 * NOTE(review): this relies on {@code unwrapSingleton} recognizing what {@code singleton}
 * returns - confirm against those methods, which are not visible here.
 */
public static final SortedNumericDocValues emptySortedNumeric() {
  // Single return: the previous listing kept the old anonymous-class return in front of this
  // statement, which made it unreachable.
  return singleton(emptyNumeric());
}
/**
 * An empty SortedSetDocValues.
 *
 * <p>The instance is produced by wrapping {@code emptySorted()} with {@code singleton(...)}
 * instead of a hand-written anonymous class. The merge code in {@code DocValuesConsumer} only
 * takes its single-valued path when {@code DocValues.unwrapSingleton(values)} is non-null for
 * every segment, including segments that fall back to this empty instance.
 * NOTE(review): iteration and lookup behavior is whatever {@code emptySorted()} provides through
 * the singleton view - confirm against those methods, which are not visible here.
 */
public static final SortedSetDocValues emptySortedSet() {
  // Single return: the previous listing kept the old anonymous-class return (and its now
  // unused empty BytesRef local) in front of this statement, which made it unreachable.
  return singleton(emptySorted());
}
/** Returns a multi-valued view over the provided SortedDocValues */