Speed up NumericDocValuesWriter with index sorting (#12381)

This commit is contained in:
zhangchao 2023-06-30 20:56:56 +08:00 committed by GitHub
parent e503805758
commit 01200b5804
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 50 additions and 9 deletions

View File

@ -140,6 +140,8 @@ Optimizations
* GITHUB#12385: Restore parallel knn query rewrite across segments rather than slices (Luca Cavanna)
* GITHUB#12381: Speed up NumericDocValuesWriter with index sorting. (Chao Zhang)
Bug Fixes
---------------------

View File

@ -76,7 +76,8 @@ class NormValuesWriter {
NumericDocValuesWriter.sortDocValues(
state.segmentInfo.maxDoc(),
sortMap,
new BufferedNorms(values, docsWithField.iterator()));
new BufferedNorms(values, docsWithField.iterator()),
sortMap.size() == docsWithField.cardinality());
} else {
sorted = null;
}

View File

@ -78,9 +78,14 @@ class NumericDocValuesWriter extends DocValuesWriter<NumericDocValues> {
return new BufferedNumericDocValues(finalValues, docsWithField.iterator());
}
static NumericDVs sortDocValues(int maxDoc, Sorter.DocMap sortMap, NumericDocValues oldDocValues)
static NumericDVs sortDocValues(
int maxDoc, Sorter.DocMap sortMap, NumericDocValues oldDocValues, boolean dense)
throws IOException {
FixedBitSet docsWithField = new FixedBitSet(maxDoc);
FixedBitSet docsWithField = null;
if (dense == false) {
docsWithField = new FixedBitSet(maxDoc);
}
long[] values = new long[maxDoc];
while (true) {
int docID = oldDocValues.nextDoc();
@ -88,7 +93,9 @@ class NumericDocValuesWriter extends DocValuesWriter<NumericDocValues> {
break;
}
int newDocID = sortMap.oldToNew(docID);
docsWithField.set(newDocID);
if (docsWithField != null) {
docsWithField.set(newDocID);
}
values[newDocID] = oldDocValues.longValue();
}
return new NumericDVs(values, docsWithField);
@ -114,7 +121,9 @@ class NumericDocValuesWriter extends DocValuesWriter<NumericDocValues> {
final NumericDVs sorted;
if (sortMap != null) {
NumericDocValues oldValues = new BufferedNumericDocValues(values, docsWithField.iterator());
sorted = sortDocValues(sortMap.size(), sortMap, oldValues);
sorted =
sortDocValues(
sortMap.size(), sortMap, oldValues, sortMap.size() == docsWithField.cardinality());
} else {
sorted = null;
}
@ -197,10 +206,10 @@ class NumericDocValuesWriter extends DocValuesWriter<NumericDocValues> {
@Override
public int nextDoc() {
if (docID + 1 == dvs.docsWithField.length()) {
if (docID + 1 == dvs.maxDoc()) {
docID = NO_MORE_DOCS;
} else {
docID = dvs.docsWithField.nextSetBit(docID + 1);
docID = dvs.advance(docID + 1);
}
return docID;
}
@ -214,7 +223,7 @@ class NumericDocValuesWriter extends DocValuesWriter<NumericDocValues> {
public boolean advanceExact(int target) throws IOException {
// needed in IndexSorter#{Long|Int|Double|Float}Sorter
docID = target;
return dvs.docsWithField.get(target);
return dvs.advanceExact(target);
}
@Override
@ -225,7 +234,7 @@ class NumericDocValuesWriter extends DocValuesWriter<NumericDocValues> {
@Override
public long cost() {
if (cost == -1) {
cost = dvs.docsWithField.cardinality();
cost = dvs.cost();
}
return cost;
}
@ -234,10 +243,39 @@ class NumericDocValuesWriter extends DocValuesWriter<NumericDocValues> {
/**
 * Pairs an array of per-document values with an optional bit set marking which documents
 * have a value. A {@code null} bit set is treated as "every document has a value", so the
 * helpers below never touch it in that case.
 */
static class NumericDVs {
  private final long[] values;
  private final BitSet docsWithField;
  private final int maxDoc;

  /**
   * @param values per-document values; the array length is used as {@code maxDoc}
   * @param docsWithField documents that have a value, or {@code null} when all of them do
   */
  NumericDVs(long[] values, BitSet docsWithField) {
    this.maxDoc = values.length;
    this.docsWithField = docsWithField;
    this.values = values;
  }

  /** Returns the number of value slots, i.e. the length of the values array. */
  int maxDoc() {
    return maxDoc;
  }

  /** Reports whether {@code target} has a value; always {@code true} without a bit set. */
  private boolean advanceExact(int target) {
    return docsWithField == null || docsWithField.get(target);
  }

  /**
   * Returns the bit set's next set bit at or after {@code target}, or {@code target} itself
   * when there is no bit set.
   */
  private int advance(int target) {
    // Without a bit set the target is returned unchecked: this is only called when target
    // is less than maxDoc.
    return docsWithField == null ? target : docsWithField.nextSetBit(target);
  }

  /** Returns the bit set's cardinality, or {@code maxDoc} when there is no bit set. */
  private long cost() {
    return docsWithField == null ? maxDoc : docsWithField.cardinality();
  }
}
}