Speed up NumericDocValuesWriter with index sorting (#12381)

This commit is contained in:
zhangchao 2023-06-30 20:56:56 +08:00 committed by GitHub
parent e503805758
commit 01200b5804
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 50 additions and 9 deletions

View File

@ -140,6 +140,8 @@ Optimizations
* GITHUB#12385: Restore parallel knn query rewrite across segments rather than slices (Luca Cavanna) * GITHUB#12385: Restore parallel knn query rewrite across segments rather than slices (Luca Cavanna)
* GITHUB#12381: Speed up NumericDocValuesWriter with index sorting. (Chao Zhang)
Bug Fixes Bug Fixes
--------------------- ---------------------

View File

@ -76,7 +76,8 @@ class NormValuesWriter {
NumericDocValuesWriter.sortDocValues( NumericDocValuesWriter.sortDocValues(
state.segmentInfo.maxDoc(), state.segmentInfo.maxDoc(),
sortMap, sortMap,
new BufferedNorms(values, docsWithField.iterator())); new BufferedNorms(values, docsWithField.iterator()),
sortMap.size() == docsWithField.cardinality());
} else { } else {
sorted = null; sorted = null;
} }

View File

@ -78,9 +78,14 @@ class NumericDocValuesWriter extends DocValuesWriter<NumericDocValues> {
return new BufferedNumericDocValues(finalValues, docsWithField.iterator()); return new BufferedNumericDocValues(finalValues, docsWithField.iterator());
} }
static NumericDVs sortDocValues(int maxDoc, Sorter.DocMap sortMap, NumericDocValues oldDocValues) static NumericDVs sortDocValues(
int maxDoc, Sorter.DocMap sortMap, NumericDocValues oldDocValues, boolean dense)
throws IOException { throws IOException {
FixedBitSet docsWithField = new FixedBitSet(maxDoc); FixedBitSet docsWithField = null;
if (dense == false) {
docsWithField = new FixedBitSet(maxDoc);
}
long[] values = new long[maxDoc]; long[] values = new long[maxDoc];
while (true) { while (true) {
int docID = oldDocValues.nextDoc(); int docID = oldDocValues.nextDoc();
@ -88,7 +93,9 @@ class NumericDocValuesWriter extends DocValuesWriter<NumericDocValues> {
break; break;
} }
int newDocID = sortMap.oldToNew(docID); int newDocID = sortMap.oldToNew(docID);
docsWithField.set(newDocID); if (docsWithField != null) {
docsWithField.set(newDocID);
}
values[newDocID] = oldDocValues.longValue(); values[newDocID] = oldDocValues.longValue();
} }
return new NumericDVs(values, docsWithField); return new NumericDVs(values, docsWithField);
@ -114,7 +121,9 @@ class NumericDocValuesWriter extends DocValuesWriter<NumericDocValues> {
final NumericDVs sorted; final NumericDVs sorted;
if (sortMap != null) { if (sortMap != null) {
NumericDocValues oldValues = new BufferedNumericDocValues(values, docsWithField.iterator()); NumericDocValues oldValues = new BufferedNumericDocValues(values, docsWithField.iterator());
sorted = sortDocValues(sortMap.size(), sortMap, oldValues); sorted =
sortDocValues(
sortMap.size(), sortMap, oldValues, sortMap.size() == docsWithField.cardinality());
} else { } else {
sorted = null; sorted = null;
} }
@ -197,10 +206,10 @@ class NumericDocValuesWriter extends DocValuesWriter<NumericDocValues> {
@Override @Override
public int nextDoc() { public int nextDoc() {
if (docID + 1 == dvs.docsWithField.length()) { if (docID + 1 == dvs.maxDoc()) {
docID = NO_MORE_DOCS; docID = NO_MORE_DOCS;
} else { } else {
docID = dvs.docsWithField.nextSetBit(docID + 1); docID = dvs.advance(docID + 1);
} }
return docID; return docID;
} }
@ -214,7 +223,7 @@ class NumericDocValuesWriter extends DocValuesWriter<NumericDocValues> {
public boolean advanceExact(int target) throws IOException { public boolean advanceExact(int target) throws IOException {
// needed in IndexSorter#{Long|Int|Double|Float}Sorter // needed in IndexSorter#{Long|Int|Double|Float}Sorter
docID = target; docID = target;
return dvs.docsWithField.get(target); return dvs.advanceExact(target);
} }
@Override @Override
@ -225,7 +234,7 @@ class NumericDocValuesWriter extends DocValuesWriter<NumericDocValues> {
@Override @Override
public long cost() { public long cost() {
if (cost == -1) { if (cost == -1) {
cost = dvs.docsWithField.cardinality(); cost = dvs.cost();
} }
return cost; return cost;
} }
@ -234,10 +243,39 @@ class NumericDocValuesWriter extends DocValuesWriter<NumericDocValues> {
static class NumericDVs { static class NumericDVs {
private final long[] values; private final long[] values;
private final BitSet docsWithField; private final BitSet docsWithField;
private final int maxDoc;
NumericDVs(long[] values, BitSet docsWithField) { NumericDVs(long[] values, BitSet docsWithField) {
this.values = values; this.values = values;
this.docsWithField = docsWithField; this.docsWithField = docsWithField;
this.maxDoc = values.length;
}
int maxDoc() {
return maxDoc;
}
private boolean advanceExact(int target) {
if (docsWithField != null) {
return docsWithField.get(target);
}
return true;
}
private int advance(int target) {
if (docsWithField != null) {
return docsWithField.nextSetBit(target);
}
// Only called when target is less than maxDoc
return target;
}
private long cost() {
if (docsWithField != null) {
return docsWithField.cardinality();
}
return maxDoc;
} }
} }
} }