diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 6732ded366c..5cd2c70352c 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -140,6 +140,8 @@ Optimizations * GITHUB#12385: Restore parallel knn query rewrite across segments rather than slices (Luca Cavanna) +* GITHUB#12381: Speed up NumericDocValuesWriter with index sorting. (Chao Zhang) + Bug Fixes --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/index/NormValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/NormValuesWriter.java index 8fe73768f61..dfd98ab291e 100644 --- a/lucene/core/src/java/org/apache/lucene/index/NormValuesWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/NormValuesWriter.java @@ -76,7 +76,8 @@ class NormValuesWriter { NumericDocValuesWriter.sortDocValues( state.segmentInfo.maxDoc(), sortMap, - new BufferedNorms(values, docsWithField.iterator())); + new BufferedNorms(values, docsWithField.iterator()), + sortMap.size() == docsWithField.cardinality()); } else { sorted = null; } diff --git a/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java index f4501108643..09bef657b2d 100644 --- a/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java @@ -78,9 +78,14 @@ class NumericDocValuesWriter extends DocValuesWriter { return new BufferedNumericDocValues(finalValues, docsWithField.iterator()); } - static NumericDVs sortDocValues(int maxDoc, Sorter.DocMap sortMap, NumericDocValues oldDocValues) + static NumericDVs sortDocValues( + int maxDoc, Sorter.DocMap sortMap, NumericDocValues oldDocValues, boolean dense) throws IOException { - FixedBitSet docsWithField = new FixedBitSet(maxDoc); + FixedBitSet docsWithField = null; + if (dense == false) { + docsWithField = new FixedBitSet(maxDoc); + } + long[] values = new long[maxDoc]; while (true) { int docID = oldDocValues.nextDoc(); @@ -88,7 +93,9 @@ class NumericDocValuesWriter extends DocValuesWriter { break; } int newDocID = sortMap.oldToNew(docID); - docsWithField.set(newDocID); + if (docsWithField != null) { + docsWithField.set(newDocID); + } values[newDocID] = oldDocValues.longValue(); } return new NumericDVs(values, docsWithField); @@ -114,7 +121,9 @@ class NumericDocValuesWriter extends DocValuesWriter { final NumericDVs sorted; if (sortMap != null) { NumericDocValues oldValues = new BufferedNumericDocValues(values, docsWithField.iterator()); - sorted = sortDocValues(sortMap.size(), sortMap, oldValues); + sorted = + sortDocValues( + sortMap.size(), sortMap, oldValues, sortMap.size() == docsWithField.cardinality()); } else { sorted = null; } @@ -197,10 +206,10 @@ class NumericDocValuesWriter extends DocValuesWriter { @Override public int nextDoc() { - if (docID + 1 == dvs.docsWithField.length()) { + if (docID + 1 == dvs.maxDoc()) { docID = NO_MORE_DOCS; } else { - docID = dvs.docsWithField.nextSetBit(docID + 1); + docID = dvs.advance(docID + 1); } return docID; } @@ -214,7 +223,7 @@ class NumericDocValuesWriter extends DocValuesWriter { public boolean advanceExact(int target) throws IOException { // needed in IndexSorter#{Long|Int|Double|Float}Sorter docID = target; - return dvs.docsWithField.get(target); + return dvs.advanceExact(target); } @Override @@ -225,7 +234,7 @@ class NumericDocValuesWriter extends DocValuesWriter { @Override public long cost() { if (cost == -1) { - cost = dvs.docsWithField.cardinality(); + cost = dvs.cost(); } return cost; } @@ -234,10 +243,39 @@ class NumericDocValuesWriter extends DocValuesWriter { static class NumericDVs { private final long[] values; private final BitSet docsWithField; + private final int maxDoc; NumericDVs(long[] values, BitSet docsWithField) { this.values = values; this.docsWithField = docsWithField; + this.maxDoc = values.length; + } + + int maxDoc() { + return maxDoc; + } + + private boolean advanceExact(int target) { + if (docsWithField != null) { + return docsWithField.get(target); + } + return true; + } + + private int advance(int target) { + if (docsWithField != null) { + return docsWithField.nextSetBit(target); + } + + // Only called when target is less than maxDoc + return target; + } + + private long cost() { + if (docsWithField != null) { + return docsWithField.cardinality(); + } + return maxDoc; } } }