mirror of https://github.com/apache/lucene.git
LUCENE-9613, LUCENE-10067: Further specialize ordinals. (#260)
This commit is contained in:
parent
8ac2673791
commit
2d7590a355
|
@ -139,7 +139,8 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
|
|||
// Serves a single-valued numeric field through the multi-valued API by wrapping
// the producer's numeric doc values with DocValues.singleton.
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
|
||||
return DocValues.singleton(valuesProducer.getNumeric(field));
|
||||
}
|
||||
});
|
||||
},
|
||||
false);
|
||||
}
|
||||
|
||||
private static class MinMaxTracker {
|
||||
|
@ -177,13 +178,14 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
|
|||
}
|
||||
}
|
||||
|
||||
private long[] writeValues(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
|
||||
private long[] writeValues(FieldInfo field, DocValuesProducer valuesProducer, boolean ords)
|
||||
throws IOException {
|
||||
SortedNumericDocValues values = valuesProducer.getSortedNumeric(field);
|
||||
int numDocsWithValue = 0;
|
||||
MinMaxTracker minMax = new MinMaxTracker();
|
||||
MinMaxTracker blockMinMax = new MinMaxTracker();
|
||||
long gcd = 0;
|
||||
Set<Long> uniqueValues = new HashSet<>();
|
||||
Set<Long> uniqueValues = ords ? null : new HashSet<>();
|
||||
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
|
||||
for (int i = 0, count = values.docValueCount(); i < count; ++i) {
|
||||
long v = values.nextValue();
|
||||
|
@ -216,6 +218,17 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
|
|||
minMax.finish();
|
||||
blockMinMax.finish();
|
||||
|
||||
if (ords && minMax.numValues > 0) {
|
||||
if (minMax.min != 0) {
|
||||
throw new IllegalStateException(
|
||||
"The min value for ordinals should always be 0, got " + minMax.min);
|
||||
}
|
||||
if (minMax.max != 0 && gcd != 1) {
|
||||
throw new IllegalStateException(
|
||||
"GCD compression should never be used on ordinals, found gcd=" + gcd);
|
||||
}
|
||||
}
|
||||
|
||||
final long numValues = minMax.numValues;
|
||||
long min = minMax.min;
|
||||
final long max = minMax.max;
|
||||
|
@ -508,7 +521,8 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
|
|||
};
|
||||
return DocValues.singleton(sortedOrds);
|
||||
}
|
||||
});
|
||||
},
|
||||
true);
|
||||
addTermsDict(DocValues.singleton(valuesProducer.getSorted(field)));
|
||||
}
|
||||
|
||||
|
@ -669,7 +683,7 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
|
|||
|
||||
private void doAddSortedNumericField(FieldInfo field, DocValuesProducer valuesProducer)
|
||||
throws IOException {
|
||||
long[] stats = writeValues(field, valuesProducer);
|
||||
long[] stats = writeValues(field, valuesProducer, false);
|
||||
int numDocsWithField = Math.toIntExact(stats[0]);
|
||||
long numValues = stats[1];
|
||||
assert numValues >= numDocsWithField;
|
||||
|
|
|
@ -780,6 +780,104 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
|
|||
}
|
||||
|
||||
private SortedDocValues getSorted(SortedEntry entry) throws IOException {
|
||||
// Specialize the common case for ordinals: single block of packed integers.
|
||||
final NumericEntry ordsEntry = entry.ordsEntry;
|
||||
if (ordsEntry.blockShift < 0 // single block
|
||||
&& ordsEntry.bitsPerValue > 0) { // more than 1 value
|
||||
|
||||
if (ordsEntry.gcd != 1 || ordsEntry.minValue != 0 || ordsEntry.table != null) {
|
||||
throw new IllegalStateException("Ordinals shouldn't use GCD, offset or table compression");
|
||||
}
|
||||
|
||||
final RandomAccessInput slice =
|
||||
data.randomAccessSlice(ordsEntry.valuesOffset, ordsEntry.valuesLength);
|
||||
final LongValues values = DirectReader.getInstance(slice, ordsEntry.bitsPerValue);
|
||||
|
||||
if (ordsEntry.docsWithFieldOffset == -1) { // dense
|
||||
return new BaseSortedDocValues(entry, data) {
|
||||
|
||||
private final int maxDoc = Lucene90DocValuesProducer.this.maxDoc;
|
||||
private int doc = -1;
|
||||
|
||||
// Dense case: the ordinal is read from the DirectReader-backed values at index
// `doc` (the current doc ID) and narrowed from long to int.
@Override
|
||||
public int ordValue() throws IOException {
|
||||
return (int) values.get(doc);
|
||||
}
|
||||
|
||||
// Records `target` as the current doc and unconditionally reports a value; this
// iterator is only created on the dense branch (docsWithFieldOffset == -1).
// No range check is applied to `target` here.
@Override
|
||||
public boolean advanceExact(int target) throws IOException {
|
||||
doc = target;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns the current position: -1 until first positioned, then whatever
// advance()/advanceExact() last stored (NO_MORE_DOCS once advance() saw
// target >= maxDoc).
@Override
|
||||
public int docID() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
// Moves to the doc ID right after the current one by delegating to
// advance(doc + 1), which also applies the maxDoc bound.
// NOTE(review): if called again after exhaustion, doc + 1 is computed from
// NO_MORE_DOCS; presumably callers never do that — confirm.
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return advance(doc + 1);
|
||||
}
|
||||
|
||||
// Dense iteration: every doc ID below maxDoc is a hit, so the target itself
// becomes the new position; anything at or beyond maxDoc exhausts the iterator.
// The assigned value is both stored in `doc` and returned.
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target >= maxDoc) {
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
return doc = target;
|
||||
}
|
||||
|
||||
// Reports maxDoc as the iteration cost (the dense branch visits every doc ID
// below maxDoc).
@Override
|
||||
public long cost() {
|
||||
return maxDoc;
|
||||
}
|
||||
};
|
||||
} else if (ordsEntry.docsWithFieldOffset >= 0) { // sparse but non-empty
|
||||
final IndexedDISI disi =
|
||||
new IndexedDISI(
|
||||
data,
|
||||
ordsEntry.docsWithFieldOffset,
|
||||
ordsEntry.docsWithFieldLength,
|
||||
ordsEntry.jumpTableEntryCount,
|
||||
ordsEntry.denseRankPower,
|
||||
ordsEntry.numValues);
|
||||
|
||||
return new BaseSortedDocValues(entry, data) {
|
||||
|
||||
// Sparse case: the ordinal is looked up at disi.index() rather than at the doc
// ID, then narrowed from long to int.
// NOTE(review): index() is presumably the rank of the current doc among docs
// that have a value — confirm against IndexedDISI.
@Override
|
||||
public int ordValue() throws IOException {
|
||||
return (int) values.get(disi.index());
|
||||
}
|
||||
|
||||
// Delegates to the IndexedDISI, which decides whether `target` has a value.
@Override
|
||||
public boolean advanceExact(int target) throws IOException {
|
||||
return disi.advanceExact(target);
|
||||
}
|
||||
|
||||
// Current position is whatever the underlying IndexedDISI reports.
@Override
|
||||
public int docID() {
|
||||
return disi.docID();
|
||||
}
|
||||
|
||||
// Steps the underlying IndexedDISI forward and returns its new position.
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return disi.nextDoc();
|
||||
}
|
||||
|
||||
// Forwards the skip request to the underlying IndexedDISI and returns the
// position it lands on.
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
return disi.advance(target);
|
||||
}
|
||||
|
||||
// Iteration cost is taken from the underlying IndexedDISI.
@Override
|
||||
public long cost() {
|
||||
return disi.cost();
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
final NumericDocValues ords = getNumeric(entry.ordsEntry);
|
||||
return new BaseSortedDocValues(entry, data) {
|
||||
|
||||
|
|
Loading…
Reference in New Issue