LUCENE-9613, LUCENE-10067: Further specialize ordinals. (#260)

This commit is contained in:
Adrien Grand 2021-08-26 09:44:24 +02:00 committed by GitHub
parent 8ac2673791
commit 2d7590a355
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 117 additions and 5 deletions

View File

@ -139,7 +139,8 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
// Adapts the field's numeric doc values to the SortedNumericDocValues interface by
// wrapping whatever valuesProducer.getNumeric(field) returns with DocValues.singleton.
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
return DocValues.singleton(valuesProducer.getNumeric(field));
}
});
},
false);
}
private static class MinMaxTracker {
@ -177,13 +178,14 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
}
}
private long[] writeValues(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
private long[] writeValues(FieldInfo field, DocValuesProducer valuesProducer, boolean ords)
throws IOException {
SortedNumericDocValues values = valuesProducer.getSortedNumeric(field);
int numDocsWithValue = 0;
MinMaxTracker minMax = new MinMaxTracker();
MinMaxTracker blockMinMax = new MinMaxTracker();
long gcd = 0;
Set<Long> uniqueValues = new HashSet<>();
Set<Long> uniqueValues = ords ? null : new HashSet<>();
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
for (int i = 0, count = values.docValueCount(); i < count; ++i) {
long v = values.nextValue();
@ -216,6 +218,17 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
minMax.finish();
blockMinMax.finish();
if (ords && minMax.numValues > 0) {
if (minMax.min != 0) {
throw new IllegalStateException(
"The min value for ordinals should always be 0, got " + minMax.min);
}
if (minMax.max != 0 && gcd != 1) {
throw new IllegalStateException(
"GCD compression should never be used on ordinals, found gcd=" + gcd);
}
}
final long numValues = minMax.numValues;
long min = minMax.min;
final long max = minMax.max;
@ -508,7 +521,8 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
};
return DocValues.singleton(sortedOrds);
}
});
},
true);
addTermsDict(DocValues.singleton(valuesProducer.getSorted(field)));
}
@ -669,7 +683,7 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
private void doAddSortedNumericField(FieldInfo field, DocValuesProducer valuesProducer)
throws IOException {
long[] stats = writeValues(field, valuesProducer);
long[] stats = writeValues(field, valuesProducer, false);
int numDocsWithField = Math.toIntExact(stats[0]);
long numValues = stats[1];
assert numValues >= numDocsWithField;

View File

@ -780,6 +780,104 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
}
private SortedDocValues getSorted(SortedEntry entry) throws IOException {
// Specialize the common case for ordinals: single block of packed integers.
final NumericEntry ordsEntry = entry.ordsEntry;
if (ordsEntry.blockShift < 0 // single block
&& ordsEntry.bitsPerValue > 0) { // more than 1 value
if (ordsEntry.gcd != 1 || ordsEntry.minValue != 0 || ordsEntry.table != null) {
throw new IllegalStateException("Ordinals shouldn't use GCD, offset or table compression");
}
final RandomAccessInput slice =
data.randomAccessSlice(ordsEntry.valuesOffset, ordsEntry.valuesLength);
final LongValues values = DirectReader.getInstance(slice, ordsEntry.bitsPerValue);
if (ordsEntry.docsWithFieldOffset == -1) { // dense
return new BaseSortedDocValues(entry, data) {
  // Dense case: advanceExact always reports a value, and the current doc ID is used
  // directly as the index into the packed ordinals.
  private final int docLimit = Lucene90DocValuesProducer.this.maxDoc;
  private int current = -1;

  @Override
  public int docID() {
    return current;
  }

  @Override
  public int nextDoc() throws IOException {
    return advance(current + 1);
  }

  @Override
  public int advance(int target) throws IOException {
    // Targets at or beyond the doc limit exhaust the iterator.
    current = target < docLimit ? target : NO_MORE_DOCS;
    return current;
  }

  @Override
  public boolean advanceExact(int target) throws IOException {
    current = target;
    return true;
  }

  @Override
  public int ordValue() throws IOException {
    final long ord = values.get(current);
    return (int) ord;
  }

  @Override
  public long cost() {
    return docLimit;
  }
};
} else if (ordsEntry.docsWithFieldOffset >= 0) { // sparse but non-empty
final IndexedDISI disi =
new IndexedDISI(
data,
ordsEntry.docsWithFieldOffset,
ordsEntry.docsWithFieldLength,
ordsEntry.jumpTableEntryCount,
ordsEntry.denseRankPower,
ordsEntry.numValues);
return new BaseSortedDocValues(entry, data) {
  // Sparse case: all iteration is delegated to the IndexedDISI, and its index() selects
  // the slot to read from the packed ordinals.

  @Override
  public int docID() {
    return disi.docID();
  }

  @Override
  public int nextDoc() throws IOException {
    return disi.nextDoc();
  }

  @Override
  public int advance(int target) throws IOException {
    return disi.advance(target);
  }

  @Override
  public boolean advanceExact(int target) throws IOException {
    return disi.advanceExact(target);
  }

  @Override
  public int ordValue() throws IOException {
    final long ord = values.get(disi.index());
    return (int) ord;
  }

  @Override
  public long cost() {
    return disi.cost();
  }
};
}
}
final NumericDocValues ords = getNumeric(entry.ordsEntry);
return new BaseSortedDocValues(entry, data) {