diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java index e9146980099..6a22eff1963 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java @@ -139,7 +139,8 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer { public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException { return DocValues.singleton(valuesProducer.getNumeric(field)); } - }); + }, + false); } private static class MinMaxTracker { @@ -177,13 +178,14 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer { } } - private long[] writeValues(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { + private long[] writeValues(FieldInfo field, DocValuesProducer valuesProducer, boolean ords) + throws IOException { SortedNumericDocValues values = valuesProducer.getSortedNumeric(field); int numDocsWithValue = 0; MinMaxTracker minMax = new MinMaxTracker(); MinMaxTracker blockMinMax = new MinMaxTracker(); long gcd = 0; - Set<Long> uniqueValues = new HashSet<>(); + Set<Long> uniqueValues = ords ?
null : new HashSet<>(); for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { for (int i = 0, count = values.docValueCount(); i < count; ++i) { long v = values.nextValue(); @@ -216,6 +218,17 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer { minMax.finish(); blockMinMax.finish(); + if (ords && minMax.numValues > 0) { + if (minMax.min != 0) { + throw new IllegalStateException( + "The min value for ordinals should always be 0, got " + minMax.min); + } + if (minMax.max != 0 && gcd != 1) { + throw new IllegalStateException( + "GCD compression should never be used on ordinals, found gcd=" + gcd); + } + } + final long numValues = minMax.numValues; long min = minMax.min; final long max = minMax.max; @@ -508,7 +521,8 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer { }; return DocValues.singleton(sortedOrds); } - }); + }, + true); addTermsDict(DocValues.singleton(valuesProducer.getSorted(field))); } @@ -669,7 +683,7 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer { private void doAddSortedNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { - long[] stats = writeValues(field, valuesProducer); + long[] stats = writeValues(field, valuesProducer, false); int numDocsWithField = Math.toIntExact(stats[0]); long numValues = stats[1]; assert numValues >= numDocsWithField; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java index 5a215821f3b..eb58502e103 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java @@ -780,6 +780,104 @@ final class Lucene90DocValuesProducer extends DocValuesProducer { } private SortedDocValues getSorted(SortedEntry entry) throws IOException { + // Specialize the 
common case for ordinals: single block of packed integers. + final NumericEntry ordsEntry = entry.ordsEntry; + if (ordsEntry.blockShift < 0 // single block + && ordsEntry.bitsPerValue > 0) { // more than 1 value + + if (ordsEntry.gcd != 1 || ordsEntry.minValue != 0 || ordsEntry.table != null) { + throw new IllegalStateException("Ordinals shouldn't use GCD, offset or table compression"); + } + + final RandomAccessInput slice = + data.randomAccessSlice(ordsEntry.valuesOffset, ordsEntry.valuesLength); + final LongValues values = DirectReader.getInstance(slice, ordsEntry.bitsPerValue); + + if (ordsEntry.docsWithFieldOffset == -1) { // dense + return new BaseSortedDocValues(entry, data) { + + private final int maxDoc = Lucene90DocValuesProducer.this.maxDoc; + private int doc = -1; + + @Override + public int ordValue() throws IOException { + return (int) values.get(doc); + } + + @Override + public boolean advanceExact(int target) throws IOException { + doc = target; + return true; + } + + @Override + public int docID() { + return doc; + } + + @Override + public int nextDoc() throws IOException { + return advance(doc + 1); + } + + @Override + public int advance(int target) throws IOException { + if (target >= maxDoc) { + return doc = NO_MORE_DOCS; + } + return doc = target; + } + + @Override + public long cost() { + return maxDoc; + } + }; + } else if (ordsEntry.docsWithFieldOffset >= 0) { // sparse but non-empty + final IndexedDISI disi = + new IndexedDISI( + data, + ordsEntry.docsWithFieldOffset, + ordsEntry.docsWithFieldLength, + ordsEntry.jumpTableEntryCount, + ordsEntry.denseRankPower, + ordsEntry.numValues); + + return new BaseSortedDocValues(entry, data) { + + @Override + public int ordValue() throws IOException { + return (int) values.get(disi.index()); + } + + @Override + public boolean advanceExact(int target) throws IOException { + return disi.advanceExact(target); + } + + @Override + public int docID() { + return disi.docID(); + } + + @Override + public int 
nextDoc() throws IOException { + return disi.nextDoc(); + } + + @Override + public int advance(int target) throws IOException { + return disi.advance(target); + } + + @Override + public long cost() { + return disi.cost(); + } + }; + } + } + final NumericDocValues ords = getNumeric(entry.ordsEntry); return new BaseSortedDocValues(entry, data) {