From 643429de6e162fd85d5100137d01ee29e4bb614a Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 26 Oct 2016 14:07:43 +0200 Subject: [PATCH] LUCENE-7489: Remove one layer of abstraction in binary doc values and single-valued numerics. --- .../lucene70/Lucene70DocValuesProducer.java | 425 +++++++++++------- 1 file changed, 259 insertions(+), 166 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java index 19815bab218..3f3e73f2e38 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java @@ -340,88 +340,147 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close return getNumeric(entry); } + private static abstract class DenseNumericDocValues extends NumericDocValues { + + final int maxDoc; + int doc = -1; + + DenseNumericDocValues(int maxDoc) { + this.maxDoc = maxDoc; + } + + @Override + public int docID() { + return doc; + } + + @Override + public int nextDoc() throws IOException { + return advance(doc + 1); + } + + @Override + public int advance(int target) throws IOException { + if (target >= maxDoc) { + return doc = NO_MORE_DOCS; + } + return doc = target; + } + + @Override + public boolean advanceExact(int target) { + doc = target; + return true; + } + + @Override + public long cost() { + return maxDoc; + } + + } + + private static abstract class SparseNumericDocValues extends NumericDocValues { + + final IndexedDISI disi; + + SparseNumericDocValues(IndexedDISI disi) { + this.disi = disi; + } + + @Override + public int advance(int target) throws IOException { + return disi.advance(target); + } + + @Override + public boolean advanceExact(int target) throws IOException { + return disi.advanceExact(target); + } + + @Override + public int nextDoc() throws IOException { + return disi.nextDoc(); + } + + @Override + public int docID() { + return disi.docID(); + } + + @Override + public long cost() { + return disi.cost(); + } + } + private NumericDocValues getNumeric(NumericEntry entry) throws IOException { if (entry.docsWithFieldOffset == -2) { // empty return DocValues.emptyNumeric(); } else if (entry.docsWithFieldOffset == -1) { // dense - final LongValues normValues = getNumericValues(entry); - return new NumericDocValues() { - - int doc = -1; - - @Override - public long longValue() throws IOException { - return normValues.get(doc); - } - - @Override - public int docID() { - return doc; - } - - @Override - public int nextDoc() throws IOException { - return advance(doc + 1); - } - - @Override - public int advance(int target) throws IOException { - if (target >= maxDoc) { - return doc = NO_MORE_DOCS; + if (entry.bitsPerValue == 0) { + return new DenseNumericDocValues(maxDoc) { + @Override + public long longValue() throws IOException { + return entry.minValue; } - return doc = target; + }; + } else { + final RandomAccessInput slice = data.randomAccessSlice(entry.valuesOffset, entry.valuesLength); + final LongValues values = DirectReader.getInstance(slice, entry.bitsPerValue); + if (entry.table != null) { + final long[] table = entry.table; + return new DenseNumericDocValues(maxDoc) { + @Override + public long longValue() throws IOException { + return table[(int) values.get(doc)]; + } + }; + } else { + final long mul = entry.gcd; + final long delta = entry.minValue; + return new DenseNumericDocValues(maxDoc) { + @Override + public long longValue() throws IOException { + return mul * values.get(doc) + delta; + } + }; } - - @Override - public boolean advanceExact(int target) { - doc = target; - return true; - } - - @Override - public long cost() { - return maxDoc; - } - - }; + } } else { // sparse - final LongValues values = getNumericValues(entry); final IndexedDISI disi = new IndexedDISI(data, entry.docsWithFieldOffset, entry.docsWithFieldLength, entry.numValues); - return new NumericDocValues() { - - @Override - public int advance(int target) throws IOException { - return disi.advance(target); + if (entry.bitsPerValue == 0) { + return new SparseNumericDocValues(disi) { + @Override + public long longValue() throws IOException { + return entry.minValue; + } + }; + } else { + final RandomAccessInput slice = data.randomAccessSlice(entry.valuesOffset, entry.valuesLength); + final LongValues values = DirectReader.getInstance(slice, entry.bitsPerValue); + if (entry.table != null) { + final long[] table = entry.table; + return new SparseNumericDocValues(disi) { + @Override + public long longValue() throws IOException { + return table[(int) values.get(disi.index())]; + } + }; + } else { + final long mul = entry.gcd; + final long delta = entry.minValue; + return new SparseNumericDocValues(disi) { + @Override + public long longValue() throws IOException { + return mul * values.get(disi.index()) + delta; + } + }; } - - @Override - public boolean advanceExact(int target) throws IOException { - return disi.advanceExact(target); - } - - @Override - public int nextDoc() throws IOException { - return disi.nextDoc(); - } - - @Override - public int docID() { - return disi.docID(); - } - - @Override - public long cost() { - return disi.cost(); - } - - @Override - public long longValue() throws IOException { - return values.get(disi.index()); - } - }; + } } } @@ -467,6 +526,79 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close } } + private static abstract class DenseBinaryDocValues extends BinaryDocValues { + + final int maxDoc; + int doc = -1; + + DenseBinaryDocValues(int maxDoc) { + this.maxDoc = maxDoc; + } + + @Override + public int nextDoc() throws IOException { + return advance(doc + 1); + } + + @Override + public int docID() { + return doc; + } + + @Override + public long cost() { + return maxDoc; + } + + @Override + public int advance(int target) throws IOException { + if (target >= maxDoc) { + return doc = NO_MORE_DOCS; + } + return doc = target; + } + + @Override + public boolean advanceExact(int target) throws IOException { + doc = target; + return true; + } + } + + private static abstract class SparseBinaryDocValues extends BinaryDocValues { + + final IndexedDISI disi; + + SparseBinaryDocValues(IndexedDISI disi) { + this.disi = disi; + } + + @Override + public int nextDoc() throws IOException { + return disi.nextDoc(); + } + + @Override + public int docID() { + return disi.docID(); + } + + @Override + public long cost() { + return disi.cost(); + } + + @Override + public int advance(int target) throws IOException { + return disi.advance(target); + } + + @Override + public boolean advanceExact(int target) throws IOException { + return disi.advanceExact(target); + } + } + @Override public BinaryDocValues getBinary(FieldInfo field) throws IOException { BinaryEntry entry = binaries.get(field.name); @@ -474,116 +606,77 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close return DocValues.emptyBinary(); } - IndexInput bytesSlice = data.slice("fixed-binary", entry.dataOffset, entry.dataLength); - BytesRefs bytesRefs; - if (entry.minLength == entry.maxLength) { - bytesRefs = new BytesRefs() { - BytesRef bytes = new BytesRef(new byte[entry.maxLength], 0, entry.maxLength); - @Override - public BytesRef get(int index) throws IOException { - bytesSlice.seek((long) index * bytes.length); - bytesSlice.readBytes(bytes.bytes, 0, bytes.length); - return bytes; - } - }; - } else { - final RandomAccessInput addressesData = this.data.randomAccessSlice(entry.addressesOffset, entry.addressesLength); - final LongValues addresses = DirectMonotonicReader.getInstance(entry.addressesMeta, addressesData); - bytesRefs = new BytesRefs() { - BytesRef bytes = new BytesRef(entry.maxLength); - @Override - BytesRef get(int index) throws IOException { - long startOffset = addresses.get(index); - bytes.length = (int) (addresses.get(index + 1L) - startOffset); - bytesSlice.seek(startOffset); - bytesSlice.readBytes(bytes.bytes, 0, bytes.length); - return bytes; - } - }; - } + final IndexInput bytesSlice = data.slice("fixed-binary", entry.dataOffset, entry.dataLength); if (entry.docsWithFieldOffset == -1) { // dense - return new BinaryDocValues() { + if (entry.minLength == entry.maxLength) { + // fixed length + final int length = entry.maxLength; + return new DenseBinaryDocValues(maxDoc) { + final BytesRef bytes = new BytesRef(new byte[length], 0, length); - int doc = -1; - - @Override - public int nextDoc() throws IOException { - return advance(doc + 1); - } - - @Override - public int docID() { - return doc; - } - - @Override - public long cost() { - return maxDoc; - } - - @Override - public int advance(int target) throws IOException { - if (target >= maxDoc) { - return doc = NO_MORE_DOCS; + @Override + public BytesRef binaryValue() throws IOException { + bytesSlice.seek((long) doc * length); + bytesSlice.readBytes(bytes.bytes, 0, length); + return bytes; } - return doc = target; - } + }; + } else { + // variable length + final RandomAccessInput addressesData = this.data.randomAccessSlice(entry.addressesOffset, entry.addressesLength); + final LongValues addresses = DirectMonotonicReader.getInstance(entry.addressesMeta, addressesData); + return new DenseBinaryDocValues(maxDoc) { + final BytesRef bytes = new BytesRef(new byte[entry.maxLength], 0, entry.maxLength); - @Override - public boolean advanceExact(int target) throws IOException { - doc = target; - return true; - } - - @Override - public BytesRef binaryValue() throws IOException { - return bytesRefs.get(doc); - } - }; + @Override + public BytesRef binaryValue() throws IOException { + long startOffset = addresses.get(doc); + bytes.length = (int) (addresses.get(doc + 1L) - startOffset); + bytesSlice.seek(startOffset); + bytesSlice.readBytes(bytes.bytes, 0, bytes.length); + return bytes; + } + }; + } } else { // sparse final IndexedDISI disi = new IndexedDISI(data, entry.docsWithFieldOffset, entry.docsWithFieldLength, entry.numDocsWithField); - return new BinaryDocValues() { + if (entry.minLength == entry.maxLength) { + // fixed length + final int length = entry.maxLength; + return new SparseBinaryDocValues(disi) { + final BytesRef bytes = new BytesRef(new byte[length], 0, length); - @Override - public int nextDoc() throws IOException { - return disi.nextDoc(); - } + @Override + public BytesRef binaryValue() throws IOException { + bytesSlice.seek((long) disi.index() * length); + bytesSlice.readBytes(bytes.bytes, 0, length); + return bytes; + } + }; + } else { + // variable length + final RandomAccessInput addressesData = this.data.randomAccessSlice(entry.addressesOffset, entry.addressesLength); + final LongValues addresses = DirectMonotonicReader.getInstance(entry.addressesMeta, addressesData); + return new SparseBinaryDocValues(disi) { + final BytesRef bytes = new BytesRef(new byte[entry.maxLength], 0, entry.maxLength); - @Override - public int docID() { - return disi.docID(); - } - - @Override - public long cost() { - return disi.cost(); - } - - @Override - public int advance(int target) throws IOException { - return disi.advance(target); - } - - @Override - public boolean advanceExact(int target) throws IOException { - return disi.advanceExact(target); - } - - @Override - public BytesRef binaryValue() throws IOException { - return bytesRefs.get(disi.index()); - } - }; + @Override + public BytesRef binaryValue() throws IOException { + final int index = disi.index(); + long startOffset = addresses.get(index); + bytes.length = (int) (addresses.get(index + 1L) - startOffset); + bytesSlice.seek(startOffset); + bytesSlice.readBytes(bytes.bytes, 0, bytes.length); + return bytes; + } + }; + } } } - private static abstract class BytesRefs { - abstract BytesRef get(int index) throws IOException; - } - @Override public SortedDocValues getSorted(FieldInfo field) throws IOException { SortedEntry entry = sorted.get(field.name);