From 08812d699416e6b7f249cce4f7ffe76873346038 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Mon, 19 Nov 2012 17:33:30 +0000 Subject: [PATCH] added in-memory support for 4.1 git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1411325 13f79535-47bb-0310-9956-ffa450edef68 --- .../values/Lucene41BinaryDocValues.java | 25 ++++++++--- .../values/Lucene41DocValuesConsumer.java | 2 +- .../values/Lucene41NumericDocValues.java | 44 +++++++++++++------ .../values/Lucene41SortedDocValues.java | 33 ++++++++++---- .../Lucene41SortedDocValuesConsumer.java | 5 ++- 5 files changed, 78 insertions(+), 31 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/values/Lucene41BinaryDocValues.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/values/Lucene41BinaryDocValues.java index 59aa9cfc8c7..4ba4b4bf7f6 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/values/Lucene41BinaryDocValues.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/values/Lucene41BinaryDocValues.java @@ -21,10 +21,8 @@ import java.io.IOException; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.lucene41.values.Lucene41DocValuesProducer.DocValuesFactory; import org.apache.lucene.index.BinaryDocValues; -import org.apache.lucene.index.DocValues; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.SegmentInfo; -import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; @@ -35,21 +33,23 @@ import org.apache.lucene.util.packed.PackedInts; import static org.apache.lucene.codecs.lucene41.values.Lucene41BinaryDocValuesConsumer.*; -public class Lucene41BinaryDocValues extends BinaryDocValues { +public final class Lucene41BinaryDocValues extends BinaryDocValues { private final PackedInts.Reader index; private final IndexInput data; private final long baseOffset; private final int size; private int maxLength; + private final DocValuesFactory factory; public Lucene41BinaryDocValues(IndexInput dataIn, long dataOffset, int size, - int maxLength, PackedInts.Reader index) throws IOException { + int maxLength, PackedInts.Reader index, DocValuesFactory factory) throws IOException { this.data = dataIn; this.size = size; this.maxLength = maxLength; this.baseOffset = dataOffset; this.index = index; + this.factory = factory; } public void get(int docId, BytesRef result) { @@ -91,6 +91,19 @@ public class Lucene41BinaryDocValues extends BinaryDocValues { return maxLength; } + + + @Override + public BinaryDocValues newRAMInstance() { + try { + return factory == null ? this : factory.getInMemory(); + } catch (IOException e) { + return this; // nocommit ?? now IOException + } + } + + + public static final class Factory extends DocValuesFactory { private final IndexInput datIn; private final IndexInput indexIn; @@ -142,7 +155,7 @@ public class Lucene41BinaryDocValues extends BinaryDocValues { return new Lucene41BinaryDocValues(datIn.clone(), this.baseOffset, size, maxLength, indexHeader == null ? null : PackedInts.getDirectReaderNoHeader( - indexIn.clone(), indexHeader)); + indexIn.clone(), indexHeader), this); } public BinaryDocValues getInMemory() throws IOException { @@ -154,7 +167,7 @@ public class Lucene41BinaryDocValues extends BinaryDocValues { : indexReader.get(indexReader.size() - 1)); bytes.freeze(true); return new Lucene41BinaryDocValues(bytes.getDataInput(), 0, size, - maxLength, indexReader); + maxLength, indexReader, null); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/values/Lucene41DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/values/Lucene41DocValuesConsumer.java index 13f96b36e4f..e6e6c2c0caa 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/values/Lucene41DocValuesConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/values/Lucene41DocValuesConsumer.java @@ -138,7 +138,7 @@ public class Lucene41DocValuesConsumer extends SimpleDVConsumer { offsetOut = getDirectory().createOutput(offOut, context); } Lucene41SortedDocValuesConsumer consumer = new Lucene41SortedDocValuesConsumer( - dataOut, indexOut, offsetOut, valueCount, maxLength); + dataOut, indexOut, offsetOut, valueCount, maxLength, this.info.getDocCount()); success = true; return consumer; } finally { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/values/Lucene41NumericDocValues.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/values/Lucene41NumericDocValues.java index 32b677078a9..d02b15ec5e7 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/values/Lucene41NumericDocValues.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/values/Lucene41NumericDocValues.java @@ -1,4 +1,5 @@ package org.apache.lucene.codecs.lucene41.values; + /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -29,18 +30,19 @@ import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.packed.PackedInts; import org.apache.lucene.util.packed.PackedInts.Reader; - - public class Lucene41NumericDocValues extends NumericDocValues { private final long minValue; private final Reader values; private final long maxValue; + private final DocValuesFactory factory; - public Lucene41NumericDocValues(PackedInts.Reader reader, long minValue, long maxValue) { + public Lucene41NumericDocValues(PackedInts.Reader reader, long minValue, + long maxValue, DocValuesFactory factory) { this.values = reader; this.minValue = minValue; this.maxValue = maxValue; + this.factory = factory; } @Override @@ -49,18 +51,30 @@ public class Lucene41NumericDocValues extends NumericDocValues { return values.get(docID) + minValue; } + @Override + public NumericDocValues newRAMInstance() { + try { + return factory == null ? this : factory.getInMemory(); + } catch (IOException e) { + return this; // nocommit ?? now IOException + } + } + public static final class Factory extends DocValuesFactory { private final IndexInput datIn; private final PackedInts.Header header; private final long minValue; private final long maxValue; - - public Factory(Directory dir, SegmentInfo segmentInfo, FieldInfo field, IOContext context) throws IOException { - this.datIn = dir.openInput(Lucene41DocValuesConsumer.getDocValuesFileName(segmentInfo, field, - Lucene41DocValuesConsumer.DATA_EXTENSION), context); + + public Factory(Directory dir, SegmentInfo segmentInfo, FieldInfo field, + IOContext context) throws IOException { + this.datIn = dir.openInput(Lucene41DocValuesConsumer + .getDocValuesFileName(segmentInfo, field, + Lucene41DocValuesConsumer.DATA_EXTENSION), context); boolean success = false; try { - CodecUtil.checkHeader(datIn, Lucene41NumericDocValuesConsumer.CODEC_NAME, + CodecUtil.checkHeader(datIn, + Lucene41NumericDocValuesConsumer.CODEC_NAME, Lucene41NumericDocValuesConsumer.VERSION_START, Lucene41NumericDocValuesConsumer.VERSION_START); minValue = datIn.readLong(); @@ -76,30 +90,32 @@ public class Lucene41NumericDocValues extends NumericDocValues { public NumericDocValues getDirect() throws IOException { IndexInput input = datIn.clone(); - return new Lucene41NumericDocValues(PackedInts.getDirectReaderNoHeader(input, header), minValue, maxValue); + return new Lucene41NumericDocValues(PackedInts.getDirectReaderNoHeader( + input, header), minValue, maxValue, this); } public NumericDocValues getInMemory() throws IOException { IndexInput input = datIn.clone(); - return new Lucene41NumericDocValues(PackedInts.getReaderNoHeader(input, header), minValue, maxValue); + return new Lucene41NumericDocValues(PackedInts.getReaderNoHeader(input, + header), minValue, maxValue, null); } - + @Override public void close() throws IOException { IOUtils.close(datIn); } } - + @Override public long minValue() { return minValue; } - + @Override public long maxValue() { return maxValue; } - + @Override public int size() { return values.size(); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/values/Lucene41SortedDocValues.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/values/Lucene41SortedDocValues.java index a8b984e9e54..180b3c83138 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/values/Lucene41SortedDocValues.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/values/Lucene41SortedDocValues.java @@ -43,17 +43,20 @@ public class Lucene41SortedDocValues extends SortedDocValues { private final int valueCount; private int size; private int maxLength; + private final DocValuesFactory factory; public Lucene41SortedDocValues(IndexInput dataIn, long dataOffset, int size, - int maxLength, PackedInts.Reader index, PackedInts.Reader offsets) + int maxLength, int valueCount, PackedInts.Reader index, PackedInts.Reader offsets, DocValuesFactory factory) throws IOException { this.data = dataIn; this.size = size; this.maxLength = maxLength; this.baseOffset = dataOffset; + this.valueCount = valueCount; this.docToOrdIndex = index; - this.valueCount = docToOrdIndex.size(); ordToOffsetIndex = offsets; + this.factory = factory; + } @Override @@ -64,12 +67,15 @@ public class Lucene41SortedDocValues extends SortedDocValues { @Override public void lookupOrd(int ord, BytesRef result) { try { + assert ord < valueCount; final long offset; final int length; if (ordToOffsetIndex != null) { offset = ordToOffsetIndex.get(ord); + // 1+ord is safe because we write a sentinel at the end final long nextOffset = ordToOffsetIndex.get(1 + ord); + assert offset <= nextOffset : "offset: " + offset + " nextOffset: " + nextOffset + " ord: " + ord + " numValues: " + valueCount; length = (int) (nextOffset - offset); } else { length = size; @@ -105,6 +111,15 @@ public class Lucene41SortedDocValues extends SortedDocValues { return maxLength; } + @Override + public SortedDocValues newRAMInstance() { + try { + return factory == null ? this : factory.getInMemory(); + } catch (IOException e) { + return this; // nocommit ?? now IOException + } + } + public static final class Factory extends DocValuesFactory { private final IndexInput datIn; @@ -112,9 +127,10 @@ public class Lucene41SortedDocValues extends SortedDocValues { private final PackedInts.Header offsetHeader; private final IndexInput indexIn; private final PackedInts.Header indexHeader; - private int size; - private int maxLength; - private long baseOffset; + private final int size; + private final int maxLength; + private final long baseOffset; + private final int valueCount; public Factory(Directory dir, SegmentInfo segmentInfo, FieldInfo field, IOContext context) @@ -136,6 +152,7 @@ public class Lucene41SortedDocValues extends SortedDocValues { indexHeader = PackedInts.readHeader(indexIn); this.size = datIn.readInt(); this.maxLength = datIn.readInt(); + this.valueCount = datIn.readInt(); this.baseOffset = datIn.getFilePointer(); if (size == Lucene41BinaryDocValuesConsumer.VALUE_SIZE_VAR) { @@ -162,10 +179,10 @@ public class Lucene41SortedDocValues extends SortedDocValues { public SortedDocValues getDirect() throws IOException { return new Lucene41SortedDocValues(datIn.clone(), this.baseOffset, size, - maxLength, PackedInts.getDirectReaderNoHeader(indexIn.clone(), + maxLength, valueCount, PackedInts.getDirectReaderNoHeader(indexIn.clone(), indexHeader), offsetHeader == null ? null : PackedInts.getDirectReaderNoHeader(offsetIn.clone(), - offsetHeader)); + offsetHeader), this); } public Lucene41SortedDocValues getInMemory() throws IOException { @@ -181,7 +198,7 @@ public class Lucene41SortedDocValues extends SortedDocValues { .get(offsetReader.size() - 1)); bytes.freeze(true); return new Lucene41SortedDocValues(bytes.getDataInput(), 0, size, - maxLength, indexReader, offsetReader); + maxLength, valueCount, indexReader, offsetReader, this); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/values/Lucene41SortedDocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/values/Lucene41SortedDocValuesConsumer.java index 4d82b77e58b..1b9a312d6e9 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/values/Lucene41SortedDocValuesConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/values/Lucene41SortedDocValuesConsumer.java @@ -40,7 +40,7 @@ public class Lucene41SortedDocValuesConsumer extends SortedDocValuesConsumer { private final int valueCount; public Lucene41SortedDocValuesConsumer(IndexOutput dataOut, - IndexOutput indexOut, IndexOutput offsetOut, int valueCount, int maxLength) + IndexOutput indexOut, IndexOutput offsetOut, int valueCount, int maxLength, int docCount) throws IOException { int size; if (offsetOut != null) { @@ -56,11 +56,12 @@ public class Lucene41SortedDocValuesConsumer extends SortedDocValuesConsumer { CodecUtil.writeHeader(dataOut, CODEC_NAME, VERSION_START); dataOut.writeInt(size); dataOut.writeInt(maxLength); + dataOut.writeInt(valueCount); CodecUtil.writeHeader(indexOut, CODEC_NAME, VERSION_START); this.data = dataOut; this.index = indexOut; this.valueCount = valueCount; - ords = PackedInts.getWriter(index, valueCount, + ords = PackedInts.getWriter(index, docCount, PackedInts.bitsRequired(valueCount-1), PackedInts.DEFAULT); }