From 75d62170915a92672bc7df625f7d7043c94b12a6 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Sat, 10 Dec 2011 17:43:00 +0000 Subject: [PATCH] LUCENE-3622: PerDocValues -> PerDocProducer (privately used only by SegmentCoreReaders), simplify IR API and MultiDocValues, remove MultiPerDocValues git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3622@1212828 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/index/memory/MemoryIndex.java | 4 +- .../org/apache/lucene/index/CheckIndex.java | 5 +- .../apache/lucene/index/DirectoryReader.java | 7 +- .../lucene/index/FilterIndexReader.java | 4 +- .../org/apache/lucene/index/IndexReader.java | 52 ++----- .../apache/lucene/index/MultiDocValues.java | 130 ++++++++++------ .../apache/lucene/index/MultiFieldsEnum.java | 9 -- .../lucene/index/MultiPerDocValues.java | 147 ------------------ .../org/apache/lucene/index/MultiReader.java | 5 +- .../apache/lucene/index/ParallelReader.java | 35 +---- .../lucene/index/SegmentCoreReaders.java | 3 +- .../apache/lucene/index/SegmentReader.java | 9 +- .../lucene/index/SlowMultiReaderWrapper.java | 11 +- .../lucene/index/codecs/DocValuesFormat.java | 3 +- .../index/codecs/DocValuesReaderBase.java | 5 +- .../lucene/index/codecs/PerDocConsumer.java | 17 +- .../PerDocProducer.java} | 11 +- .../index/codecs/lucene3x/Lucene3xCodec.java | 4 +- .../lucene40/Lucene40DocValuesFormat.java | 4 +- .../lucene40/Lucene40DocValuesProducer.java | 2 +- .../codecs/sep/SepDocValuesProducer.java | 2 +- .../mocksep/MockSepDocValuesFormat.java | 4 +- .../apache/lucene/index/TestAddIndexes.java | 2 +- .../lucene/index/TestDuelingCodecs.java | 35 ++--- .../index/values/TestDocValuesIndexing.java | 30 +--- 25 files changed, 167 insertions(+), 373 deletions(-) delete mode 100644 lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java rename lucene/src/java/org/apache/lucene/index/{PerDocValues.java => codecs/PerDocProducer.java} (83%) diff --git a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java index 6f1869329ed..c38a9b7f52d 100644 --- a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java +++ b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java @@ -40,8 +40,8 @@ import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.Fields; import org.apache.lucene.index.FieldsEnum; -import org.apache.lucene.index.PerDocValues; import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.index.DocValues; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.OrdTermState; import org.apache.lucene.index.StoredFieldVisitor; @@ -1159,7 +1159,7 @@ public class MemoryIndex { } @Override - public PerDocValues perDocValues() throws IOException { + public DocValues docValues(String field) throws IOException { return null; } } diff --git a/lucene/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/src/java/org/apache/lucene/index/CheckIndex.java index 2e6c7002396..6be57d5c09a 100644 --- a/lucene/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/src/java/org/apache/lucene/index/CheckIndex.java @@ -27,6 +27,8 @@ import org.apache.lucene.store.IndexInput; import org.apache.lucene.document.Document; import org.apache.lucene.index.DocValues.Source; import org.apache.lucene.index.codecs.Codec; +import org.apache.lucene.index.codecs.PerDocProducer; + import java.io.File; import java.io.IOException; import java.io.PrintStream; @@ -1130,8 +1132,7 @@ public class CheckIndex { for (FieldInfo fieldInfo : fieldInfos) { if (fieldInfo.hasDocValues()) { status.totalValueFields++; - final PerDocValues perDocValues = reader.perDocValues(); - final DocValues docValues = perDocValues.docValues(fieldInfo.name); + final DocValues docValues = reader.docValues(fieldInfo.name); if (docValues == null) { continue; } diff --git a/lucene/src/java/org/apache/lucene/index/DirectoryReader.java b/lucene/src/java/org/apache/lucene/index/DirectoryReader.java index e1468b75624..64c02c837b4 100644 --- a/lucene/src/java/org/apache/lucene/index/DirectoryReader.java +++ b/lucene/src/java/org/apache/lucene/index/DirectoryReader.java @@ -31,11 +31,8 @@ import java.util.concurrent.ConcurrentHashMap; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.Lock; -import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.MapBackedSet; /** @@ -716,7 +713,7 @@ class DirectoryReader extends IndexReader implements Cloneable { } @Override - public PerDocValues perDocValues() throws IOException { - throw new UnsupportedOperationException("please use MultiPerDocValues#getPerDocs, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level Fields"); + public DocValues docValues(String field) throws IOException { + throw new UnsupportedOperationException("please use MultiDocValues#getDocValues, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level DocValues"); } } diff --git a/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java b/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java index 3c64cbeccd7..cdeb70c91fa 100644 --- a/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java +++ b/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java @@ -421,9 +421,9 @@ public class FilterIndexReader extends IndexReader { } @Override - public PerDocValues perDocValues() throws IOException { + public DocValues docValues(String field) throws IOException { ensureOpen(); - return in.perDocValues(); + return in.docValues(field); } @Override diff --git a/lucene/src/java/org/apache/lucene/index/IndexReader.java b/lucene/src/java/org/apache/lucene/index/IndexReader.java index 592e66dd09e..edc2142927c 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexReader.java +++ b/lucene/src/java/org/apache/lucene/index/IndexReader.java @@ -773,21 +773,6 @@ public abstract class IndexReader implements Cloneable,Closeable { * through them yourself. */ public abstract Fields fields() throws IOException; - /** - * Returns {@link PerDocValues} for this reader. - * This method may return null if the reader has no per-document - * values stored. - * - *

NOTE: if this is a multi reader ({@link - * #getSequentialSubReaders} is not null) then this - * method will throw UnsupportedOperationException. If - * you really need {@link PerDocValues} for such a reader, - * use {@link MultiPerDocValues#getPerDocs(IndexReader)}. However, for - * performance reasons, it's best to get all sub-readers - * using {@link ReaderUtil#gatherSubReaders} and iterate - * through them yourself. */ - public abstract PerDocValues perDocValues() throws IOException; - public final int docFreq(Term term) throws IOException { return docFreq(term.field(), term.bytes()); } @@ -1146,14 +1131,20 @@ public abstract class IndexReader implements Cloneable,Closeable { throw new UnsupportedOperationException("This reader does not support this method."); } - public final DocValues docValues(String field) throws IOException { - ensureOpen(); - final PerDocValues perDoc = perDocValues(); - if (perDoc == null) { - return null; - } - return perDoc.docValues(field); - } + /** + * Returns {@link DocValues} for this field. + * This method may return null if the reader has no per-document + * values stored. + * + *

NOTE: if this is a multi reader ({@link + * #getSequentialSubReaders} is not null) then this + * method will throw UnsupportedOperationException. If + * you really need {@link DocValues} for such a reader, + * use {@link MultiDocValues#getDocValues(IndexReader,String)}. However, for + * performance reasons, it's best to get all sub-readers + * using {@link ReaderUtil#gatherSubReaders} and iterate + * through them yourself. */ + public abstract DocValues docValues(String field) throws IOException; private volatile Fields fields; @@ -1169,21 +1160,6 @@ public abstract class IndexReader implements Cloneable,Closeable { return fields; } - private volatile PerDocValues perDocValues; - - /** @lucene.internal */ - void storePerDoc(PerDocValues perDocValues) { - ensureOpen(); - this.perDocValues = perDocValues; - } - - /** @lucene.internal */ - PerDocValues retrievePerDoc() { - ensureOpen(); - return perDocValues; - } - - /** * A struct like class that represents a hierarchical relationship between * {@link IndexReader} instances. diff --git a/lucene/src/java/org/apache/lucene/index/MultiDocValues.java b/lucene/src/java/org/apache/lucene/index/MultiDocValues.java index 529058e8362..9f1d16fa2a5 100644 --- a/lucene/src/java/org/apache/lucene/index/MultiDocValues.java +++ b/lucene/src/java/org/apache/lucene/index/MultiDocValues.java @@ -17,10 +17,13 @@ package org.apache.lucene.index; * limitations under the License. */ import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; +import java.util.List; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.ReaderUtil; +import org.apache.lucene.util.ReaderUtil.Gather; /** * A wrapper for compound IndexReader providing access to per segment @@ -31,59 +34,92 @@ import org.apache.lucene.util.ReaderUtil; */ public class MultiDocValues extends DocValues { - public static class DocValuesIndex { - public final static DocValuesIndex[] EMPTY_ARRAY = new DocValuesIndex[0]; + public static class DocValuesSlice { + public final static DocValuesSlice[] EMPTY_ARRAY = new DocValuesSlice[0]; final int start; final int length; - final DocValues docValues; + DocValues docValues; - public DocValuesIndex(DocValues docValues, int start, int length) { + public DocValuesSlice(DocValues docValues, int start, int length) { this.docValues = docValues; this.start = start; this.length = length; } } - private DocValuesIndex[] docValuesIdx; + private DocValuesSlice[] slices; private int[] starts; private Type type; private int valueSize; - public MultiDocValues() { - starts = new int[0]; - docValuesIdx = new DocValuesIndex[0]; + private MultiDocValues(DocValuesSlice[] slices, int[] starts, TypePromoter promotedType) { + this.starts = starts; + this.slices = slices; + this.type = promotedType.type(); + this.valueSize = promotedType.getValueSize(); } - - public MultiDocValues(DocValuesIndex[] docValuesIdx) { - reset(docValuesIdx); + + /** + * Returns a single {@link DocValues} instance for this field, merging + * their values on the fly. + * + *

+ * NOTE: this is a slow way to access DocValues. It's better to get the + * sub-readers (using {@link Gather}) and iterate through them yourself. + */ + public static DocValues getDocValues(IndexReader r, final String field) throws IOException { + final IndexReader[] subs = r.getSequentialSubReaders(); + if (subs == null) { + // already an atomic reader + return r.docValues(field); + } else if (subs.length == 0) { + // no fields + return null; + } else if (subs.length == 1) { + return getDocValues(subs[0], field); + } else { + final List slices = new ArrayList(); + + final TypePromoter promotedType[] = new TypePromoter[1]; + promotedType[0] = TypePromoter.getIdentityPromoter(); + + // gather all docvalues fields, accumulating a promoted type across + // potentially incompatible types + + new ReaderUtil.Gather(r) { + @Override + protected void add(int base, IndexReader r) throws IOException { + final DocValues d = r.docValues(field); + if (d != null) { + TypePromoter incoming = TypePromoter.create(d.type(), d.getValueSize()); + promotedType[0] = promotedType[0].promote(incoming); + } + slices.add(new DocValuesSlice(d, base, r.maxDoc())); + } + }.run(); + + // return null if no docvalues encountered anywhere + if (promotedType[0] == TypePromoter.getIdentityPromoter()) { + return null; + } + + // populate starts and fill gaps with empty docvalues + int starts[] = new int[slices.size()]; + for (int i = 0; i < slices.size(); i++) { + DocValuesSlice slice = slices.get(i); + starts[i] = slice.start; + if (slice.docValues == null) { + slice.docValues = new EmptyDocValues(slice.length, promotedType[0].type()); + } + } + + return new MultiDocValues(slices.toArray(new DocValuesSlice[slices.size()]), starts, promotedType[0]); + } } @Override public Source load() throws IOException { - return new MultiSource(docValuesIdx, starts, false); - } - - public DocValues reset(DocValuesIndex[] docValuesIdx) { - final int[] start = new int[docValuesIdx.length]; - TypePromoter promoter = TypePromoter.getIdentityPromoter(); - for (int i = 0; i < docValuesIdx.length; i++) { - start[i] = docValuesIdx[i].start; - if (!(docValuesIdx[i].docValues instanceof EmptyDocValues)) { - // only promote if not a dummy - final TypePromoter incomingPromoter = TypePromoter.create( - docValuesIdx[i].docValues.type(), - docValuesIdx[i].docValues.getValueSize()); - promoter = promoter.promote(incomingPromoter); - if (promoter == null) { - throw new IllegalStateException("Can not promote " + incomingPromoter); - } - } - } - this.type = promoter.type(); - this.valueSize = promoter.getValueSize(); - this.starts = start; - this.docValuesIdx = docValuesIdx; - return this; + return new MultiSource(slices, starts, false, type); } public static class EmptyDocValues extends DocValues { @@ -117,14 +153,14 @@ public class MultiDocValues extends DocValues { private int start = 0; private Source current; private final int[] starts; - private final DocValuesIndex[] docValuesIdx; + private final DocValuesSlice[] slices; private boolean direct; - public MultiSource(DocValuesIndex[] docValuesIdx, int[] starts, boolean direct) { - super(docValuesIdx[0].docValues.type()); - this.docValuesIdx = docValuesIdx; + public MultiSource(DocValuesSlice[] slices, int[] starts, boolean direct, Type type) { + super(type); + this.slices = slices; this.starts = starts; - assert docValuesIdx.length != 0; + assert slices.length != 0; this.direct = direct; } @@ -138,22 +174,22 @@ public class MultiDocValues extends DocValues { return docID - start; } else { final int idx = ReaderUtil.subIndex(docID, starts); - assert idx >= 0 && idx < docValuesIdx.length : "idx was " + idx + assert idx >= 0 && idx < slices.length : "idx was " + idx + " for doc id: " + docID + " slices : " + Arrays.toString(starts); - assert docValuesIdx[idx] != null; + assert slices[idx] != null; try { if (direct) { - current = docValuesIdx[idx].docValues.getDirectSource(); + current = slices[idx].docValues.getDirectSource(); } else { - current = docValuesIdx[idx].docValues.getSource(); + current = slices[idx].docValues.getSource(); } } catch (IOException e) { throw new RuntimeException("load failed", e); // TODO how should we // handle this } - start = docValuesIdx[idx].start; - numDocs = docValuesIdx[idx].length; + start = slices[idx].start; + numDocs = slices[idx].length; return docID - start; } } @@ -206,6 +242,6 @@ public class MultiDocValues extends DocValues { @Override public Source getDirectSource() throws IOException { - return new MultiSource(docValuesIdx, starts, true); + return new MultiSource(slices, starts, true, type); } } diff --git a/lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java b/lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java index 2411f5dca63..7db5f02fac5 100644 --- a/lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java +++ b/lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java @@ -38,14 +38,9 @@ public final class MultiFieldsEnum extends FieldsEnum { // Holds sub-readers containing field we are currently // on, popped from queue. private final FieldsEnumWithSlice[] top; - private final FieldsEnumWithSlice[] enumWithSlices; private int numTop; - // Re-used TermsEnum - private final MultiTermsEnum terms; - private final MultiDocValues docValues; - private final Fields fields; private String currentField; @@ -54,9 +49,7 @@ public final class MultiFieldsEnum extends FieldsEnum { * (ie, {@link FieldsEnum#next} has not been called. */ public MultiFieldsEnum(MultiFields fields, FieldsEnum[] subs, ReaderUtil.Slice[] subSlices) throws IOException { this.fields = fields; - terms = new MultiTermsEnum(subSlices); queue = new FieldMergeQueue(subs.length); - docValues = new MultiDocValues(); top = new FieldsEnumWithSlice[subs.length]; List enumWithSlices = new ArrayList(); @@ -72,8 +65,6 @@ public final class MultiFieldsEnum extends FieldsEnum { queue.add(sub); } } - this.enumWithSlices = enumWithSlices.toArray(FieldsEnumWithSlice.EMPTY_ARRAY); - } @Override diff --git a/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java b/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java deleted file mode 100644 index 8f7a6a7d9e7..00000000000 --- a/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java +++ /dev/null @@ -1,147 +0,0 @@ -package org.apache.lucene.index; -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; - -import org.apache.lucene.index.MultiDocValues.DocValuesIndex; -import org.apache.lucene.index.DocValues; -import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.ReaderUtil; -import org.apache.lucene.util.ReaderUtil.Gather; - -/** - * Exposes per-document values, merged from per-document values API of - * sub-segments. This is useful when you're interacting with an {@link IndexReader} - * implementation that consists of sequential sub-readers (eg DirectoryReader - * or {@link MultiReader}). - * - *

- * NOTE: for multi readers, you'll get better performance by gathering - * the sub readers using {@link ReaderUtil#gatherSubReaders} and then operate - * per-reader, instead of using this class. - * - * @lucene.experimental - */ -public class MultiPerDocValues extends PerDocValues { - private final PerDocValues[] subs; - private final ReaderUtil.Slice[] subSlices; - private final Map docValues = new ConcurrentHashMap(); - - public MultiPerDocValues(PerDocValues[] subs, ReaderUtil.Slice[] subSlices) { - this.subs = subs; - this.subSlices = subSlices; - } - - /** - * Returns a single {@link PerDocValues} instance for this reader, merging - * their values on the fly. This method will not return null. - * - *

- * NOTE: this is a slow way to access postings. It's better to get the - * sub-readers (using {@link Gather}) and iterate through them yourself. - */ - public static PerDocValues getPerDocs(IndexReader r) throws IOException { - final IndexReader[] subs = r.getSequentialSubReaders(); - if (subs == null) { - // already an atomic reader - return r.perDocValues(); - } else if (subs.length == 0) { - // no fields - return null; - } else if (subs.length == 1) { - return getPerDocs(subs[0]); - } - PerDocValues perDocValues = r.retrievePerDoc(); - if (perDocValues == null) { - - final List producer = new ArrayList(); - final List slices = new ArrayList(); - - new ReaderUtil.Gather(r) { - @Override - protected void add(int base, IndexReader r) throws IOException { - final PerDocValues f = r.perDocValues(); - if (f != null) { - producer.add(f); - slices - .add(new ReaderUtil.Slice(base, r.maxDoc(), producer.size() - 1)); - } - } - }.run(); - - if (producer.size() == 0) { - return null; - } else if (producer.size() == 1) { - perDocValues = producer.get(0); - } else { - perDocValues = new MultiPerDocValues( - producer.toArray(PerDocValues.EMPTY_ARRAY), - slices.toArray(ReaderUtil.Slice.EMPTY_ARRAY)); - } - r.storePerDoc(perDocValues); - } - return perDocValues; - } - - public DocValues docValues(String field) throws IOException { - DocValues result = docValues.get(field); - if (result == null) { - // Lazy init: first time this field is requested, we - // create & add to docValues: - final List docValuesIndex = new ArrayList(); - int docsUpto = 0; - DocValues.Type type = null; - // Gather all sub-readers that share this field - for (int i = 0; i < subs.length; i++) { - DocValues values = subs[i].docValues(field); - final int start = subSlices[i].start; - final int length = subSlices[i].length; - if (values != null) { - if (docsUpto != start) { - type = values.type(); - docValuesIndex.add(new MultiDocValues.DocValuesIndex( - new MultiDocValues.EmptyDocValues(start, type), docsUpto, start - - docsUpto)); - } - docValuesIndex.add(new MultiDocValues.DocValuesIndex(values, start, - length)); - docsUpto = start + length; - - } else if (i + 1 == subs.length && !docValuesIndex.isEmpty()) { - docValuesIndex.add(new MultiDocValues.DocValuesIndex( - new MultiDocValues.EmptyDocValues(start, type), docsUpto, start - - docsUpto)); - } - } - if (docValuesIndex.isEmpty()) { - return null; - } - result = new MultiDocValues( - docValuesIndex.toArray(DocValuesIndex.EMPTY_ARRAY)); - docValues.put(field, result); - } - return result; - } - - public void close() throws IOException { - IOUtils.close(this.subs); - } -} diff --git a/lucene/src/java/org/apache/lucene/index/MultiReader.java b/lucene/src/java/org/apache/lucene/index/MultiReader.java index 6eb41266918..21d075ee8cd 100644 --- a/lucene/src/java/org/apache/lucene/index/MultiReader.java +++ b/lucene/src/java/org/apache/lucene/index/MultiReader.java @@ -22,6 +22,7 @@ import java.util.Collection; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +import org.apache.lucene.index.codecs.PerDocProducer; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.ReaderUtil; @@ -330,7 +331,7 @@ public class MultiReader extends IndexReader implements Cloneable { } @Override - public PerDocValues perDocValues() throws IOException { - throw new UnsupportedOperationException("please use MultiPerDocValues#getPerDocs, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level Fields"); + public DocValues docValues(String field) throws IOException { + throw new UnsupportedOperationException("please use MultiDocValues#getDocValues, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level DocValues"); } } diff --git a/lucene/src/java/org/apache/lucene/index/ParallelReader.java b/lucene/src/java/org/apache/lucene/index/ParallelReader.java index 6b3d9eaf9f8..6582b65b4bd 100644 --- a/lucene/src/java/org/apache/lucene/index/ParallelReader.java +++ b/lucene/src/java/org/apache/lucene/index/ParallelReader.java @@ -57,7 +57,6 @@ public class ParallelReader extends IndexReader { private boolean hasDeletions; private final ParallelFields fields = new ParallelFields(); - private final ParallelPerDocs perDocs = new ParallelPerDocs(); /** Construct a ParallelReader. *

Note that all subreaders are closed if this ParallelReader is closed.

@@ -130,7 +129,6 @@ public class ParallelReader extends IndexReader { if (fieldToReader.get(field) == null) { fieldToReader.put(field, reader); this.fields.addField(field, MultiFields.getFields(reader).terms(field)); - this.perDocs.addField(field, reader); } } @@ -463,35 +461,8 @@ public class ParallelReader extends IndexReader { } @Override - public PerDocValues perDocValues() throws IOException { - ensureOpen(); - return perDocs; - } - - // Single instance of this, per ParallelReader instance - private static final class ParallelPerDocs extends PerDocValues { - final TreeMap fields = new TreeMap(); - - void addField(String field, IndexReader r) throws IOException { - PerDocValues perDocs = MultiPerDocValues.getPerDocs(r); - if (perDocs != null) { - fields.put(field, perDocs.docValues(field)); - } - } - - @Override - public void close() throws IOException { - // nothing to do here - } - - @Override - public DocValues docValues(String field) throws IOException { - return fields.get(field); - } + public DocValues docValues(String field) throws IOException { + IndexReader reader = fieldToReader.get(field); + return reader == null ? null : reader.docValues(field); } } - - - - - diff --git a/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java b/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java index 65755b43a28..6f6d7d1cf88 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java @@ -22,6 +22,7 @@ import java.util.concurrent.atomic.AtomicInteger; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.NormsReader; +import org.apache.lucene.index.codecs.PerDocProducer; import org.apache.lucene.index.codecs.PostingsFormat; import org.apache.lucene.index.codecs.FieldsProducer; import org.apache.lucene.index.codecs.StoredFieldsReader; @@ -47,7 +48,7 @@ final class SegmentCoreReaders { final FieldInfos fieldInfos; final FieldsProducer fields; - final PerDocValues perDocProducer; + final PerDocProducer perDocProducer; final NormsReader norms; final Directory dir; diff --git a/lucene/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/src/java/org/apache/lucene/index/SegmentReader.java index 7eb54687616..2b26fc6f57a 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentReader.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentReader.java @@ -27,6 +27,7 @@ import java.util.concurrent.atomic.AtomicInteger; import org.apache.lucene.store.Directory; import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.index.codecs.PerDocProducer; import org.apache.lucene.index.codecs.StoredFieldsReader; import org.apache.lucene.index.codecs.TermVectorsReader; import org.apache.lucene.store.IOContext; @@ -424,9 +425,13 @@ public final class SegmentReader extends IndexReader implements Cloneable { } @Override - public PerDocValues perDocValues() throws IOException { + public DocValues docValues(String field) throws IOException { ensureOpen(); - return core.perDocProducer; + final PerDocProducer perDoc = core.perDocProducer; + if (perDoc == null) { + return null; + } + return perDoc.docValues(field); } /** diff --git a/lucene/src/java/org/apache/lucene/index/SlowMultiReaderWrapper.java b/lucene/src/java/org/apache/lucene/index/SlowMultiReaderWrapper.java index f945f5e4976..1d021db3579 100644 --- a/lucene/src/java/org/apache/lucene/index/SlowMultiReaderWrapper.java +++ b/lucene/src/java/org/apache/lucene/index/SlowMultiReaderWrapper.java @@ -34,11 +34,12 @@ import org.apache.lucene.index.MultiReader; // javadoc * IndexReader#getSequentialSubReaders}) to emulate an * atomic reader. This requires implementing the postings * APIs on-the-fly, using the static methods in {@link - * MultiFields}, by stepping through the sub-readers to - * merge fields/terms, appending docs, etc. + * MultiFields}, {@link MultiNorms}, {@link MultiDocValues}, + * by stepping through the sub-readers to merge fields/terms, + * appending docs, etc. * *

If you ever hit an UnsupportedOperationException saying - * "please use MultiFields.XXX instead", the simple + * "please use MultiXXX.YYY instead", the simple * but non-performant workaround is to wrap your reader * using this class.

* @@ -71,9 +72,9 @@ public final class SlowMultiReaderWrapper extends FilterIndexReader { } @Override - public PerDocValues perDocValues() throws IOException { + public DocValues docValues(String field) throws IOException { ensureOpen(); - return MultiPerDocValues.getPerDocs(in); + return MultiDocValues.getDocValues(in, field); } @Override diff --git a/lucene/src/java/org/apache/lucene/index/codecs/DocValuesFormat.java b/lucene/src/java/org/apache/lucene/index/codecs/DocValuesFormat.java index 1e269470e9d..466f458a653 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/DocValuesFormat.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/DocValuesFormat.java @@ -20,7 +20,6 @@ package org.apache.lucene.index.codecs; import java.io.IOException; import java.util.Set; -import org.apache.lucene.index.PerDocValues; import org.apache.lucene.index.PerDocWriteState; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentReadState; @@ -28,6 +27,6 @@ import org.apache.lucene.store.Directory; public abstract class DocValuesFormat { public abstract PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException; - public abstract PerDocValues docsProducer(SegmentReadState state) throws IOException; + public abstract PerDocProducer docsProducer(SegmentReadState state) throws IOException; public abstract void files(Directory dir, SegmentInfo info, Set files) throws IOException; } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/DocValuesReaderBase.java b/lucene/src/java/org/apache/lucene/index/codecs/DocValuesReaderBase.java index 4530acf99aa..92f07b2b161 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/DocValuesReaderBase.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/DocValuesReaderBase.java @@ -28,7 +28,6 @@ import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues.Type; // javadocs -import org.apache.lucene.index.PerDocValues; import org.apache.lucene.index.codecs.lucene40.values.Bytes; import org.apache.lucene.index.codecs.lucene40.values.Floats; import org.apache.lucene.index.codecs.lucene40.values.Ints; @@ -37,11 +36,11 @@ import org.apache.lucene.store.IOContext; import org.apache.lucene.util.BytesRef; /** - * Abstract base class for PerDocValues implementations + * Abstract base class for PerDocProducer implementations * @lucene.experimental */ // TODO: this needs to go under lucene40 codec (its specific to its impl) -public abstract class DocValuesReaderBase extends PerDocValues { +public abstract class DocValuesReaderBase extends PerDocProducer { protected abstract void closeInternal(Collection closeables) throws IOException; protected abstract Map docValues(); diff --git a/lucene/src/java/org/apache/lucene/index/codecs/PerDocConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/PerDocConsumer.java index f2d1f814be9..5b99ce062c3 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/PerDocConsumer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/PerDocConsumer.java @@ -21,7 +21,6 @@ import java.io.IOException; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.MergeState; -import org.apache.lucene.index.PerDocValues; /** * Abstract API that consumes per document values. Concrete implementations of @@ -39,30 +38,22 @@ public abstract class PerDocConsumer implements Closeable{ throws IOException; /** - * Consumes and merges the given {@link PerDocValues} producer + * Consumes and merges the given {@link PerDocProducer} producer * into this consumers format. */ public void merge(MergeState mergeState) throws IOException { final DocValues[] docValues = new DocValues[mergeState.readers.size()]; - final PerDocValues[] perDocValues = new PerDocValues[mergeState.readers.size()]; - // pull all PerDocValues - for (int i = 0; i < perDocValues.length; i++) { - perDocValues[i] = mergeState.readers.get(i).reader.perDocValues(); - } + for (FieldInfo fieldInfo : mergeState.fieldInfos) { mergeState.fieldInfo = fieldInfo; // set the field we are merging if (fieldInfo.hasDocValues()) { - for (int i = 0; i < perDocValues.length; i++) { - if (perDocValues[i] != null) { // get all IDV to merge - docValues[i] = perDocValues[i].docValues(fieldInfo.name); - } + for (int i = 0; i < docValues.length; i++) { + docValues[i] = mergeState.readers.get(i).reader.docValues(fieldInfo.name); } final DocValuesConsumer docValuesConsumer = addValuesField(fieldInfo.getDocValuesType(), fieldInfo); assert docValuesConsumer != null; docValuesConsumer.merge(mergeState, docValues); } } - /* NOTE: don't close the perDocProducers here since they are private segment producers - * and will be closed once the SegmentReader goes out of scope */ } } diff --git a/lucene/src/java/org/apache/lucene/index/PerDocValues.java b/lucene/src/java/org/apache/lucene/index/codecs/PerDocProducer.java similarity index 83% rename from lucene/src/java/org/apache/lucene/index/PerDocValues.java rename to lucene/src/java/org/apache/lucene/index/codecs/PerDocProducer.java index 08a91577d02..ff2827d8c72 100644 --- a/lucene/src/java/org/apache/lucene/index/PerDocValues.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/PerDocProducer.java @@ -1,4 +1,4 @@ -package org.apache.lucene.index; +package org.apache.lucene.index.codecs; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -18,8 +18,7 @@ package org.apache.lucene.index; import java.io.Closeable; import java.io.IOException; -import org.apache.lucene.index.codecs.PerDocConsumer; -import org.apache.lucene.index.codecs.PostingsFormat; +import org.apache.lucene.index.DocValues; /** * Abstract API that provides access to one or more per-document storage @@ -27,13 +26,13 @@ import org.apache.lucene.index.codecs.PostingsFormat; * storage on a per-document basis corresponding to their actual * {@link PerDocConsumer} counterpart. *

- * The {@link PerDocValues} API is accessible through the + * The {@link PerDocProducer} API is accessible through the * {@link PostingsFormat} - API providing per field consumers and producers for inverted * data (terms, postings) as well as per-document data. * * @lucene.experimental */ -public abstract class PerDocValues implements Closeable { +public abstract class PerDocProducer implements Closeable { /** * Returns {@link DocValues} for the current field. * @@ -45,5 +44,5 @@ public abstract class PerDocValues implements Closeable { */ public abstract DocValues docValues(String field) throws IOException; - public static final PerDocValues[] EMPTY_ARRAY = new PerDocValues[0]; + public static final PerDocProducer[] EMPTY_ARRAY = new PerDocProducer[0]; } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/lucene3x/Lucene3xCodec.java b/lucene/src/java/org/apache/lucene/index/codecs/lucene3x/Lucene3xCodec.java index 19689788375..4ad0b8b4e9d 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/lucene3x/Lucene3xCodec.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/lucene3x/Lucene3xCodec.java @@ -20,7 +20,6 @@ package org.apache.lucene.index.codecs.lucene3x; import java.io.IOException; import java.util.Set; -import org.apache.lucene.index.PerDocValues; import org.apache.lucene.index.PerDocWriteState; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentReadState; @@ -28,6 +27,7 @@ import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.DocValuesFormat; import org.apache.lucene.index.codecs.FieldInfosFormat; import org.apache.lucene.index.codecs.NormsFormat; +import org.apache.lucene.index.codecs.PerDocProducer; import org.apache.lucene.index.codecs.StoredFieldsFormat; import org.apache.lucene.index.codecs.PerDocConsumer; import org.apache.lucene.index.codecs.PostingsFormat; @@ -75,7 +75,7 @@ public class Lucene3xCodec extends Codec { } @Override - public PerDocValues docsProducer(SegmentReadState state) throws IOException { + public PerDocProducer docsProducer(SegmentReadState state) throws IOException { return null; } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40DocValuesFormat.java b/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40DocValuesFormat.java index 0257e8ba9bf..28bd0b25362 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40DocValuesFormat.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40DocValuesFormat.java @@ -20,12 +20,12 @@ package org.apache.lucene.index.codecs.lucene40; import java.io.IOException; import java.util.Set; -import org.apache.lucene.index.PerDocValues; import org.apache.lucene.index.PerDocWriteState; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.codecs.DocValuesFormat; import org.apache.lucene.index.codecs.PerDocConsumer; +import org.apache.lucene.index.codecs.PerDocProducer; import org.apache.lucene.store.Directory; public class Lucene40DocValuesFormat extends DocValuesFormat { @@ -36,7 +36,7 @@ public class Lucene40DocValuesFormat extends DocValuesFormat { } @Override - public PerDocValues docsProducer(SegmentReadState state) throws IOException { + public PerDocProducer docsProducer(SegmentReadState state) throws IOException { return new Lucene40DocValuesProducer(state); } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40DocValuesProducer.java b/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40DocValuesProducer.java index c737486d610..c9baf1fa505 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40DocValuesProducer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40DocValuesProducer.java @@ -33,7 +33,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.util.IOUtils; /** - * Default PerDocValues implementation that uses compound file. + * Default PerDocProducer implementation that uses compound file. * @lucene.experimental */ public class Lucene40DocValuesProducer extends DocValuesReaderBase { diff --git a/lucene/src/java/org/apache/lucene/index/codecs/sep/SepDocValuesProducer.java b/lucene/src/java/org/apache/lucene/index/codecs/sep/SepDocValuesProducer.java index 82daa2ee41e..349417effc1 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/sep/SepDocValuesProducer.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/sep/SepDocValuesProducer.java @@ -28,7 +28,7 @@ import org.apache.lucene.index.codecs.DocValuesReaderBase; import org.apache.lucene.util.IOUtils; /** - * Implementation of PerDocValues that uses separate files. + * Implementation of PerDocProducer that uses separate files. * @lucene.experimental */ public class SepDocValuesProducer extends DocValuesReaderBase { diff --git a/lucene/src/test-framework/java/org/apache/lucene/index/codecs/mocksep/MockSepDocValuesFormat.java b/lucene/src/test-framework/java/org/apache/lucene/index/codecs/mocksep/MockSepDocValuesFormat.java index 698b8bf37ca..595f6b6c770 100644 --- a/lucene/src/test-framework/java/org/apache/lucene/index/codecs/mocksep/MockSepDocValuesFormat.java +++ b/lucene/src/test-framework/java/org/apache/lucene/index/codecs/mocksep/MockSepDocValuesFormat.java @@ -20,12 +20,12 @@ package org.apache.lucene.index.codecs.mocksep; import java.io.IOException; import java.util.Set; -import org.apache.lucene.index.PerDocValues; import org.apache.lucene.index.PerDocWriteState; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.codecs.DocValuesFormat; import org.apache.lucene.index.codecs.PerDocConsumer; +import org.apache.lucene.index.codecs.PerDocProducer; import org.apache.lucene.index.codecs.sep.SepDocValuesConsumer; import org.apache.lucene.index.codecs.sep.SepDocValuesProducer; import org.apache.lucene.store.Directory; @@ -43,7 +43,7 @@ public class MockSepDocValuesFormat extends DocValuesFormat { } @Override - public PerDocValues docsProducer(SegmentReadState state) throws IOException { + public PerDocProducer docsProducer(SegmentReadState state) throws IOException { return new SepDocValuesProducer(state); } diff --git a/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java b/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java index ab0b586a36e..0ab5a28657d 100755 --- a/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java +++ b/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java @@ -1284,7 +1284,7 @@ public class TestAddIndexes extends LuceneTestCase { w.close(); IndexReader sr = getOnlySegmentReader(r3); assertEquals(2, sr.numDocs()); - DocValues docValues = sr.perDocValues().docValues("dv"); + DocValues docValues = sr.docValues("dv"); assertNotNull(docValues); r3.close(); d3.close(); diff --git a/lucene/src/test/org/apache/lucene/index/TestDuelingCodecs.java b/lucene/src/test/org/apache/lucene/index/TestDuelingCodecs.java index b9313fce97e..e7b483b8a96 100644 --- a/lucene/src/test/org/apache/lucene/index/TestDuelingCodecs.java +++ b/lucene/src/test/org/apache/lucene/index/TestDuelingCodecs.java @@ -18,12 +18,15 @@ package org.apache.lucene.index; */ import java.io.IOException; +import java.util.HashSet; import java.util.Iterator; import java.util.Random; +import java.util.Set; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexReader.FieldOption; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.Directory; @@ -516,29 +519,15 @@ public class TestDuelingCodecs extends LuceneTestCase { * checks that docvalues across all fields are equivalent */ public void assertDocValues(IndexReader leftReader, IndexReader rightReader) throws Exception { - PerDocValues leftPerDoc = MultiPerDocValues.getPerDocs(leftReader); - PerDocValues rightPerDoc = MultiPerDocValues.getPerDocs(rightReader); - - Fields leftFields = MultiFields.getFields(leftReader); - Fields rightFields = MultiFields.getFields(rightReader); - // Fields could be null if there are no postings, - // but then it must be null for both - if (leftFields == null || rightFields == null) { - assertNull(info, leftFields); - assertNull(info, rightFields); - return; - } - - FieldsEnum fieldsEnum = leftFields.iterator(); - String field; - while ((field = fieldsEnum.next()) != null) { - DocValues leftDocValues = leftPerDoc.docValues(field); - DocValues rightDocValues = rightPerDoc.docValues(field); - if (leftDocValues == null || rightDocValues == null) { - assertNull(info, leftDocValues); - assertNull(info, rightDocValues); - continue; - } + Set leftValues = new HashSet(leftReader.getFieldNames(FieldOption.DOC_VALUES)); + Set rightValues = new HashSet(rightReader.getFieldNames(FieldOption.DOC_VALUES)); + assertEquals(info, leftValues, rightValues); + + for (String field : leftValues) { + DocValues leftDocValues = MultiDocValues.getDocValues(leftReader, field); + DocValues rightDocValues = MultiDocValues.getDocValues(rightReader, field); + assertNotNull(info, leftDocValues); + assertNotNull(info, rightDocValues); assertDocValuesSource(leftDocValues.getDirectSource(), rightDocValues.getDirectSource()); assertDocValuesSource(leftDocValues.getSource(), rightDocValues.getSource()); } diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java index ac867ace604..35fa90f4c10 100644 --- a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java +++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java @@ -37,13 +37,13 @@ import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LogDocMergePolicy; import org.apache.lucene.index.LogMergePolicy; -import org.apache.lucene.index.MultiPerDocValues; -import org.apache.lucene.index.PerDocValues; +import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.index.DocValues.Source; import org.apache.lucene.index.DocValues.Type; import org.apache.lucene.index.codecs.Codec; +import org.apache.lucene.index.codecs.PerDocProducer; import org.apache.lucene.search.*; import org.apache.lucene.store.Directory; import org.apache.lucene.store.LockObtainFailedException; @@ -104,7 +104,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { TopDocs search = searcher.search(query, 10); assertEquals(5, search.totalHits); ScoreDoc[] scoreDocs = search.scoreDocs; - DocValues docValues = MultiPerDocValues.getPerDocs(reader).docValues("docId"); + DocValues docValues = MultiDocValues.getDocValues(reader, "docId"); Source source = docValues.getSource(); for (int i = 0; i < scoreDocs.length; i++) { assertEquals(i, scoreDocs[i].doc); @@ -418,24 +418,8 @@ public class TestDocValuesIndexing extends LuceneTestCase { d.close(); } - private DocValues getDocValues(IndexReader reader, String field) - throws IOException { - boolean singleSeg = reader.getSequentialSubReaders().length == 1; - PerDocValues perDoc = singleSeg ? reader.getSequentialSubReaders()[0].perDocValues() - : MultiPerDocValues.getPerDocs(reader); - switch (random.nextInt(singleSeg ? 3 : 2)) { // case 2 only if single seg - case 0: - return perDoc.docValues(field); - case 1: - DocValues docValues = perDoc.docValues(field); - if (docValues != null) { - return docValues; - } - throw new RuntimeException("no such field " + field); - case 2:// this only works if we are on a single seg index! - return reader.getSequentialSubReaders()[0].docValues(field); - } - throw new RuntimeException(); + private DocValues getDocValues(IndexReader reader, String field) throws IOException { + return MultiDocValues.getDocValues(reader, field); } private Source getSource(DocValues values) throws IOException { @@ -570,7 +554,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { w.forceMerge(1); IndexReader r = w.getReader(); w.close(); - assertEquals(17, r.getSequentialSubReaders()[0].perDocValues().docValues("field").load().getInt(0)); + assertEquals(17, r.getSequentialSubReaders()[0].docValues("field").load().getInt(0)); r.close(); d.close(); } @@ -600,7 +584,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { w.forceMerge(1); IndexReader r = w.getReader(); w.close(); - assertEquals(17, r.getSequentialSubReaders()[0].perDocValues().docValues("field").load().getInt(0)); + assertEquals(17, r.getSequentialSubReaders()[0].docValues("field").load().getInt(0)); r.close(); d.close(); }