LUCENE-3622: PerDocValues -> PerDocProducer (privately used only by SegmentCoreReaders), simplify IR API and MultiDocValues, remove MultiPerDocValues

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3622@1212828 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2011-12-10 17:43:00 +00:00
parent 4387ff6161
commit 75d6217091
25 changed files with 167 additions and 373 deletions
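For review: the net effect for callers is that one level of indirection disappears from the read path. A minimal before/after sketch (the field name "dv" follows the TestAddIndexes change below; any field name works):

    // before this change: per-document values were reached through a PerDocValues hop
    DocValues docValues = reader.perDocValues().docValues("dv");

    // after this change: IndexReader hands out DocValues directly,
    // returning null when no values are stored for the field
    DocValues docValues = reader.docValues("dv");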

View File

@@ -40,8 +40,8 @@ import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FieldInvertState;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.FieldsEnum;
-import org.apache.lucene.index.PerDocValues;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.OrdTermState;
 import org.apache.lucene.index.StoredFieldVisitor;
@@ -1159,7 +1159,7 @@ public class MemoryIndex {
     }
     @Override
-    public PerDocValues perDocValues() throws IOException {
+    public DocValues docValues(String field) throws IOException {
       return null;
     }
   }

View File

@@ -27,6 +27,8 @@ import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.DocValues.Source;
 import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.PerDocProducer;
 import java.io.File;
 import java.io.IOException;
 import java.io.PrintStream;
@@ -1130,8 +1132,7 @@ public class CheckIndex {
       for (FieldInfo fieldInfo : fieldInfos) {
         if (fieldInfo.hasDocValues()) {
           status.totalValueFields++;
-          final PerDocValues perDocValues = reader.perDocValues();
-          final DocValues docValues = perDocValues.docValues(fieldInfo.name);
+          final DocValues docValues = reader.docValues(fieldInfo.name);
           if (docValues == null) {
             continue;
           }

View File

@@ -31,11 +31,8 @@ import java.util.concurrent.ConcurrentHashMap;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.Lock;
-import org.apache.lucene.store.LockObtainFailedException;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.InfoStream;
 import org.apache.lucene.util.MapBackedSet;
 /**
@@ -716,7 +713,7 @@ class DirectoryReader extends IndexReader implements Cloneable {
   }
   @Override
-  public PerDocValues perDocValues() throws IOException {
-    throw new UnsupportedOperationException("please use MultiPerDocValues#getPerDocs, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level Fields");
+  public DocValues docValues(String field) throws IOException {
+    throw new UnsupportedOperationException("please use MultiDocValues#getDocValues, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level DocValues");
   }
 }

View File

@@ -421,9 +421,9 @@ public class FilterIndexReader extends IndexReader {
   }
   @Override
-  public PerDocValues perDocValues() throws IOException {
+  public DocValues docValues(String field) throws IOException {
     ensureOpen();
-    return in.perDocValues();
+    return in.docValues(field);
   }
   @Override

View File

@@ -773,21 +773,6 @@ public abstract class IndexReader implements Cloneable,Closeable {
    * through them yourself. */
   public abstract Fields fields() throws IOException;
-  /**
-   * Returns {@link PerDocValues} for this reader.
-   * This method may return null if the reader has no per-document
-   * values stored.
-   *
-   * <p><b>NOTE</b>: if this is a multi reader ({@link
-   * #getSequentialSubReaders} is not null) then this
-   * method will throw UnsupportedOperationException. If
-   * you really need {@link PerDocValues} for such a reader,
-   * use {@link MultiPerDocValues#getPerDocs(IndexReader)}. However, for
-   * performance reasons, it's best to get all sub-readers
-   * using {@link ReaderUtil#gatherSubReaders} and iterate
-   * through them yourself. */
-  public abstract PerDocValues perDocValues() throws IOException;
   public final int docFreq(Term term) throws IOException {
     return docFreq(term.field(), term.bytes());
   }
@@ -1146,14 +1131,20 @@ public abstract class IndexReader implements Cloneable,Closeable {
     throw new UnsupportedOperationException("This reader does not support this method.");
   }
-  public final DocValues docValues(String field) throws IOException {
-    ensureOpen();
-    final PerDocValues perDoc = perDocValues();
-    if (perDoc == null) {
-      return null;
-    }
-    return perDoc.docValues(field);
-  }
+  /**
+   * Returns {@link DocValues} for this field.
+   * This method may return null if the reader has no per-document
+   * values stored.
+   *
+   * <p><b>NOTE</b>: if this is a multi reader ({@link
+   * #getSequentialSubReaders} is not null) then this
+   * method will throw UnsupportedOperationException. If
+   * you really need {@link DocValues} for such a reader,
+   * use {@link MultiDocValues#getDocValues(IndexReader,String)}. However, for
+   * performance reasons, it's best to get all sub-readers
+   * using {@link ReaderUtil#gatherSubReaders} and iterate
+   * through them yourself. */
+  public abstract DocValues docValues(String field) throws IOException;
   private volatile Fields fields;
@@ -1169,21 +1160,6 @@ public abstract class IndexReader implements Cloneable,Closeable {
     return fields;
   }
-  private volatile PerDocValues perDocValues;
-  /** @lucene.internal */
-  void storePerDoc(PerDocValues perDocValues) {
-    ensureOpen();
-    this.perDocValues = perDocValues;
-  }
-  /** @lucene.internal */
-  PerDocValues retrievePerDoc() {
-    ensureOpen();
-    return perDocValues;
-  }
   /**
    * A struct like class that represents a hierarchical relationship between
    * {@link IndexReader} instances.

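The replacement javadoc above steers composite-reader users toward per-sub-reader access. A sketch of that fast path, assuming ReaderUtil.gatherSubReaders keeps its existing (List, IndexReader) signature and using a hypothetical field name:

    List<IndexReader> subReaders = new ArrayList<IndexReader>();
    ReaderUtil.gatherSubReaders(subReaders, topLevelReader);
    for (IndexReader sub : subReaders) {
      DocValues dv = sub.docValues("myField"); // may be null for a given segment
      if (dv != null) {
        // consume values in this segment's doc id space
      }
    }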
View File

@@ -17,10 +17,13 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.List;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.ReaderUtil;
+import org.apache.lucene.util.ReaderUtil.Gather;
 /**
  * A wrapper for compound IndexReader providing access to per segment
@@ -31,59 +34,92 @@ import org.apache.lucene.util.ReaderUtil;
  */
 public class MultiDocValues extends DocValues {
-  public static class DocValuesIndex {
-    public final static DocValuesIndex[] EMPTY_ARRAY = new DocValuesIndex[0];
+  public static class DocValuesSlice {
+    public final static DocValuesSlice[] EMPTY_ARRAY = new DocValuesSlice[0];
     final int start;
     final int length;
-    final DocValues docValues;
-    public DocValuesIndex(DocValues docValues, int start, int length) {
+    DocValues docValues;
+    public DocValuesSlice(DocValues docValues, int start, int length) {
       this.docValues = docValues;
       this.start = start;
       this.length = length;
     }
   }
-  private DocValuesIndex[] docValuesIdx;
+  private DocValuesSlice[] slices;
   private int[] starts;
   private Type type;
   private int valueSize;
-  public MultiDocValues() {
-    starts = new int[0];
-    docValuesIdx = new DocValuesIndex[0];
-  }
-  public MultiDocValues(DocValuesIndex[] docValuesIdx) {
-    reset(docValuesIdx);
+  private MultiDocValues(DocValuesSlice[] slices, int[] starts, TypePromoter promotedType) {
+    this.starts = starts;
+    this.slices = slices;
+    this.type = promotedType.type();
+    this.valueSize = promotedType.getValueSize();
+  }
+  /**
+   * Returns a single {@link DocValues} instance for this field, merging
+   * their values on the fly.
+   *
+   * <p>
+   * <b>NOTE</b>: this is a slow way to access DocValues. It's better to get the
+   * sub-readers (using {@link Gather}) and iterate through them yourself.
+   */
+  public static DocValues getDocValues(IndexReader r, final String field) throws IOException {
+    final IndexReader[] subs = r.getSequentialSubReaders();
+    if (subs == null) {
+      // already an atomic reader
+      return r.docValues(field);
+    } else if (subs.length == 0) {
+      // no fields
+      return null;
+    } else if (subs.length == 1) {
+      return getDocValues(subs[0], field);
+    } else {
+      final List<DocValuesSlice> slices = new ArrayList<DocValuesSlice>();
+      final TypePromoter promotedType[] = new TypePromoter[1];
+      promotedType[0] = TypePromoter.getIdentityPromoter();
+      // gather all docvalues fields, accumulating a promoted type across
+      // potentially incompatible types
+      new ReaderUtil.Gather(r) {
+        @Override
+        protected void add(int base, IndexReader r) throws IOException {
+          final DocValues d = r.docValues(field);
+          if (d != null) {
+            TypePromoter incoming = TypePromoter.create(d.type(), d.getValueSize());
+            promotedType[0] = promotedType[0].promote(incoming);
+          }
+          slices.add(new DocValuesSlice(d, base, r.maxDoc()));
+        }
+      }.run();
+      // return null if no docvalues encountered anywhere
+      if (promotedType[0] == TypePromoter.getIdentityPromoter()) {
+        return null;
+      }
+      // populate starts and fill gaps with empty docvalues
+      int starts[] = new int[slices.size()];
+      for (int i = 0; i < slices.size(); i++) {
+        DocValuesSlice slice = slices.get(i);
+        starts[i] = slice.start;
+        if (slice.docValues == null) {
+          slice.docValues = new EmptyDocValues(slice.length, promotedType[0].type());
+        }
+      }
+      return new MultiDocValues(slices.toArray(new DocValuesSlice[slices.size()]), starts, promotedType[0]);
+    }
   }
   @Override
   public Source load() throws IOException {
-    return new MultiSource(docValuesIdx, starts, false);
+    return new MultiSource(slices, starts, false, type);
   }
-  public DocValues reset(DocValuesIndex[] docValuesIdx) {
-    final int[] start = new int[docValuesIdx.length];
-    TypePromoter promoter = TypePromoter.getIdentityPromoter();
-    for (int i = 0; i < docValuesIdx.length; i++) {
-      start[i] = docValuesIdx[i].start;
-      if (!(docValuesIdx[i].docValues instanceof EmptyDocValues)) {
-        // only promote if not a dummy
-        final TypePromoter incomingPromoter = TypePromoter.create(
-            docValuesIdx[i].docValues.type(),
-            docValuesIdx[i].docValues.getValueSize());
-        promoter = promoter.promote(incomingPromoter);
-        if (promoter == null) {
-          throw new IllegalStateException("Can not promote " + incomingPromoter);
-        }
-      }
-    }
-    this.type = promoter.type();
-    this.valueSize = promoter.getValueSize();
-    this.starts = start;
-    this.docValuesIdx = docValuesIdx;
-    return this;
-  }
   public static class EmptyDocValues extends DocValues {
@@ -117,14 +153,14 @@ public class MultiDocValues extends DocValues {
     private int start = 0;
     private Source current;
     private final int[] starts;
-    private final DocValuesIndex[] docValuesIdx;
+    private final DocValuesSlice[] slices;
     private boolean direct;
-    public MultiSource(DocValuesIndex[] docValuesIdx, int[] starts, boolean direct) {
-      super(docValuesIdx[0].docValues.type());
-      this.docValuesIdx = docValuesIdx;
+    public MultiSource(DocValuesSlice[] slices, int[] starts, boolean direct, Type type) {
+      super(type);
+      this.slices = slices;
       this.starts = starts;
-      assert docValuesIdx.length != 0;
+      assert slices.length != 0;
       this.direct = direct;
     }
@@ -138,22 +174,22 @@ public class MultiDocValues extends DocValues {
         return docID - start;
       } else {
         final int idx = ReaderUtil.subIndex(docID, starts);
-        assert idx >= 0 && idx < docValuesIdx.length : "idx was " + idx
+        assert idx >= 0 && idx < slices.length : "idx was " + idx
             + " for doc id: " + docID + " slices : " + Arrays.toString(starts);
-        assert docValuesIdx[idx] != null;
+        assert slices[idx] != null;
         try {
           if (direct) {
-            current = docValuesIdx[idx].docValues.getDirectSource();
+            current = slices[idx].docValues.getDirectSource();
           } else {
-            current = docValuesIdx[idx].docValues.getSource();
+            current = slices[idx].docValues.getSource();
           }
         } catch (IOException e) {
           throw new RuntimeException("load failed", e); // TODO how should we
                                                         // handle this
         }
-        start = docValuesIdx[idx].start;
-        numDocs = docValuesIdx[idx].length;
+        start = slices[idx].start;
+        numDocs = slices[idx].length;
         return docID - start;
       }
     }
@@ -206,6 +242,6 @@ public class MultiDocValues extends DocValues {
   @Override
   public Source getDirectSource() throws IOException {
-    return new MultiSource(docValuesIdx, starts, true);
+    return new MultiSource(slices, starts, true, type);
   }
 }

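The new static entry point is the drop-in replacement for MultiPerDocValues#getPerDocs. A usage sketch (field name hypothetical; note that getDocValues returns null when no sub-reader stores values for the field):

    DocValues merged = MultiDocValues.getDocValues(topLevelReader, "myField");
    if (merged != null) {
      DocValues.Source source = merged.getSource(); // cached source over all slices
      long value = source.getInt(docID);            // docID in the top-level doc id space
    }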
View File

@@ -38,14 +38,9 @@ public final class MultiFieldsEnum extends FieldsEnum {
   // Holds sub-readers containing field we are currently
   // on, popped from queue.
   private final FieldsEnumWithSlice[] top;
-  private final FieldsEnumWithSlice[] enumWithSlices;
   private int numTop;
-  // Re-used TermsEnum
-  private final MultiTermsEnum terms;
-  private final MultiDocValues docValues;
   private final Fields fields;
   private String currentField;
@@ -54,9 +49,7 @@ public final class MultiFieldsEnum extends FieldsEnum {
    * (ie, {@link FieldsEnum#next} has not been called. */
   public MultiFieldsEnum(MultiFields fields, FieldsEnum[] subs, ReaderUtil.Slice[] subSlices) throws IOException {
     this.fields = fields;
-    terms = new MultiTermsEnum(subSlices);
     queue = new FieldMergeQueue(subs.length);
-    docValues = new MultiDocValues();
     top = new FieldsEnumWithSlice[subs.length];
     List<FieldsEnumWithSlice> enumWithSlices = new ArrayList<FieldsEnumWithSlice>();
@@ -72,8 +65,6 @@ public final class MultiFieldsEnum extends FieldsEnum {
         queue.add(sub);
       }
     }
-    this.enumWithSlices = enumWithSlices.toArray(FieldsEnumWithSlice.EMPTY_ARRAY);
   }
   @Override

View File

@@ -1,147 +0,0 @@ (file deleted; the removed MultiPerDocValues.java follows)
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.index.MultiDocValues.DocValuesIndex;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.ReaderUtil.Gather;
/**
* Exposes per-document values, merged from per-document values API of
* sub-segments. This is useful when you're interacting with an {@link IndexReader}
* implementation that consists of sequential sub-readers (eg DirectoryReader
* or {@link MultiReader}).
*
* <p>
* <b>NOTE</b>: for multi readers, you'll get better performance by gathering
* the sub readers using {@link ReaderUtil#gatherSubReaders} and then operate
* per-reader, instead of using this class.
*
* @lucene.experimental
*/
public class MultiPerDocValues extends PerDocValues {
private final PerDocValues[] subs;
private final ReaderUtil.Slice[] subSlices;
private final Map<String, DocValues> docValues = new ConcurrentHashMap<String, DocValues>();
public MultiPerDocValues(PerDocValues[] subs, ReaderUtil.Slice[] subSlices) {
this.subs = subs;
this.subSlices = subSlices;
}
/**
* Returns a single {@link PerDocValues} instance for this reader, merging
* their values on the fly. This method will not return <code>null</code>.
*
* <p>
* <b>NOTE</b>: this is a slow way to access postings. It's better to get the
* sub-readers (using {@link Gather}) and iterate through them yourself.
*/
public static PerDocValues getPerDocs(IndexReader r) throws IOException {
final IndexReader[] subs = r.getSequentialSubReaders();
if (subs == null) {
// already an atomic reader
return r.perDocValues();
} else if (subs.length == 0) {
// no fields
return null;
} else if (subs.length == 1) {
return getPerDocs(subs[0]);
}
PerDocValues perDocValues = r.retrievePerDoc();
if (perDocValues == null) {
final List<PerDocValues> producer = new ArrayList<PerDocValues>();
final List<ReaderUtil.Slice> slices = new ArrayList<ReaderUtil.Slice>();
new ReaderUtil.Gather(r) {
@Override
protected void add(int base, IndexReader r) throws IOException {
final PerDocValues f = r.perDocValues();
if (f != null) {
producer.add(f);
slices
.add(new ReaderUtil.Slice(base, r.maxDoc(), producer.size() - 1));
}
}
}.run();
if (producer.size() == 0) {
return null;
} else if (producer.size() == 1) {
perDocValues = producer.get(0);
} else {
perDocValues = new MultiPerDocValues(
producer.toArray(PerDocValues.EMPTY_ARRAY),
slices.toArray(ReaderUtil.Slice.EMPTY_ARRAY));
}
r.storePerDoc(perDocValues);
}
return perDocValues;
}
public DocValues docValues(String field) throws IOException {
DocValues result = docValues.get(field);
if (result == null) {
// Lazy init: first time this field is requested, we
// create & add to docValues:
final List<MultiDocValues.DocValuesIndex> docValuesIndex = new ArrayList<MultiDocValues.DocValuesIndex>();
int docsUpto = 0;
DocValues.Type type = null;
// Gather all sub-readers that share this field
for (int i = 0; i < subs.length; i++) {
DocValues values = subs[i].docValues(field);
final int start = subSlices[i].start;
final int length = subSlices[i].length;
if (values != null) {
if (docsUpto != start) {
type = values.type();
docValuesIndex.add(new MultiDocValues.DocValuesIndex(
new MultiDocValues.EmptyDocValues(start, type), docsUpto, start
- docsUpto));
}
docValuesIndex.add(new MultiDocValues.DocValuesIndex(values, start,
length));
docsUpto = start + length;
} else if (i + 1 == subs.length && !docValuesIndex.isEmpty()) {
docValuesIndex.add(new MultiDocValues.DocValuesIndex(
new MultiDocValues.EmptyDocValues(start, type), docsUpto, start
- docsUpto));
}
}
if (docValuesIndex.isEmpty()) {
return null;
}
result = new MultiDocValues(
docValuesIndex.toArray(DocValuesIndex.EMPTY_ARRAY));
docValues.put(field, result);
}
return result;
}
public void close() throws IOException {
IOUtils.close(this.subs);
}
}

View File

@@ -22,6 +22,7 @@ import java.util.Collection;
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
+import org.apache.lucene.index.codecs.PerDocProducer;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.ReaderUtil;
@@ -330,7 +331,7 @@ public class MultiReader extends IndexReader implements Cloneable {
   }
   @Override
-  public PerDocValues perDocValues() throws IOException {
-    throw new UnsupportedOperationException("please use MultiPerDocValues#getPerDocs, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level Fields");
+  public DocValues docValues(String field) throws IOException {
+    throw new UnsupportedOperationException("please use MultiDocValues#getDocValues, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level DocValues");
   }
 }

View File

@@ -57,7 +57,6 @@ public class ParallelReader extends IndexReader {
   private boolean hasDeletions;
   private final ParallelFields fields = new ParallelFields();
-  private final ParallelPerDocs perDocs = new ParallelPerDocs();
 /** Construct a ParallelReader.
  * <p>Note that all subreaders are closed if this ParallelReader is closed.</p>
@@ -130,7 +129,6 @@ public class ParallelReader extends IndexReader {
     if (fieldToReader.get(field) == null) {
       fieldToReader.put(field, reader);
       this.fields.addField(field, MultiFields.getFields(reader).terms(field));
-      this.perDocs.addField(field, reader);
     }
   }
@@ -463,35 +461,8 @@ public class ParallelReader extends IndexReader {
   }
   @Override
-  public PerDocValues perDocValues() throws IOException {
-    ensureOpen();
-    return perDocs;
+  public DocValues docValues(String field) throws IOException {
+    IndexReader reader = fieldToReader.get(field);
+    return reader == null ? null : reader.docValues(field);
   }
-  // Single instance of this, per ParallelReader instance
-  private static final class ParallelPerDocs extends PerDocValues {
-    final TreeMap<String,DocValues> fields = new TreeMap<String,DocValues>();
-    void addField(String field, IndexReader r) throws IOException {
-      PerDocValues perDocs = MultiPerDocValues.getPerDocs(r);
-      if (perDocs != null) {
-        fields.put(field, perDocs.docValues(field));
-      }
-    }
-    @Override
-    public void close() throws IOException {
-      // nothing to do here
-    }
-    @Override
-    public DocValues docValues(String field) throws IOException {
-      return fields.get(field);
-    }
-  }
 }

View File

@@ -22,6 +22,7 @@ import java.util.concurrent.atomic.AtomicInteger;
 import org.apache.lucene.index.codecs.Codec;
 import org.apache.lucene.index.codecs.NormsReader;
+import org.apache.lucene.index.codecs.PerDocProducer;
 import org.apache.lucene.index.codecs.PostingsFormat;
 import org.apache.lucene.index.codecs.FieldsProducer;
 import org.apache.lucene.index.codecs.StoredFieldsReader;
@@ -47,7 +48,7 @@ final class SegmentCoreReaders {
   final FieldInfos fieldInfos;
   final FieldsProducer fields;
-  final PerDocValues perDocProducer;
+  final PerDocProducer perDocProducer;
   final NormsReader norms;
   final Directory dir;

View File

@@ -27,6 +27,7 @@ import java.util.concurrent.atomic.AtomicInteger;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.codecs.PerDocProducer;
 import org.apache.lucene.index.codecs.StoredFieldsReader;
 import org.apache.lucene.index.codecs.TermVectorsReader;
 import org.apache.lucene.store.IOContext;
@@ -424,9 +425,13 @@ public final class SegmentReader extends IndexReader implements Cloneable {
   }
   @Override
-  public PerDocValues perDocValues() throws IOException {
+  public DocValues docValues(String field) throws IOException {
     ensureOpen();
-    return core.perDocProducer;
+    final PerDocProducer perDoc = core.perDocProducer;
+    if (perDoc == null) {
+      return null;
+    }
+    return perDoc.docValues(field);
   }
   /**

View File

@@ -34,11 +34,12 @@ import org.apache.lucene.index.MultiReader; // javadoc
  * IndexReader#getSequentialSubReaders}) to emulate an
  * atomic reader. This requires implementing the postings
  * APIs on-the-fly, using the static methods in {@link
- * MultiFields}, by stepping through the sub-readers to
- * merge fields/terms, appending docs, etc.
+ * MultiFields}, {@link MultiNorms}, {@link MultiDocValues},
+ * by stepping through the sub-readers to merge fields/terms,
+ * appending docs, etc.
  *
  * <p>If you ever hit an UnsupportedOperationException saying
- * "please use MultiFields.XXX instead", the simple
+ * "please use MultiXXX.YYY instead", the simple
  * but non-performant workaround is to wrap your reader
  * using this class.</p>
  *
@@ -71,9 +72,9 @@ public final class SlowMultiReaderWrapper extends FilterIndexReader {
   }
   @Override
-  public PerDocValues perDocValues() throws IOException {
+  public DocValues docValues(String field) throws IOException {
     ensureOpen();
-    return MultiPerDocValues.getPerDocs(in);
+    return MultiDocValues.getDocValues(in, field);
   }
   @Override

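Code that now trips the rewritten UnsupportedOperationException has the same two outs, sketched here with a hypothetical field name:

    // option 1: the dedicated merge entry point
    DocValues dv = MultiDocValues.getDocValues(multiReader, "myField");

    // option 2: wrap once and keep using the atomic-reader API
    IndexReader slow = new SlowMultiReaderWrapper(multiReader);
    DocValues dv2 = slow.docValues("myField"); // delegates to MultiDocValues.getDocValues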
View File

@@ -20,7 +20,6 @@ package org.apache.lucene.index.codecs;
 import java.io.IOException;
 import java.util.Set;
-import org.apache.lucene.index.PerDocValues;
 import org.apache.lucene.index.PerDocWriteState;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.SegmentReadState;
@@ -28,6 +27,6 @@ import org.apache.lucene.store.Directory;
 public abstract class DocValuesFormat {
   public abstract PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException;
-  public abstract PerDocValues docsProducer(SegmentReadState state) throws IOException;
+  public abstract PerDocProducer docsProducer(SegmentReadState state) throws IOException;
   public abstract void files(Directory dir, SegmentInfo info, Set<String> files) throws IOException;
 }

View File

@@ -28,7 +28,6 @@ import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.DocValues.Type; // javadocs
-import org.apache.lucene.index.PerDocValues;
 import org.apache.lucene.index.codecs.lucene40.values.Bytes;
 import org.apache.lucene.index.codecs.lucene40.values.Floats;
 import org.apache.lucene.index.codecs.lucene40.values.Ints;
@@ -37,11 +36,11 @@ import org.apache.lucene.store.IOContext;
 import org.apache.lucene.util.BytesRef;
 /**
- * Abstract base class for PerDocValues implementations
+ * Abstract base class for PerDocProducer implementations
  * @lucene.experimental
  */
 // TODO: this needs to go under lucene40 codec (its specific to its impl)
-public abstract class DocValuesReaderBase extends PerDocValues {
+public abstract class DocValuesReaderBase extends PerDocProducer {
   protected abstract void closeInternal(Collection<? extends Closeable> closeables) throws IOException;
   protected abstract Map<String, DocValues> docValues();

View File

@@ -21,7 +21,6 @@ import java.io.IOException;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.MergeState;
-import org.apache.lucene.index.PerDocValues;
 /**
  * Abstract API that consumes per document values. Concrete implementations of
@@ -39,30 +38,22 @@ public abstract class PerDocConsumer implements Closeable{
       throws IOException;
   /**
-   * Consumes and merges the given {@link PerDocValues} producer
+   * Consumes and merges the given {@link PerDocProducer} producer
    * into this consumers format.
    */
   public void merge(MergeState mergeState) throws IOException {
     final DocValues[] docValues = new DocValues[mergeState.readers.size()];
-    final PerDocValues[] perDocValues = new PerDocValues[mergeState.readers.size()];
-    // pull all PerDocValues
-    for (int i = 0; i < perDocValues.length; i++) {
-      perDocValues[i] = mergeState.readers.get(i).reader.perDocValues();
-    }
     for (FieldInfo fieldInfo : mergeState.fieldInfos) {
       mergeState.fieldInfo = fieldInfo; // set the field we are merging
       if (fieldInfo.hasDocValues()) {
-        for (int i = 0; i < perDocValues.length; i++) {
-          if (perDocValues[i] != null) { // get all IDV to merge
-            docValues[i] = perDocValues[i].docValues(fieldInfo.name);
-          }
+        for (int i = 0; i < docValues.length; i++) {
+          docValues[i] = mergeState.readers.get(i).reader.docValues(fieldInfo.name);
         }
         final DocValuesConsumer docValuesConsumer = addValuesField(fieldInfo.getDocValuesType(), fieldInfo);
         assert docValuesConsumer != null;
         docValuesConsumer.merge(mergeState, docValues);
       }
     }
+    /* NOTE: don't close the perDocProducers here since they are private segment producers
+     * and will be closed once the SegmentReader goes out of scope */
   }
 }

View File

@@ -1,4 +1,4 @@
-package org.apache.lucene.index;
+package org.apache.lucene.index.codecs;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements. See the NOTICE file distributed with
@@ -18,8 +18,7 @@ package org.apache.lucene.index;
 import java.io.Closeable;
 import java.io.IOException;
-import org.apache.lucene.index.codecs.PerDocConsumer;
-import org.apache.lucene.index.codecs.PostingsFormat;
+import org.apache.lucene.index.DocValues;
 /**
  * Abstract API that provides access to one or more per-document storage
@@ -27,13 +26,13 @@ import org.apache.lucene.index.codecs.PostingsFormat;
  * storage on a per-document basis corresponding to their actual
  * {@link PerDocConsumer} counterpart.
  * <p>
- * The {@link PerDocValues} API is accessible through the
+ * The {@link PerDocProducer} API is accessible through the
  * {@link PostingsFormat} - API providing per field consumers and producers for inverted
  * data (terms, postings) as well as per-document data.
  *
  * @lucene.experimental
  */
-public abstract class PerDocValues implements Closeable {
+public abstract class PerDocProducer implements Closeable {
   /**
    * Returns {@link DocValues} for the current field.
    *
@@ -45,5 +44,5 @@ public abstract class PerDocValues implements Closeable {
    */
   public abstract DocValues docValues(String field) throws IOException;
-  public static final PerDocValues[] EMPTY_ARRAY = new PerDocValues[0];
+  public static final PerDocProducer[] EMPTY_ARRAY = new PerDocProducer[0];
 }

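After the rename the producer contract stays small: one lookup method plus Closeable. A hypothetical in-memory implementation (not part of this commit) showing the shape that DocValuesReaderBase fills in with a Map:

    class MapBackedPerDocProducer extends PerDocProducer {
      private final Map<String, DocValues> values; // field name -> values, illustrative only

      MapBackedPerDocProducer(Map<String, DocValues> values) {
        this.values = values;
      }

      @Override
      public DocValues docValues(String field) throws IOException {
        return values.get(field); // null when this segment has no values for the field
      }

      @Override
      public void close() throws IOException {
        // nothing to release in this in-memory sketch
      }
    }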
View File

@@ -20,7 +20,6 @@ package org.apache.lucene.index.codecs.lucene3x;
 import java.io.IOException;
 import java.util.Set;
-import org.apache.lucene.index.PerDocValues;
 import org.apache.lucene.index.PerDocWriteState;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.SegmentReadState;
@@ -28,6 +27,7 @@ import org.apache.lucene.index.codecs.Codec;
 import org.apache.lucene.index.codecs.DocValuesFormat;
 import org.apache.lucene.index.codecs.FieldInfosFormat;
 import org.apache.lucene.index.codecs.NormsFormat;
+import org.apache.lucene.index.codecs.PerDocProducer;
 import org.apache.lucene.index.codecs.StoredFieldsFormat;
 import org.apache.lucene.index.codecs.PerDocConsumer;
 import org.apache.lucene.index.codecs.PostingsFormat;
@@ -75,7 +75,7 @@ public class Lucene3xCodec extends Codec {
   }
   @Override
-  public PerDocValues docsProducer(SegmentReadState state) throws IOException {
+  public PerDocProducer docsProducer(SegmentReadState state) throws IOException {
     return null;
   }

View File

@@ -20,12 +20,12 @@ package org.apache.lucene.index.codecs.lucene40;
 import java.io.IOException;
 import java.util.Set;
-import org.apache.lucene.index.PerDocValues;
 import org.apache.lucene.index.PerDocWriteState;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.codecs.DocValuesFormat;
 import org.apache.lucene.index.codecs.PerDocConsumer;
+import org.apache.lucene.index.codecs.PerDocProducer;
 import org.apache.lucene.store.Directory;
 public class Lucene40DocValuesFormat extends DocValuesFormat {
@@ -36,7 +36,7 @@ public class Lucene40DocValuesFormat extends DocValuesFormat {
   }
   @Override
-  public PerDocValues docsProducer(SegmentReadState state) throws IOException {
+  public PerDocProducer docsProducer(SegmentReadState state) throws IOException {
     return new Lucene40DocValuesProducer(state);
   }

View File

@@ -33,7 +33,7 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.IOUtils;
 /**
- * Default PerDocValues implementation that uses compound file.
+ * Default PerDocProducer implementation that uses compound file.
  * @lucene.experimental
  */
 public class Lucene40DocValuesProducer extends DocValuesReaderBase {

View File

@@ -28,7 +28,7 @@ import org.apache.lucene.index.codecs.DocValuesReaderBase;
 import org.apache.lucene.util.IOUtils;
 /**
- * Implementation of PerDocValues that uses separate files.
+ * Implementation of PerDocProducer that uses separate files.
  * @lucene.experimental
  */
 public class SepDocValuesProducer extends DocValuesReaderBase {

View File

@@ -20,12 +20,12 @@ package org.apache.lucene.index.codecs.mocksep;
 import java.io.IOException;
 import java.util.Set;
-import org.apache.lucene.index.PerDocValues;
 import org.apache.lucene.index.PerDocWriteState;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.codecs.DocValuesFormat;
 import org.apache.lucene.index.codecs.PerDocConsumer;
+import org.apache.lucene.index.codecs.PerDocProducer;
 import org.apache.lucene.index.codecs.sep.SepDocValuesConsumer;
 import org.apache.lucene.index.codecs.sep.SepDocValuesProducer;
 import org.apache.lucene.store.Directory;
@@ -43,7 +43,7 @@ public class MockSepDocValuesFormat extends DocValuesFormat {
   }
   @Override
-  public PerDocValues docsProducer(SegmentReadState state) throws IOException {
+  public PerDocProducer docsProducer(SegmentReadState state) throws IOException {
     return new SepDocValuesProducer(state);
   }

View File

@@ -1284,7 +1284,7 @@ public class TestAddIndexes extends LuceneTestCase {
     w.close();
     IndexReader sr = getOnlySegmentReader(r3);
     assertEquals(2, sr.numDocs());
-    DocValues docValues = sr.perDocValues().docValues("dv");
+    DocValues docValues = sr.docValues("dv");
     assertNotNull(docValues);
     r3.close();
     d3.close();

View File

@@ -18,12 +18,15 @@ package org.apache.lucene.index;
  */
 import java.io.IOException;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.Random;
+import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexReader.FieldOption;
 import org.apache.lucene.index.codecs.Codec;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.store.Directory;
@@ -516,29 +519,15 @@ public class TestDuelingCodecs extends LuceneTestCase {
    * checks that docvalues across all fields are equivalent
    */
   public void assertDocValues(IndexReader leftReader, IndexReader rightReader) throws Exception {
-    PerDocValues leftPerDoc = MultiPerDocValues.getPerDocs(leftReader);
-    PerDocValues rightPerDoc = MultiPerDocValues.getPerDocs(rightReader);
-    Fields leftFields = MultiFields.getFields(leftReader);
-    Fields rightFields = MultiFields.getFields(rightReader);
-    // Fields could be null if there are no postings,
-    // but then it must be null for both
-    if (leftFields == null || rightFields == null) {
-      assertNull(info, leftFields);
-      assertNull(info, rightFields);
-      return;
-    }
-    FieldsEnum fieldsEnum = leftFields.iterator();
-    String field;
-    while ((field = fieldsEnum.next()) != null) {
-      DocValues leftDocValues = leftPerDoc.docValues(field);
-      DocValues rightDocValues = rightPerDoc.docValues(field);
-      if (leftDocValues == null || rightDocValues == null) {
-        assertNull(info, leftDocValues);
-        assertNull(info, rightDocValues);
-        continue;
-      }
+    Set<String> leftValues = new HashSet<String>(leftReader.getFieldNames(FieldOption.DOC_VALUES));
+    Set<String> rightValues = new HashSet<String>(rightReader.getFieldNames(FieldOption.DOC_VALUES));
+    assertEquals(info, leftValues, rightValues);
+    for (String field : leftValues) {
+      DocValues leftDocValues = MultiDocValues.getDocValues(leftReader, field);
+      DocValues rightDocValues = MultiDocValues.getDocValues(rightReader, field);
+      assertNotNull(info, leftDocValues);
+      assertNotNull(info, rightDocValues);
       assertDocValuesSource(leftDocValues.getDirectSource(), rightDocValues.getDirectSource());
       assertDocValuesSource(leftDocValues.getSource(), rightDocValues.getSource());
     }

View File

@@ -37,13 +37,13 @@ import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.LogDocMergePolicy;
 import org.apache.lucene.index.LogMergePolicy;
-import org.apache.lucene.index.MultiPerDocValues;
-import org.apache.lucene.index.PerDocValues;
+import org.apache.lucene.index.MultiDocValues;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.DocValues.Source;
 import org.apache.lucene.index.DocValues.Type;
 import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.PerDocProducer;
 import org.apache.lucene.search.*;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.LockObtainFailedException;
@@ -104,7 +104,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
     TopDocs search = searcher.search(query, 10);
     assertEquals(5, search.totalHits);
     ScoreDoc[] scoreDocs = search.scoreDocs;
-    DocValues docValues = MultiPerDocValues.getPerDocs(reader).docValues("docId");
+    DocValues docValues = MultiDocValues.getDocValues(reader, "docId");
     Source source = docValues.getSource();
     for (int i = 0; i < scoreDocs.length; i++) {
       assertEquals(i, scoreDocs[i].doc);
@@ -418,24 +418,8 @@ public class TestDocValuesIndexing extends LuceneTestCase {
     d.close();
   }
-  private DocValues getDocValues(IndexReader reader, String field)
-      throws IOException {
-    boolean singleSeg = reader.getSequentialSubReaders().length == 1;
-    PerDocValues perDoc = singleSeg ? reader.getSequentialSubReaders()[0].perDocValues()
-        : MultiPerDocValues.getPerDocs(reader);
-    switch (random.nextInt(singleSeg ? 3 : 2)) { // case 2 only if single seg
-    case 0:
-      return perDoc.docValues(field);
-    case 1:
-      DocValues docValues = perDoc.docValues(field);
-      if (docValues != null) {
-        return docValues;
-      }
-      throw new RuntimeException("no such field " + field);
-    case 2:// this only works if we are on a single seg index!
-      return reader.getSequentialSubReaders()[0].docValues(field);
-    }
-    throw new RuntimeException();
+  private DocValues getDocValues(IndexReader reader, String field) throws IOException {
+    return MultiDocValues.getDocValues(reader, field);
   }
   private Source getSource(DocValues values) throws IOException {
@@ -570,7 +554,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
     w.forceMerge(1);
     IndexReader r = w.getReader();
     w.close();
-    assertEquals(17, r.getSequentialSubReaders()[0].perDocValues().docValues("field").load().getInt(0));
+    assertEquals(17, r.getSequentialSubReaders()[0].docValues("field").load().getInt(0));
     r.close();
     d.close();
   }
@@ -600,7 +584,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
     w.forceMerge(1);
     IndexReader r = w.getReader();
     w.close();
-    assertEquals(17, r.getSequentialSubReaders()[0].perDocValues().docValues("field").load().getInt(0));
+    assertEquals(17, r.getSequentialSubReaders()[0].docValues("field").load().getInt(0));
     r.close();
     d.close();
   }