LUCENE-3622: PerDocValues -> PerDocProducer (privately used only by SegmentCoreReaders), simplify IR API and MultiDocValues, remove MultiPerDocValues

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3622@1212828 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2011-12-10 17:43:00 +00:00
parent 4387ff6161
commit 75d6217091
25 changed files with 167 additions and 373 deletions
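In short, this commit collapses the old two-step per-document lookup (reader.perDocValues().docValues(field)) into a single IndexReader.docValues(field) call, moving PerDocValues into the codecs package as PerDocProducer, used privately by SegmentCoreReaders. A minimal before/after sketch against an atomic reader (the reader setup and the "dv" field name are illustrative, not from this commit):

import java.io.IOException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;

class DocValuesLookup {
  static long firstValue(IndexReader atomicReader) throws IOException {
    // Before this commit (two steps, two null checks):
    //   PerDocValues perDoc = atomicReader.perDocValues();
    //   DocValues dv = perDoc == null ? null : perDoc.docValues("dv");
    // After this commit (one call; the reader consults its producer internally):
    DocValues dv = atomicReader.docValues("dv");
    if (dv == null) {
      return -1; // this field stores no per-document values
    }
    return dv.load().getInt(0); // value for document 0, as in the tests below
  }
}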

View File

@@ -40,8 +40,8 @@ import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.PerDocValues;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.OrdTermState;
import org.apache.lucene.index.StoredFieldVisitor;
@@ -1159,7 +1159,7 @@ public class MemoryIndex {
}
@Override
public PerDocValues perDocValues() throws IOException {
public DocValues docValues(String field) throws IOException {
return null;
}
}

View File

@@ -27,6 +27,8 @@ import org.apache.lucene.store.IndexInput;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.PerDocProducer;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
@@ -1130,8 +1132,7 @@ public class CheckIndex {
for (FieldInfo fieldInfo : fieldInfos) {
if (fieldInfo.hasDocValues()) {
status.totalValueFields++;
final PerDocValues perDocValues = reader.perDocValues();
final DocValues docValues = perDocValues.docValues(fieldInfo.name);
final DocValues docValues = reader.docValues(fieldInfo.name);
if (docValues == null) {
continue;
}

View File

@@ -31,11 +31,8 @@ import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.Lock;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.MapBackedSet;
/**
@@ -716,7 +713,7 @@ class DirectoryReader extends IndexReader implements Cloneable {
}
@Override
public PerDocValues perDocValues() throws IOException {
throw new UnsupportedOperationException("please use MultiPerDocValues#getPerDocs, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level Fields");
public DocValues docValues(String field) throws IOException {
throw new UnsupportedOperationException("please use MultiDocValues#getDocValues, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level DocValues");
}
}

View File

@@ -421,9 +421,9 @@ public class FilterIndexReader extends IndexReader {
}
@Override
public PerDocValues perDocValues() throws IOException {
public DocValues docValues(String field) throws IOException {
ensureOpen();
return in.perDocValues();
return in.docValues(field);
}
@Override

View File

@@ -773,21 +773,6 @@ public abstract class IndexReader implements Cloneable,Closeable {
* through them yourself. */
public abstract Fields fields() throws IOException;
/**
* Returns {@link PerDocValues} for this reader.
* This method may return null if the reader has no per-document
* values stored.
*
* <p><b>NOTE</b>: if this is a multi reader ({@link
* #getSequentialSubReaders} is not null) then this
* method will throw UnsupportedOperationException. If
* you really need {@link PerDocValues} for such a reader,
* use {@link MultiPerDocValues#getPerDocs(IndexReader)}. However, for
* performance reasons, it's best to get all sub-readers
* using {@link ReaderUtil#gatherSubReaders} and iterate
* through them yourself. */
public abstract PerDocValues perDocValues() throws IOException;
public final int docFreq(Term term) throws IOException {
return docFreq(term.field(), term.bytes());
}
@@ -1146,14 +1131,20 @@ public abstract class IndexReader implements Cloneable,Closeable {
throw new UnsupportedOperationException("This reader does not support this method.");
}
public final DocValues docValues(String field) throws IOException {
ensureOpen();
final PerDocValues perDoc = perDocValues();
if (perDoc == null) {
return null;
}
return perDoc.docValues(field);
}
/**
* Returns {@link DocValues} for this field.
* This method may return null if the reader has no per-document
* values stored.
*
* <p><b>NOTE</b>: if this is a multi reader ({@link
* #getSequentialSubReaders} is not null) then this
* method will throw UnsupportedOperationException. If
* you really need {@link DocValues} for such a reader,
* use {@link MultiDocValues#getDocValues(IndexReader,String)}. However, for
* performance reasons, it's best to get all sub-readers
* using {@link ReaderUtil#gatherSubReaders} and iterate
* through them yourself. */
public abstract DocValues docValues(String field) throws IOException;
private volatile Fields fields;
@@ -1169,21 +1160,6 @@ public abstract class IndexReader implements Cloneable,Closeable {
return fields;
}
private volatile PerDocValues perDocValues;
/** @lucene.internal */
void storePerDoc(PerDocValues perDocValues) {
ensureOpen();
this.perDocValues = perDocValues;
}
/** @lucene.internal */
PerDocValues retrievePerDoc() {
ensureOpen();
return perDocValues;
}
/**
* A struct like class that represents a hierarchical relationship between
* {@link IndexReader} instances.

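The javadoc above pins down the new contract: atomic readers serve DocValues directly, while multi readers throw UnsupportedOperationException. A hedged sketch of the recommended dispatch (the helper class and method names are hypothetical):

import java.io.IOException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiDocValues;

class DocValuesAccess {
  // Composite readers (getSequentialSubReaders() != null) reject docValues(String),
  // so route them through the slow merged view instead.
  static DocValues get(IndexReader reader, String field) throws IOException {
    if (reader.getSequentialSubReaders() == null) {
      return reader.docValues(field); // atomic reader: direct per-segment access
    }
    return MultiDocValues.getDocValues(reader, field); // merged on the fly
  }
}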
View File

@@ -17,10 +17,13 @@ package org.apache.lucene.index;
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.ReaderUtil.Gather;
/**
* A wrapper for compound IndexReader providing access to per segment
@@ -31,59 +34,92 @@ import org.apache.lucene.util.ReaderUtil;
*/
public class MultiDocValues extends DocValues {
public static class DocValuesIndex {
public final static DocValuesIndex[] EMPTY_ARRAY = new DocValuesIndex[0];
public static class DocValuesSlice {
public final static DocValuesSlice[] EMPTY_ARRAY = new DocValuesSlice[0];
final int start;
final int length;
final DocValues docValues;
DocValues docValues;
public DocValuesIndex(DocValues docValues, int start, int length) {
public DocValuesSlice(DocValues docValues, int start, int length) {
this.docValues = docValues;
this.start = start;
this.length = length;
}
}
private DocValuesIndex[] docValuesIdx;
private DocValuesSlice[] slices;
private int[] starts;
private Type type;
private int valueSize;
public MultiDocValues() {
starts = new int[0];
docValuesIdx = new DocValuesIndex[0];
private MultiDocValues(DocValuesSlice[] slices, int[] starts, TypePromoter promotedType) {
this.starts = starts;
this.slices = slices;
this.type = promotedType.type();
this.valueSize = promotedType.getValueSize();
}
public MultiDocValues(DocValuesIndex[] docValuesIdx) {
reset(docValuesIdx);
/**
* Returns a single {@link DocValues} instance for this field, merging
* their values on the fly.
*
* <p>
* <b>NOTE</b>: this is a slow way to access DocValues. It's better to get the
* sub-readers (using {@link Gather}) and iterate through them yourself.
*/
public static DocValues getDocValues(IndexReader r, final String field) throws IOException {
final IndexReader[] subs = r.getSequentialSubReaders();
if (subs == null) {
// already an atomic reader
return r.docValues(field);
} else if (subs.length == 0) {
// no fields
return null;
} else if (subs.length == 1) {
return getDocValues(subs[0], field);
} else {
final List<DocValuesSlice> slices = new ArrayList<DocValuesSlice>();
final TypePromoter promotedType[] = new TypePromoter[1];
promotedType[0] = TypePromoter.getIdentityPromoter();
// gather all docvalues fields, accumulating a promoted type across
// potentially incompatible types
new ReaderUtil.Gather(r) {
@Override
protected void add(int base, IndexReader r) throws IOException {
final DocValues d = r.docValues(field);
if (d != null) {
TypePromoter incoming = TypePromoter.create(d.type(), d.getValueSize());
promotedType[0] = promotedType[0].promote(incoming);
}
slices.add(new DocValuesSlice(d, base, r.maxDoc()));
}
}.run();
// return null if no docvalues encountered anywhere
if (promotedType[0] == TypePromoter.getIdentityPromoter()) {
return null;
}
// populate starts and fill gaps with empty docvalues
int starts[] = new int[slices.size()];
for (int i = 0; i < slices.size(); i++) {
DocValuesSlice slice = slices.get(i);
starts[i] = slice.start;
if (slice.docValues == null) {
slice.docValues = new EmptyDocValues(slice.length, promotedType[0].type());
}
}
return new MultiDocValues(slices.toArray(new DocValuesSlice[slices.size()]), starts, promotedType[0]);
}
}
@Override
public Source load() throws IOException {
return new MultiSource(docValuesIdx, starts, false);
}
public DocValues reset(DocValuesIndex[] docValuesIdx) {
final int[] start = new int[docValuesIdx.length];
TypePromoter promoter = TypePromoter.getIdentityPromoter();
for (int i = 0; i < docValuesIdx.length; i++) {
start[i] = docValuesIdx[i].start;
if (!(docValuesIdx[i].docValues instanceof EmptyDocValues)) {
// only promote if not a dummy
final TypePromoter incomingPromoter = TypePromoter.create(
docValuesIdx[i].docValues.type(),
docValuesIdx[i].docValues.getValueSize());
promoter = promoter.promote(incomingPromoter);
if (promoter == null) {
throw new IllegalStateException("Can not promote " + incomingPromoter);
}
}
}
this.type = promoter.type();
this.valueSize = promoter.getValueSize();
this.starts = start;
this.docValuesIdx = docValuesIdx;
return this;
return new MultiSource(slices, starts, false, type);
}
public static class EmptyDocValues extends DocValues {
@@ -117,14 +153,14 @@ public class MultiDocValues extends DocValues {
private int start = 0;
private Source current;
private final int[] starts;
private final DocValuesIndex[] docValuesIdx;
private final DocValuesSlice[] slices;
private boolean direct;
public MultiSource(DocValuesIndex[] docValuesIdx, int[] starts, boolean direct) {
super(docValuesIdx[0].docValues.type());
this.docValuesIdx = docValuesIdx;
public MultiSource(DocValuesSlice[] slices, int[] starts, boolean direct, Type type) {
super(type);
this.slices = slices;
this.starts = starts;
assert docValuesIdx.length != 0;
assert slices.length != 0;
this.direct = direct;
}
@@ -138,22 +174,22 @@ public class MultiDocValues extends DocValues {
return docID - start;
} else {
final int idx = ReaderUtil.subIndex(docID, starts);
assert idx >= 0 && idx < docValuesIdx.length : "idx was " + idx
assert idx >= 0 && idx < slices.length : "idx was " + idx
+ " for doc id: " + docID + " slices : " + Arrays.toString(starts);
assert docValuesIdx[idx] != null;
assert slices[idx] != null;
try {
if (direct) {
current = docValuesIdx[idx].docValues.getDirectSource();
current = slices[idx].docValues.getDirectSource();
} else {
current = docValuesIdx[idx].docValues.getSource();
current = slices[idx].docValues.getSource();
}
} catch (IOException e) {
throw new RuntimeException("load failed", e); // TODO how should we
// handle this
}
start = docValuesIdx[idx].start;
numDocs = docValuesIdx[idx].length;
start = slices[idx].start;
numDocs = slices[idx].length;
return docID - start;
}
}
@@ -206,6 +242,6 @@ public class MultiDocValues extends DocValues {
@Override
public Source getDirectSource() throws IOException {
return new MultiSource(docValuesIdx, starts, true);
return new MultiSource(slices, starts, true, type);
}
}

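To make the merged view concrete: getDocValues gathers one slice per sub-reader, fills gaps with EmptyDocValues, and MultiSource maps each global docID to the slice that owns it. A small scan sketch, assuming "price" is a hypothetical integer docvalues field:

import java.io.IOException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiDocValues;

class MergedScan {
  static long sum(IndexReader reader) throws IOException {
    DocValues merged = MultiDocValues.getDocValues(reader, "price");
    if (merged == null) {
      return 0; // no sub-reader has docvalues for this field
    }
    Source source = merged.getSource();
    long total = 0;
    for (int docID = 0; docID < reader.maxDoc(); docID++) {
      total += source.getInt(docID); // MultiSource switches slices as docID advances
    }
    return total;
  }
}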
View File

@@ -38,14 +38,9 @@ public final class MultiFieldsEnum extends FieldsEnum {
// Holds sub-readers containing field we are currently
// on, popped from queue.
private final FieldsEnumWithSlice[] top;
private final FieldsEnumWithSlice[] enumWithSlices;
private int numTop;
// Re-used TermsEnum
private final MultiTermsEnum terms;
private final MultiDocValues docValues;
private final Fields fields;
private String currentField;
@@ -54,9 +49,7 @@ public final class MultiFieldsEnum extends FieldsEnum {
* (ie, {@link FieldsEnum#next} has not been called. */
public MultiFieldsEnum(MultiFields fields, FieldsEnum[] subs, ReaderUtil.Slice[] subSlices) throws IOException {
this.fields = fields;
terms = new MultiTermsEnum(subSlices);
queue = new FieldMergeQueue(subs.length);
docValues = new MultiDocValues();
top = new FieldsEnumWithSlice[subs.length];
List<FieldsEnumWithSlice> enumWithSlices = new ArrayList<FieldsEnumWithSlice>();
@@ -72,8 +65,6 @@ public final class MultiFieldsEnum extends FieldsEnum {
queue.add(sub);
}
}
this.enumWithSlices = enumWithSlices.toArray(FieldsEnumWithSlice.EMPTY_ARRAY);
}
@Override

View File

@@ -1,147 +0,0 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.index.MultiDocValues.DocValuesIndex;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.ReaderUtil.Gather;
/**
* Exposes per-document values, merged from per-document values API of
* sub-segments. This is useful when you're interacting with an {@link IndexReader}
* implementation that consists of sequential sub-readers (eg DirectoryReader
* or {@link MultiReader}).
*
* <p>
* <b>NOTE</b>: for multi readers, you'll get better performance by gathering
* the sub readers using {@link ReaderUtil#gatherSubReaders} and then operate
* per-reader, instead of using this class.
*
* @lucene.experimental
*/
public class MultiPerDocValues extends PerDocValues {
private final PerDocValues[] subs;
private final ReaderUtil.Slice[] subSlices;
private final Map<String, DocValues> docValues = new ConcurrentHashMap<String, DocValues>();
public MultiPerDocValues(PerDocValues[] subs, ReaderUtil.Slice[] subSlices) {
this.subs = subs;
this.subSlices = subSlices;
}
/**
* Returns a single {@link PerDocValues} instance for this reader, merging
* their values on the fly. This method will not return <code>null</code>.
*
* <p>
* <b>NOTE</b>: this is a slow way to access postings. It's better to get the
* sub-readers (using {@link Gather}) and iterate through them yourself.
*/
public static PerDocValues getPerDocs(IndexReader r) throws IOException {
final IndexReader[] subs = r.getSequentialSubReaders();
if (subs == null) {
// already an atomic reader
return r.perDocValues();
} else if (subs.length == 0) {
// no fields
return null;
} else if (subs.length == 1) {
return getPerDocs(subs[0]);
}
PerDocValues perDocValues = r.retrievePerDoc();
if (perDocValues == null) {
final List<PerDocValues> producer = new ArrayList<PerDocValues>();
final List<ReaderUtil.Slice> slices = new ArrayList<ReaderUtil.Slice>();
new ReaderUtil.Gather(r) {
@Override
protected void add(int base, IndexReader r) throws IOException {
final PerDocValues f = r.perDocValues();
if (f != null) {
producer.add(f);
slices
.add(new ReaderUtil.Slice(base, r.maxDoc(), producer.size() - 1));
}
}
}.run();
if (producer.size() == 0) {
return null;
} else if (producer.size() == 1) {
perDocValues = producer.get(0);
} else {
perDocValues = new MultiPerDocValues(
producer.toArray(PerDocValues.EMPTY_ARRAY),
slices.toArray(ReaderUtil.Slice.EMPTY_ARRAY));
}
r.storePerDoc(perDocValues);
}
return perDocValues;
}
public DocValues docValues(String field) throws IOException {
DocValues result = docValues.get(field);
if (result == null) {
// Lazy init: first time this field is requested, we
// create & add to docValues:
final List<MultiDocValues.DocValuesIndex> docValuesIndex = new ArrayList<MultiDocValues.DocValuesIndex>();
int docsUpto = 0;
DocValues.Type type = null;
// Gather all sub-readers that share this field
for (int i = 0; i < subs.length; i++) {
DocValues values = subs[i].docValues(field);
final int start = subSlices[i].start;
final int length = subSlices[i].length;
if (values != null) {
if (docsUpto != start) {
type = values.type();
docValuesIndex.add(new MultiDocValues.DocValuesIndex(
new MultiDocValues.EmptyDocValues(start, type), docsUpto, start
- docsUpto));
}
docValuesIndex.add(new MultiDocValues.DocValuesIndex(values, start,
length));
docsUpto = start + length;
} else if (i + 1 == subs.length && !docValuesIndex.isEmpty()) {
docValuesIndex.add(new MultiDocValues.DocValuesIndex(
new MultiDocValues.EmptyDocValues(start, type), docsUpto, start
- docsUpto));
}
}
if (docValuesIndex.isEmpty()) {
return null;
}
result = new MultiDocValues(
docValuesIndex.toArray(DocValuesIndex.EMPTY_ARRAY));
docValues.put(field, result);
}
return result;
}
public void close() throws IOException {
IOUtils.close(this.subs);
}
}

View File

@@ -22,6 +22,7 @@ import java.util.Collection;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.index.codecs.PerDocProducer;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ReaderUtil;
@@ -330,7 +331,7 @@ public class MultiReader extends IndexReader implements Cloneable {
}
@Override
public PerDocValues perDocValues() throws IOException {
throw new UnsupportedOperationException("please use MultiPerDocValues#getPerDocs, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level Fields");
public DocValues docValues(String field) throws IOException {
throw new UnsupportedOperationException("please use MultiDocValues#getDocValues, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level DocValues");
}
}

View File

@@ -57,7 +57,6 @@ public class ParallelReader extends IndexReader {
private boolean hasDeletions;
private final ParallelFields fields = new ParallelFields();
private final ParallelPerDocs perDocs = new ParallelPerDocs();
/** Construct a ParallelReader.
* <p>Note that all subreaders are closed if this ParallelReader is closed.</p>
@@ -130,7 +129,6 @@ public class ParallelReader extends IndexReader {
if (fieldToReader.get(field) == null) {
fieldToReader.put(field, reader);
this.fields.addField(field, MultiFields.getFields(reader).terms(field));
this.perDocs.addField(field, reader);
}
}
@@ -463,35 +461,8 @@ public class ParallelReader extends IndexReader {
}
@Override
public PerDocValues perDocValues() throws IOException {
ensureOpen();
return perDocs;
}
// Single instance of this, per ParallelReader instance
private static final class ParallelPerDocs extends PerDocValues {
final TreeMap<String,DocValues> fields = new TreeMap<String,DocValues>();
void addField(String field, IndexReader r) throws IOException {
PerDocValues perDocs = MultiPerDocValues.getPerDocs(r);
if (perDocs != null) {
fields.put(field, perDocs.docValues(field));
}
}
@Override
public void close() throws IOException {
// nothing to do here
}
@Override
public DocValues docValues(String field) throws IOException {
return fields.get(field);
}
public DocValues docValues(String field) throws IOException {
IndexReader reader = fieldToReader.get(field);
return reader == null ? null : reader.docValues(field);
}
}

View File

@@ -22,6 +22,7 @@ import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.NormsReader;
import org.apache.lucene.index.codecs.PerDocProducer;
import org.apache.lucene.index.codecs.PostingsFormat;
import org.apache.lucene.index.codecs.FieldsProducer;
import org.apache.lucene.index.codecs.StoredFieldsReader;
@@ -47,7 +48,7 @@ final class SegmentCoreReaders {
final FieldInfos fieldInfos;
final FieldsProducer fields;
final PerDocValues perDocProducer;
final PerDocProducer perDocProducer;
final NormsReader norms;
final Directory dir;

View File

@@ -27,6 +27,7 @@ import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.store.Directory;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.codecs.PerDocProducer;
import org.apache.lucene.index.codecs.StoredFieldsReader;
import org.apache.lucene.index.codecs.TermVectorsReader;
import org.apache.lucene.store.IOContext;
@@ -424,9 +425,13 @@ public final class SegmentReader extends IndexReader implements Cloneable {
}
@Override
public PerDocValues perDocValues() throws IOException {
public DocValues docValues(String field) throws IOException {
ensureOpen();
return core.perDocProducer;
final PerDocProducer perDoc = core.perDocProducer;
if (perDoc == null) {
return null;
}
return perDoc.docValues(field);
}
/**

View File

@@ -34,11 +34,12 @@ import org.apache.lucene.index.MultiReader; // javadoc
* IndexReader#getSequentialSubReaders}) to emulate an
* atomic reader. This requires implementing the postings
* APIs on-the-fly, using the static methods in {@link
* MultiFields}, by stepping through the sub-readers to
* merge fields/terms, appending docs, etc.
* MultiFields}, {@link MultiNorms}, {@link MultiDocValues},
* by stepping through the sub-readers to merge fields/terms,
* appending docs, etc.
*
* <p>If you ever hit an UnsupportedOperationException saying
* "please use MultiFields.XXX instead", the simple
* "please use MultiXXX.YYY instead", the simple
* but non-performant workaround is to wrap your reader
* using this class.</p>
*
@@ -71,9 +72,9 @@ public final class SlowMultiReaderWrapper extends FilterIndexReader {
}
@Override
public PerDocValues perDocValues() throws IOException {
public DocValues docValues(String field) throws IOException {
ensureOpen();
return MultiPerDocValues.getPerDocs(in);
return MultiDocValues.getDocValues(in, field);
}
@Override

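As the updated javadoc says, the simple but non-performant workaround for the UnsupportedOperationException is to wrap the composite reader. A hedged sketch (the "dv" field name is illustrative):

import java.io.IOException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.SlowMultiReaderWrapper;

class WrappedDocValues {
  static DocValues viaWrapper(IndexReader composite) throws IOException {
    // The wrapper emulates an atomic reader, so docValues(String) works on it;
    // internally it delegates to MultiDocValues.getDocValues as shown above.
    IndexReader atomicView = new SlowMultiReaderWrapper(composite);
    return atomicView.docValues("dv");
  }
}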
View File

@@ -20,7 +20,6 @@ package org.apache.lucene.index.codecs;
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.PerDocValues;
import org.apache.lucene.index.PerDocWriteState;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentReadState;
@@ -28,6 +27,6 @@ import org.apache.lucene.store.Directory;
public abstract class DocValuesFormat {
public abstract PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException;
public abstract PerDocValues docsProducer(SegmentReadState state) throws IOException;
public abstract PerDocProducer docsProducer(SegmentReadState state) throws IOException;
public abstract void files(Directory dir, SegmentInfo info, Set<String> files) throws IOException;
}

View File

@@ -28,7 +28,6 @@ import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValues.Type; // javadocs
import org.apache.lucene.index.PerDocValues;
import org.apache.lucene.index.codecs.lucene40.values.Bytes;
import org.apache.lucene.index.codecs.lucene40.values.Floats;
import org.apache.lucene.index.codecs.lucene40.values.Ints;
@@ -37,11 +36,11 @@ import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.BytesRef;
/**
* Abstract base class for PerDocValues implementations
* Abstract base class for PerDocProducer implementations
* @lucene.experimental
*/
// TODO: this needs to go under lucene40 codec (its specific to its impl)
public abstract class DocValuesReaderBase extends PerDocValues {
public abstract class DocValuesReaderBase extends PerDocProducer {
protected abstract void closeInternal(Collection<? extends Closeable> closeables) throws IOException;
protected abstract Map<String, DocValues> docValues();

View File

@@ -21,7 +21,6 @@ import java.io.IOException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.PerDocValues;
/**
* Abstract API that consumes per document values. Concrete implementations of
@@ -39,30 +38,22 @@ public abstract class PerDocConsumer implements Closeable{
throws IOException;
/**
* Consumes and merges the given {@link PerDocValues} producer
* Consumes and merges the given {@link PerDocProducer} producer
* into this consumers format.
*/
public void merge(MergeState mergeState) throws IOException {
final DocValues[] docValues = new DocValues[mergeState.readers.size()];
final PerDocValues[] perDocValues = new PerDocValues[mergeState.readers.size()];
// pull all PerDocValues
for (int i = 0; i < perDocValues.length; i++) {
perDocValues[i] = mergeState.readers.get(i).reader.perDocValues();
}
for (FieldInfo fieldInfo : mergeState.fieldInfos) {
mergeState.fieldInfo = fieldInfo; // set the field we are merging
if (fieldInfo.hasDocValues()) {
for (int i = 0; i < perDocValues.length; i++) {
if (perDocValues[i] != null) { // get all IDV to merge
docValues[i] = perDocValues[i].docValues(fieldInfo.name);
}
for (int i = 0; i < docValues.length; i++) {
docValues[i] = mergeState.readers.get(i).reader.docValues(fieldInfo.name);
}
final DocValuesConsumer docValuesConsumer = addValuesField(fieldInfo.getDocValuesType(), fieldInfo);
assert docValuesConsumer != null;
docValuesConsumer.merge(mergeState, docValues);
}
}
/* NOTE: don't close the perDocProducers here since they are private segment producers
* and will be closed once the SegmentReader goes out of scope */
}
}

View File

@@ -1,4 +1,4 @@
package org.apache.lucene.index;
package org.apache.lucene.index.codecs;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -18,8 +18,7 @@ package org.apache.lucene.index;
import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.index.codecs.PerDocConsumer;
import org.apache.lucene.index.codecs.PostingsFormat;
import org.apache.lucene.index.DocValues;
/**
* Abstract API that provides access to one or more per-document storage
@@ -27,13 +26,13 @@ import org.apache.lucene.index.codecs.PostingsFormat;
* storage on a per-document basis corresponding to their actual
* {@link PerDocConsumer} counterpart.
* <p>
* The {@link PerDocValues} API is accessible through the
* The {@link PerDocProducer} API is accessible through the
* {@link PostingsFormat} - API providing per field consumers and producers for inverted
* data (terms, postings) as well as per-document data.
*
* @lucene.experimental
*/
public abstract class PerDocValues implements Closeable {
public abstract class PerDocProducer implements Closeable {
/**
* Returns {@link DocValues} for the current field.
*
@@ -45,5 +44,5 @@ public abstract class PerDocValues implements Closeable {
*/
public abstract DocValues docValues(String field) throws IOException;
public static final PerDocValues[] EMPTY_ARRAY = new PerDocValues[0];
public static final PerDocProducer[] EMPTY_ARRAY = new PerDocProducer[0];
}

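Since PerDocProducer is now a codec-level API, a format's producer implements docValues(String) per segment. A minimal in-memory sketch (not a real codec component) that serves a prebuilt map, much as DocValuesReaderBase does over its docValues() map; the class name is hypothetical:

import java.io.IOException;
import java.util.Map;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.codecs.PerDocProducer;

class MapPerDocProducer extends PerDocProducer {
  private final Map<String, DocValues> fields;

  MapPerDocProducer(Map<String, DocValues> fields) {
    this.fields = fields;
  }

  @Override
  public DocValues docValues(String field) throws IOException {
    return fields.get(field); // null when the field has no per-document values
  }

  @Override
  public void close() throws IOException {
    // nothing held open in this sketch; real producers close their inputs here
  }
}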
View File

@@ -20,7 +20,6 @@ package org.apache.lucene.index.codecs.lucene3x;
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.PerDocValues;
import org.apache.lucene.index.PerDocWriteState;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentReadState;
@@ -28,6 +27,7 @@ import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.DocValuesFormat;
import org.apache.lucene.index.codecs.FieldInfosFormat;
import org.apache.lucene.index.codecs.NormsFormat;
import org.apache.lucene.index.codecs.PerDocProducer;
import org.apache.lucene.index.codecs.StoredFieldsFormat;
import org.apache.lucene.index.codecs.PerDocConsumer;
import org.apache.lucene.index.codecs.PostingsFormat;
@@ -75,7 +75,7 @@ public class Lucene3xCodec extends Codec {
}
@Override
public PerDocValues docsProducer(SegmentReadState state) throws IOException {
public PerDocProducer docsProducer(SegmentReadState state) throws IOException {
return null;
}

View File

@@ -20,12 +20,12 @@ package org.apache.lucene.index.codecs.lucene40;
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.PerDocValues;
import org.apache.lucene.index.PerDocWriteState;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.codecs.DocValuesFormat;
import org.apache.lucene.index.codecs.PerDocConsumer;
import org.apache.lucene.index.codecs.PerDocProducer;
import org.apache.lucene.store.Directory;
public class Lucene40DocValuesFormat extends DocValuesFormat {
@@ -36,7 +36,7 @@ public class Lucene40DocValuesFormat extends DocValuesFormat {
}
@Override
public PerDocValues docsProducer(SegmentReadState state) throws IOException {
public PerDocProducer docsProducer(SegmentReadState state) throws IOException {
return new Lucene40DocValuesProducer(state);
}

View File

@@ -33,7 +33,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
/**
* Default PerDocValues implementation that uses compound file.
* Default PerDocProducer implementation that uses compound file.
* @lucene.experimental
*/
public class Lucene40DocValuesProducer extends DocValuesReaderBase {

View File

@@ -28,7 +28,7 @@ import org.apache.lucene.index.codecs.DocValuesReaderBase;
import org.apache.lucene.util.IOUtils;
/**
* Implementation of PerDocValues that uses separate files.
* Implementation of PerDocProducer that uses separate files.
* @lucene.experimental
*/
public class SepDocValuesProducer extends DocValuesReaderBase {

View File

@@ -20,12 +20,12 @@ package org.apache.lucene.index.codecs.mocksep;
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.PerDocValues;
import org.apache.lucene.index.PerDocWriteState;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.codecs.DocValuesFormat;
import org.apache.lucene.index.codecs.PerDocConsumer;
import org.apache.lucene.index.codecs.PerDocProducer;
import org.apache.lucene.index.codecs.sep.SepDocValuesConsumer;
import org.apache.lucene.index.codecs.sep.SepDocValuesProducer;
import org.apache.lucene.store.Directory;
@@ -43,7 +43,7 @@ public class MockSepDocValuesFormat extends DocValuesFormat {
}
@Override
public PerDocValues docsProducer(SegmentReadState state) throws IOException {
public PerDocProducer docsProducer(SegmentReadState state) throws IOException {
return new SepDocValuesProducer(state);
}

View File

@@ -1284,7 +1284,7 @@ public class TestAddIndexes extends LuceneTestCase {
w.close();
IndexReader sr = getOnlySegmentReader(r3);
assertEquals(2, sr.numDocs());
DocValues docValues = sr.perDocValues().docValues("dv");
DocValues docValues = sr.docValues("dv");
assertNotNull(docValues);
r3.close();
d3.close();

View File

@@ -18,12 +18,15 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Random;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader.FieldOption;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
@@ -516,29 +519,15 @@ public class TestDuelingCodecs extends LuceneTestCase {
* checks that docvalues across all fields are equivalent
*/
public void assertDocValues(IndexReader leftReader, IndexReader rightReader) throws Exception {
PerDocValues leftPerDoc = MultiPerDocValues.getPerDocs(leftReader);
PerDocValues rightPerDoc = MultiPerDocValues.getPerDocs(rightReader);
Fields leftFields = MultiFields.getFields(leftReader);
Fields rightFields = MultiFields.getFields(rightReader);
// Fields could be null if there are no postings,
// but then it must be null for both
if (leftFields == null || rightFields == null) {
assertNull(info, leftFields);
assertNull(info, rightFields);
return;
}
FieldsEnum fieldsEnum = leftFields.iterator();
String field;
while ((field = fieldsEnum.next()) != null) {
DocValues leftDocValues = leftPerDoc.docValues(field);
DocValues rightDocValues = rightPerDoc.docValues(field);
if (leftDocValues == null || rightDocValues == null) {
assertNull(info, leftDocValues);
assertNull(info, rightDocValues);
continue;
}
Set<String> leftValues = new HashSet<String>(leftReader.getFieldNames(FieldOption.DOC_VALUES));
Set<String> rightValues = new HashSet<String>(rightReader.getFieldNames(FieldOption.DOC_VALUES));
assertEquals(info, leftValues, rightValues);
for (String field : leftValues) {
DocValues leftDocValues = MultiDocValues.getDocValues(leftReader, field);
DocValues rightDocValues = MultiDocValues.getDocValues(rightReader, field);
assertNotNull(info, leftDocValues);
assertNotNull(info, rightDocValues);
assertDocValuesSource(leftDocValues.getDirectSource(), rightDocValues.getDirectSource());
assertDocValuesSource(leftDocValues.getSource(), rightDocValues.getSource());
}

View File

@@ -37,13 +37,13 @@ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.MultiPerDocValues;
import org.apache.lucene.index.PerDocValues;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.PerDocProducer;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
@@ -104,7 +104,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
TopDocs search = searcher.search(query, 10);
assertEquals(5, search.totalHits);
ScoreDoc[] scoreDocs = search.scoreDocs;
DocValues docValues = MultiPerDocValues.getPerDocs(reader).docValues("docId");
DocValues docValues = MultiDocValues.getDocValues(reader, "docId");
Source source = docValues.getSource();
for (int i = 0; i < scoreDocs.length; i++) {
assertEquals(i, scoreDocs[i].doc);
@@ -418,24 +418,8 @@ public class TestDocValuesIndexing extends LuceneTestCase {
d.close();
}
private DocValues getDocValues(IndexReader reader, String field)
throws IOException {
boolean singleSeg = reader.getSequentialSubReaders().length == 1;
PerDocValues perDoc = singleSeg ? reader.getSequentialSubReaders()[0].perDocValues()
: MultiPerDocValues.getPerDocs(reader);
switch (random.nextInt(singleSeg ? 3 : 2)) { // case 2 only if single seg
case 0:
return perDoc.docValues(field);
case 1:
DocValues docValues = perDoc.docValues(field);
if (docValues != null) {
return docValues;
}
throw new RuntimeException("no such field " + field);
case 2:// this only works if we are on a single seg index!
return reader.getSequentialSubReaders()[0].docValues(field);
}
throw new RuntimeException();
private DocValues getDocValues(IndexReader reader, String field) throws IOException {
return MultiDocValues.getDocValues(reader, field);
}
private Source getSource(DocValues values) throws IOException {
@@ -570,7 +554,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
w.forceMerge(1);
IndexReader r = w.getReader();
w.close();
assertEquals(17, r.getSequentialSubReaders()[0].perDocValues().docValues("field").load().getInt(0));
assertEquals(17, r.getSequentialSubReaders()[0].docValues("field").load().getInt(0));
r.close();
d.close();
}
@@ -600,7 +584,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
w.forceMerge(1);
IndexReader r = w.getReader();
w.close();
assertEquals(17, r.getSequentialSubReaders()[0].perDocValues().docValues("field").load().getInt(0));
assertEquals(17, r.getSequentialSubReaders()[0].docValues("field").load().getInt(0));
r.close();
d.close();
}