mirror of https://github.com/apache/lucene.git
LUCENE-9959: Add non thread local based API for term vector reader usage (#180)
This commit is contained in:
parent
15034f6c90
commit
180cfa241b
|
@ -124,6 +124,9 @@ API Changes
|
|||
* LUCENE-9998: Remove unused parameter fis in StoredFieldsWriter.finish() and TermVectorsWriter.finish(),
|
||||
including those subclasses. (kkewwei)
|
||||
|
||||
* LUCENE-9959: Add a non-thread-local API for term vector reader usage. (Zach Chen, Adrien Grand,
|
||||
David Smiley, Robert Muir, Mike Drob)
|
||||
|
||||
Improvements
|
||||
|
||||
* LUCENE-9960: Avoid unnecessary top element replacement for equal elements in PriorityQueue. (Dawid Weiss)
|
||||
|
|
|
@ -18,26 +18,18 @@ package org.apache.lucene.codecs;
|
|||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // javadocs
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.TermVectors;
|
||||
|
||||
/**
|
||||
* Codec API for reading term vectors:
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public abstract class TermVectorsReader implements Cloneable, Closeable {
|
||||
public abstract class TermVectorsReader extends TermVectors implements Cloneable, Closeable {
|
||||
|
||||
/** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
|
||||
protected TermVectorsReader() {}
|
||||
|
||||
/**
|
||||
* Returns term vectors for this document, or null if term vectors were not indexed. If offsets
|
||||
* are available they are in an {@link OffsetAttribute} available from the {@link
|
||||
* org.apache.lucene.index.PostingsEnum}.
|
||||
*/
|
||||
public abstract Fields get(int doc) throws IOException;
|
||||
|
||||
/**
|
||||
* Checks consistency of this reader.
|
||||
*
|
||||
|
|
|
@ -112,10 +112,29 @@ public abstract class BaseCompositeReader<R extends IndexReader> extends Composi
|
|||
}
|
||||
|
||||
@Override
|
||||
public final Fields getTermVectors(int docID) throws IOException {
|
||||
ensureOpen();
|
||||
final int i = readerIndex(docID); // find subreader num
|
||||
return subReaders[i].getTermVectors(docID - starts[i]); // dispatch to subreader
|
||||
public final TermVectors getTermVectorsReader() {
|
||||
TermVectors[] termVectors = new TermVectors[subReaders.length];
|
||||
|
||||
return new TermVectors() {
|
||||
@Override
|
||||
public Fields get(int doc) throws IOException {
|
||||
ensureOpen();
|
||||
final int i = readerIndex(doc); // find subreader num
|
||||
|
||||
if (termVectors[i] != null) {
|
||||
return termVectors[i].get(doc - starts[i]); // dispatch to subreader
|
||||
} else {
|
||||
TermVectors reader = subReaders[i].getTermVectorsReader();
|
||||
if (reader != null) {
|
||||
// the getTermVectorsReader would clone a new instance, hence saving it into an array
|
||||
// to avoid re-cloning from direct subReaders[i].getTermVectorsReader() call
|
||||
termVectors[i] = reader;
|
||||
return reader.get(doc - starts[i]);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -41,10 +41,11 @@ public abstract class CodecReader extends LeafReader {
|
|||
public abstract StoredFieldsReader getFieldsReader();
|
||||
|
||||
/**
|
||||
* Expert: retrieve thread-private TermVectorsReader
|
||||
* Expert: retrieve TermVectorsReader
|
||||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
@Override
|
||||
public abstract TermVectorsReader getTermVectorsReader();
|
||||
|
||||
/**
|
||||
|
@ -88,16 +89,6 @@ public abstract class CodecReader extends LeafReader {
|
|||
getFieldsReader().visitDocument(docID, visitor);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final Fields getTermVectors(int docID) throws IOException {
|
||||
TermVectorsReader termVectorsReader = getTermVectorsReader();
|
||||
if (termVectorsReader == null) {
|
||||
return null;
|
||||
}
|
||||
checkBounds(docID);
|
||||
return termVectorsReader.get(docID);
|
||||
}
|
||||
|
||||
private void checkBounds(int docID) {
|
||||
Objects.checkIndex(docID, maxDoc());
|
||||
}
|
||||
|
|
|
@ -69,7 +69,7 @@ abstract class DocValuesLeafReader extends LeafReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public final Fields getTermVectors(int docID) throws IOException {
|
||||
public TermVectors getTermVectorsReader() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
|
|
|
@ -351,9 +351,8 @@ public abstract class FilterLeafReader extends LeafReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Fields getTermVectors(int docID) throws IOException {
|
||||
ensureOpen();
|
||||
return in.getTermVectors(docID);
|
||||
public TermVectors getTermVectorsReader() {
|
||||
return in.getTermVectorsReader();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -307,8 +307,21 @@ public abstract class IndexReader implements Closeable {
|
|||
/**
|
||||
* Retrieve term vectors for this document, or null if term vectors were not indexed. The returned
|
||||
* Fields instance acts like a single-document inverted index (the docID will be 0).
|
||||
*
|
||||
* @deprecated Use {@link IndexReader#getTermVectorsReader} instead.
|
||||
*/
|
||||
public abstract Fields getTermVectors(int docID) throws IOException;
|
||||
@Deprecated
|
||||
public final Fields getTermVectors(int docID) throws IOException {
|
||||
TermVectors termVectors = getTermVectorsReader();
|
||||
if (termVectors != null) {
|
||||
return termVectors.get(docID);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
;
|
||||
|
||||
/** Returns a {@link TermVectors} reader for this index, or null if term vectors were not indexed. */
|
||||
public abstract TermVectors getTermVectorsReader();
|
||||
|
||||
/**
|
||||
* Retrieve term vector for this document and field, or null if term vectors were not indexed. The
|
||||
|
|
|
@ -184,13 +184,18 @@ class MergeReaderWrapper extends LeafReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Fields getTermVectors(int docID) throws IOException {
|
||||
ensureOpen();
|
||||
checkBounds(docID);
|
||||
if (vectors == null) {
|
||||
return null;
|
||||
}
|
||||
return vectors.get(docID);
|
||||
public TermVectors getTermVectorsReader() {
|
||||
return new TermVectors() {
|
||||
@Override
|
||||
public Fields get(int docID) throws IOException {
|
||||
ensureOpen();
|
||||
checkBounds(docID);
|
||||
if (vectors == null) {
|
||||
return null;
|
||||
}
|
||||
return vectors.get(docID);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -300,21 +300,26 @@ public class ParallelLeafReader extends LeafReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Fields getTermVectors(int docID) throws IOException {
|
||||
ensureOpen();
|
||||
ParallelFields fields = null;
|
||||
for (Map.Entry<String, LeafReader> ent : tvFieldToReader.entrySet()) {
|
||||
String fieldName = ent.getKey();
|
||||
Terms vector = ent.getValue().getTermVector(docID, fieldName);
|
||||
if (vector != null) {
|
||||
if (fields == null) {
|
||||
fields = new ParallelFields();
|
||||
public TermVectors getTermVectorsReader() {
|
||||
return new TermVectors() {
|
||||
@Override
|
||||
public Fields get(int doc) throws IOException {
|
||||
ensureOpen();
|
||||
ParallelFields fields = null;
|
||||
for (Map.Entry<String, LeafReader> ent : tvFieldToReader.entrySet()) {
|
||||
String fieldName = ent.getKey();
|
||||
Terms vector = ent.getValue().getTermVector(doc, fieldName);
|
||||
if (vector != null) {
|
||||
if (fields == null) {
|
||||
fields = new ParallelFields();
|
||||
}
|
||||
fields.addField(fieldName, vector);
|
||||
}
|
||||
}
|
||||
fields.addField(fieldName, vector);
|
||||
}
|
||||
}
|
||||
|
||||
return fields;
|
||||
return fields;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -57,7 +57,7 @@ final class SegmentCoreReaders {
|
|||
final NormsProducer normsProducer;
|
||||
|
||||
final StoredFieldsReader fieldsReaderOrig;
|
||||
final TermVectorsReader termVectorsReaderOrig;
|
||||
final TermVectorsReader termVectorsReader;
|
||||
final PointsReader pointsReader;
|
||||
final VectorReader vectorReader;
|
||||
final CompoundDirectory cfsReader;
|
||||
|
@ -80,14 +80,6 @@ final class SegmentCoreReaders {
|
|||
}
|
||||
};
|
||||
|
||||
final CloseableThreadLocal<TermVectorsReader> termVectorsLocal =
|
||||
new CloseableThreadLocal<TermVectorsReader>() {
|
||||
@Override
|
||||
protected TermVectorsReader initialValue() {
|
||||
return (termVectorsReaderOrig == null) ? null : termVectorsReaderOrig.clone();
|
||||
}
|
||||
};
|
||||
|
||||
private final Set<IndexReader.ClosedListener> coreClosedListeners =
|
||||
Collections.synchronizedSet(new LinkedHashSet<IndexReader.ClosedListener>());
|
||||
|
||||
|
@ -134,13 +126,13 @@ final class SegmentCoreReaders {
|
|||
.fieldsReader(cfsDir, si.info, coreFieldInfos, context);
|
||||
|
||||
if (coreFieldInfos.hasVectors()) { // open term vector files only as needed
|
||||
termVectorsReaderOrig =
|
||||
termVectorsReader =
|
||||
si.info
|
||||
.getCodec()
|
||||
.termVectorsFormat()
|
||||
.vectorsReader(cfsDir, si.info, coreFieldInfos, context);
|
||||
} else {
|
||||
termVectorsReaderOrig = null;
|
||||
termVectorsReader = null;
|
||||
}
|
||||
|
||||
if (coreFieldInfos.hasPointValues()) {
|
||||
|
@ -186,10 +178,9 @@ final class SegmentCoreReaders {
|
|||
if (ref.decrementAndGet() == 0) {
|
||||
try (Closeable finalizer = this::notifyCoreClosedListeners) {
|
||||
IOUtils.close(
|
||||
termVectorsLocal,
|
||||
fieldsReaderLocal,
|
||||
fields,
|
||||
termVectorsReaderOrig,
|
||||
termVectorsReader,
|
||||
fieldsReaderOrig,
|
||||
cfsReader,
|
||||
normsProducer,
|
||||
|
|
|
@ -242,12 +242,6 @@ public final class SegmentReader extends CodecReader {
|
|||
return si.info.maxDoc();
|
||||
}
|
||||
|
||||
@Override
|
||||
public TermVectorsReader getTermVectorsReader() {
|
||||
ensureOpen();
|
||||
return core.termVectorsLocal.get();
|
||||
}
|
||||
|
||||
@Override
|
||||
public StoredFieldsReader getFieldsReader() {
|
||||
ensureOpen();
|
||||
|
@ -310,6 +304,16 @@ public final class SegmentReader extends CodecReader {
|
|||
|
||||
private final Set<ClosedListener> readerClosedListeners = new CopyOnWriteArraySet<>();
|
||||
|
||||
@Override
|
||||
public TermVectorsReader getTermVectorsReader() {
|
||||
ensureOpen();
|
||||
if (core.termVectorsReader != null) {
|
||||
return core.termVectorsReader.clone();
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
void notifyReaderClosedListeners() throws IOException {
|
||||
synchronized (readerClosedListeners) {
|
||||
|
|
|
@ -0,0 +1,33 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.index;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
|
||||
/** Index API for accessing term vectors. */
|
||||
public abstract class TermVectors {
|
||||
/** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
|
||||
protected TermVectors() {}
|
||||
|
||||
/**
|
||||
* Returns term vectors for this document, or null if term vectors were not indexed. If offsets
|
||||
* are available they are in an {@link OffsetAttribute} available from the {@link
|
||||
* org.apache.lucene.index.PostingsEnum}.
|
||||
*/
|
||||
public abstract Fields get(int doc) throws IOException;
|
||||
}
|
|
@ -19,7 +19,14 @@ package org.apache.lucene.index;
|
|||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.*;
|
||||
import org.apache.lucene.document.BinaryDocValuesField;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.IntPoint;
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.document.SortedDocValuesField;
|
||||
import org.apache.lucene.document.SortedNumericDocValuesField;
|
||||
import org.apache.lucene.document.SortedSetDocValuesField;
|
||||
import org.apache.lucene.index.ExitableDirectoryReader.ExitingReaderException;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.index;
|
|||
import java.io.IOException;
|
||||
import java.lang.reflect.Method;
|
||||
import java.lang.reflect.Modifier;
|
||||
import java.util.Objects;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
@ -56,10 +57,10 @@ public class TestFilterCodecReader extends LuceneTestCase {
|
|||
final Method subClassMethod =
|
||||
subClass.getDeclaredMethod(
|
||||
superClassMethod.getName(), superClassMethod.getParameterTypes());
|
||||
assertEquals(
|
||||
"getReturnType() difference",
|
||||
superClassMethod.getReturnType(),
|
||||
subClassMethod.getReturnType());
|
||||
assertTrue(
|
||||
"getReturnType() difference and not compatible",
|
||||
isTypeEqualOrAssignable(
|
||||
superClassMethod.getReturnType(), subClassMethod.getReturnType()));
|
||||
} catch (
|
||||
@SuppressWarnings("unused")
|
||||
NoSuchMethodException e) {
|
||||
|
@ -67,4 +68,8 @@ public class TestFilterCodecReader extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isTypeEqualOrAssignable(Class<?> superClass, Class<?> subClass) {
|
||||
return Objects.equals(subClass, superClass) || superClass.isAssignableFrom(subClass);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,6 +24,7 @@ import java.util.concurrent.ExecutorService;
|
|||
import java.util.concurrent.LinkedBlockingQueue;
|
||||
import java.util.concurrent.ThreadPoolExecutor;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import org.apache.lucene.codecs.TermVectorsReader;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
|
@ -66,7 +67,7 @@ public class TestSegmentToThreadMapping extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Fields getTermVectors(int doc) {
|
||||
public TermVectorsReader getTermVectorsReader() {
|
||||
return null;
|
||||
}
|
||||
|
||||
|
|
|
@ -18,11 +18,15 @@ package org.apache.lucene.search;
|
|||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.*;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.TermVectors;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
@ -123,11 +127,13 @@ public class TestMultiThreadTermVectors extends LuceneTestCase {
|
|||
private void testTermVectors() throws Exception {
|
||||
// check:
|
||||
int numDocs = reader.numDocs();
|
||||
TermVectors termVectors = reader.getTermVectorsReader();
|
||||
for (int docId = 0; docId < numDocs; docId++) {
|
||||
Fields vectors = reader.getTermVectors(docId);
|
||||
// reader is StandardDirectoryReader, method impl from BaseCompositeReader
|
||||
Fields vectors = termVectors.get(docId);
|
||||
// verify vectors result
|
||||
verifyVectors(vectors, docId);
|
||||
Terms vector = reader.getTermVectors(docId).terms("field");
|
||||
Terms vector = termVectors.get(docId).terms("field");
|
||||
verifyVector(vector.iterator(), docId);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,6 +33,7 @@ import org.apache.lucene.index.SortedDocValues;
|
|||
import org.apache.lucene.index.SortedNumericDocValues;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.index.StoredFieldVisitor;
|
||||
import org.apache.lucene.index.TermVectors;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.VectorValues;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
|
@ -168,11 +169,16 @@ public class TermVectorLeafReader extends LeafReader {
|
|||
public void checkIntegrity() throws IOException {}
|
||||
|
||||
@Override
|
||||
public Fields getTermVectors(int docID) throws IOException {
|
||||
if (docID != 0) {
|
||||
return null;
|
||||
}
|
||||
return fields;
|
||||
public TermVectors getTermVectorsReader() {
|
||||
return new TermVectors() {
|
||||
@Override
|
||||
public Fields get(int docID) {
|
||||
if (docID != 0) {
|
||||
return null;
|
||||
}
|
||||
return fields;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -17,7 +17,9 @@
|
|||
package org.apache.lucene.search.uhighlight;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.TermVectors;
|
||||
import org.apache.lucene.index.Terms;
|
||||
|
||||
/**
|
||||
|
@ -35,11 +37,21 @@ public class PostingsWithTermVectorsOffsetStrategy extends FieldOffsetStrategy {
|
|||
@Override
|
||||
public OffsetsEnum getOffsetsEnum(LeafReader leafReader, int docId, String content)
|
||||
throws IOException {
|
||||
Terms docTerms = leafReader.getTermVector(docId, getField());
|
||||
if (docTerms == null) {
|
||||
Terms tvTerms = null;
|
||||
|
||||
TermVectors termVectors = leafReader.getTermVectorsReader();
|
||||
if (termVectors != null) {
|
||||
Fields vectors = termVectors.get(docId);
|
||||
if (vectors != null) {
|
||||
tvTerms = vectors.terms(getField());
|
||||
}
|
||||
}
|
||||
|
||||
if (tvTerms == null) {
|
||||
return OffsetsEnum.EMPTY;
|
||||
}
|
||||
leafReader = new TermVectorFilteredLeafReader(leafReader, docTerms, getField());
|
||||
|
||||
leafReader = new TermVectorFilteredLeafReader(leafReader, tvTerms, getField());
|
||||
|
||||
return createOffsetsEnumFromReader(leafReader, docId);
|
||||
}
|
||||
|
|
|
@ -17,7 +17,9 @@
|
|||
package org.apache.lucene.search.uhighlight;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.TermVectors;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.search.highlight.TermVectorLeafReader;
|
||||
|
||||
|
@ -40,7 +42,16 @@ public class TermVectorOffsetStrategy extends FieldOffsetStrategy {
|
|||
@Override
|
||||
public OffsetsEnum getOffsetsEnum(LeafReader reader, int docId, String content)
|
||||
throws IOException {
|
||||
Terms tvTerms = reader.getTermVector(docId, getField());
|
||||
Terms tvTerms = null;
|
||||
|
||||
TermVectors termVectors = reader.getTermVectorsReader();
|
||||
if (termVectors != null) {
|
||||
Fields vectors = termVectors.get(docId);
|
||||
if (vectors != null) {
|
||||
tvTerms = vectors.terms(getField());
|
||||
}
|
||||
}
|
||||
|
||||
if (tvTerms == null) {
|
||||
return OffsetsEnum.EMPTY;
|
||||
}
|
||||
|
|
|
@ -48,6 +48,7 @@ import org.apache.lucene.index.MultiReader;
|
|||
import org.apache.lucene.index.ReaderUtil;
|
||||
import org.apache.lucene.index.StoredFieldVisitor;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermVectors;
|
||||
import org.apache.lucene.queries.spans.SpanQuery;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
|
@ -1102,11 +1103,10 @@ public class UnifiedHighlighter {
|
|||
return this.values;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Wraps an IndexReader that remembers/caches the last call to {@link
|
||||
* LeafReader#getTermVectors(int)} so that if the next call has the same ID, then it is reused. If
|
||||
* TV's were column-stride (like doc-values), there would be no need for this.
|
||||
* Wraps an IndexReader that remembers/caches the last call to {@link TermVectors#get(int)} so
|
||||
* that if the next call has the same ID, then it is reused. If TV's were column-stride (like
|
||||
* doc-values), there would be no need for this.
|
||||
*/
|
||||
private static class TermVectorReusingLeafReader extends FilterLeafReader {
|
||||
|
||||
|
@ -1136,12 +1136,21 @@ public class UnifiedHighlighter {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Fields getTermVectors(int docID) throws IOException {
|
||||
if (docID != lastDocId) {
|
||||
lastDocId = docID;
|
||||
tvFields = in.getTermVectors(docID);
|
||||
public TermVectors getTermVectorsReader() {
|
||||
if (in.getTermVectorsReader() == null) {
|
||||
return null;
|
||||
}
|
||||
return tvFields;
|
||||
|
||||
return new TermVectors() {
|
||||
@Override
|
||||
public Fields get(int docID) throws IOException {
|
||||
if (docID != lastDocId) {
|
||||
lastDocId = docID;
|
||||
tvFields = in.getTermVectorsReader().get(docID);
|
||||
}
|
||||
return tvFields;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -40,6 +40,7 @@ import org.apache.lucene.index.LeafReader;
|
|||
import org.apache.lucene.index.ParallelLeafReader;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermVectors;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
|
@ -133,20 +134,23 @@ public class TestUnifiedHighlighterTermVec extends LuceneTestCase {
|
|||
@Override
|
||||
public LeafReader wrap(LeafReader reader) {
|
||||
return new FilterLeafReader(reader) {
|
||||
BitSet seenDocIDs = new BitSet();
|
||||
|
||||
@Override
|
||||
public Fields getTermVectors(int docID) throws IOException {
|
||||
// if we're invoked by ParallelLeafReader then we can't do our assertion. TODO see
|
||||
// LUCENE-6868
|
||||
if (callStackContains(ParallelLeafReader.class) == false
|
||||
&& callStackContains(CheckIndex.class) == false) {
|
||||
assertFalse(
|
||||
"Should not request TVs for doc more than once.", seenDocIDs.get(docID));
|
||||
seenDocIDs.set(docID);
|
||||
}
|
||||
|
||||
return super.getTermVectors(docID);
|
||||
public TermVectors getTermVectorsReader() {
|
||||
BitSet seenDocIDs = new BitSet();
|
||||
return new TermVectors() {
|
||||
@Override
|
||||
public Fields get(int docID) throws IOException {
|
||||
// if we're invoked by ParallelLeafReader then we can't do our assertion. TODO
|
||||
// see LUCENE-6868
|
||||
if (callStackContains(ParallelLeafReader.class) == false
|
||||
&& callStackContains(CheckIndex.class) == false) {
|
||||
assertFalse(
|
||||
"Should not request TVs for doc more than once.", seenDocIDs.get(docID));
|
||||
seenDocIDs.set(docID);
|
||||
}
|
||||
return reader.getTermVectorsReader().get(docID);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -1235,6 +1235,20 @@ public class MemoryIndex {
|
|||
fieldInfos = new FieldInfos(fieldInfosArr);
|
||||
}
|
||||
|
||||
@Override
|
||||
public TermVectors getTermVectorsReader() {
|
||||
return new TermVectors() {
|
||||
@Override
|
||||
public Fields get(int docID) {
|
||||
if (docID == 0) {
|
||||
return memoryFields;
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private Info getInfoForExpectedDocValuesType(String fieldName, DocValuesType expectedType) {
|
||||
if (expectedType == DocValuesType.NONE) {
|
||||
return null;
|
||||
|
@ -1722,15 +1736,6 @@ public class MemoryIndex {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Fields getTermVectors(int docID) {
|
||||
if (docID == 0) {
|
||||
return memoryFields;
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int numDocs() {
|
||||
if (DEBUG) System.err.println("MemoryIndexReader.numDocs");
|
||||
|
|
|
@ -86,12 +86,6 @@ public class AssertingLeafReader extends FilterLeafReader {
|
|||
return terms == null ? null : new AssertingTerms(terms);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Fields getTermVectors(int docID) throws IOException {
|
||||
Fields fields = super.getTermVectors(docID);
|
||||
return fields == null ? null : new AssertingFields(fields);
|
||||
}
|
||||
|
||||
/** Wraps a Fields but with additional asserts */
|
||||
public static class AssertingFields extends FilterFields {
|
||||
public AssertingFields(Fields in) {
|
||||
|
|
|
@ -56,15 +56,20 @@ public final class FieldFilterLeafReader extends FilterLeafReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Fields getTermVectors(int docID) throws IOException {
|
||||
Fields f = super.getTermVectors(docID);
|
||||
if (f == null) {
|
||||
return null;
|
||||
}
|
||||
f = new FieldFilterFields(f);
|
||||
// we need to check for emptyness, so we can return
|
||||
// null:
|
||||
return f.iterator().hasNext() ? f : null;
|
||||
public TermVectors getTermVectorsReader() {
|
||||
return new TermVectors() {
|
||||
@Override
|
||||
public Fields get(int docID) throws IOException {
|
||||
Fields f = in.getTermVectorsReader().get(docID);
|
||||
if (f == null) {
|
||||
return null;
|
||||
}
|
||||
f = new FieldFilterFields(f);
|
||||
// we need to check for emptyness, so we can return
|
||||
// null:
|
||||
return f.iterator().hasNext() ? f : null;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -23,9 +23,9 @@ import static org.junit.Assert.assertTrue;
|
|||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import org.apache.lucene.codecs.TermVectorsReader;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafMetaData;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
|
@ -241,7 +241,7 @@ public class QueryUtils {
|
|||
public void checkIntegrity() throws IOException {}
|
||||
|
||||
@Override
|
||||
public Fields getTermVectors(int docID) throws IOException {
|
||||
public TermVectorsReader getTermVectorsReader() {
|
||||
return null;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue