Revert "LUCENE-9959: Add non thread local based API for term vector reader usage (#180)" (#280)

This reverts commit 180cfa241b.
This commit is contained in:
zacharymorn 2021-09-03 00:31:18 -07:00 committed by GitHub
parent 44e9f5de53
commit d4e4fe22b1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
25 changed files with 135 additions and 254 deletions

View File

@ -133,9 +133,6 @@ API Changes
* LUCENE-9998: Remove unused parameter fis in StoredFieldsWriter.finish() and TermVectorsWriter.finish(),
including those subclasses. (kkewwei)
* LUCENE-9959: Add non thread local based API for term vector reader usage. (Zach Chen, Adrien Grand,
David Smiley, Robert Muir, Mike Drob)
* LUCENE-7020: TieredMergePolicy#setMaxMergeAtOnceExplicit has been removed.
TieredMergePolicy no longer sets a limit on the maximum number of segments
that can be merged at once via a forced merge. (Adrien Grand, Shawn Heisey)

View File

@ -18,18 +18,26 @@ package org.apache.lucene.codecs;
import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.index.TermVectors;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // javadocs
import org.apache.lucene.index.Fields;
/**
* Codec API for reading term vectors:
*
* @lucene.experimental
*/
public abstract class TermVectorsReader extends TermVectors implements Cloneable, Closeable {
public abstract class TermVectorsReader implements Cloneable, Closeable {
/** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
protected TermVectorsReader() {}
/**
* Returns term vectors for this document, or null if term vectors were not indexed. If offsets
* are available they are in an {@link OffsetAttribute} available from the {@link
* org.apache.lucene.index.PostingsEnum}.
*/
public abstract Fields get(int doc) throws IOException;
/**
* Checks consistency of this reader.
*

View File

@ -112,29 +112,10 @@ public abstract class BaseCompositeReader<R extends IndexReader> extends Composi
}
@Override
public final TermVectors getTermVectorsReader() {
TermVectors[] termVectors = new TermVectors[subReaders.length];
return new TermVectors() {
@Override
public Fields get(int doc) throws IOException {
ensureOpen();
final int i = readerIndex(doc); // find subreader num
if (termVectors[i] != null) {
return termVectors[i].get(doc - starts[i]); // dispatch to subreader
} else {
TermVectors reader = subReaders[i].getTermVectorsReader();
if (reader != null) {
// the getTermVectorsReader would clone a new instance, hence saving it into an array
// to avoid re-cloning from direct subReaders[i].getTermVectorsReader() call
termVectors[i] = reader;
return reader.get(doc - starts[i]);
}
return null;
}
}
};
public final Fields getTermVectors(int docID) throws IOException {
ensureOpen();
final int i = readerIndex(docID); // find subreader num
return subReaders[i].getTermVectors(docID - starts[i]); // dispatch to subreader
}
@Override

View File

@ -42,11 +42,10 @@ public abstract class CodecReader extends LeafReader {
public abstract StoredFieldsReader getFieldsReader();
/**
* Expert: retrieve TermVectorsReader
* Expert: retrieve thread-private TermVectorsReader
*
* @lucene.internal
*/
@Override
public abstract TermVectorsReader getTermVectorsReader();
/**
@ -90,6 +89,16 @@ public abstract class CodecReader extends LeafReader {
getFieldsReader().visitDocument(docID, visitor);
}
@Override
public final Fields getTermVectors(int docID) throws IOException {
TermVectorsReader termVectorsReader = getTermVectorsReader();
if (termVectorsReader == null) {
return null;
}
checkBounds(docID);
return termVectorsReader.get(docID);
}
private void checkBounds(int docID) {
Objects.checkIndex(docID, maxDoc());
}

View File

@ -69,7 +69,7 @@ abstract class DocValuesLeafReader extends LeafReader {
}
@Override
public TermVectors getTermVectorsReader() {
public final Fields getTermVectors(int docID) throws IOException {
throw new UnsupportedOperationException();
}

View File

@ -351,8 +351,9 @@ public abstract class FilterLeafReader extends LeafReader {
}
@Override
public TermVectors getTermVectorsReader() {
return in.getTermVectorsReader();
public Fields getTermVectors(int docID) throws IOException {
ensureOpen();
return in.getTermVectors(docID);
}
@Override

View File

@ -307,20 +307,8 @@ public abstract class IndexReader implements Closeable {
/**
* Retrieve term vectors for this document, or null if term vectors were not indexed. The returned
* Fields instance acts like a single-document inverted index (the docID will be 0).
*
* @deprecated Use {@link IndexReader#getTermVectorsReader} instead.
*/
@Deprecated
public final Fields getTermVectors(int docID) throws IOException {
TermVectors termVectors = getTermVectorsReader();
if (termVectors != null) {
return termVectors.get(docID);
}
return null;
}
/** Get TermVectors from this index, or null if term vectors were not indexed. */
public abstract TermVectors getTermVectorsReader();
public abstract Fields getTermVectors(int docID) throws IOException;
/**
* Retrieve term vector for this document and field, or null if term vectors were not indexed. The

View File

@ -184,18 +184,13 @@ class MergeReaderWrapper extends LeafReader {
}
@Override
public TermVectors getTermVectorsReader() {
return new TermVectors() {
@Override
public Fields get(int docID) throws IOException {
ensureOpen();
checkBounds(docID);
if (vectors == null) {
return null;
}
return vectors.get(docID);
}
};
public Fields getTermVectors(int docID) throws IOException {
ensureOpen();
checkBounds(docID);
if (vectors == null) {
return null;
}
return vectors.get(docID);
}
@Override

View File

@ -300,26 +300,21 @@ public class ParallelLeafReader extends LeafReader {
}
@Override
public TermVectors getTermVectorsReader() {
return new TermVectors() {
@Override
public Fields get(int doc) throws IOException {
ensureOpen();
ParallelFields fields = null;
for (Map.Entry<String, LeafReader> ent : tvFieldToReader.entrySet()) {
String fieldName = ent.getKey();
Terms vector = ent.getValue().getTermVector(doc, fieldName);
if (vector != null) {
if (fields == null) {
fields = new ParallelFields();
}
fields.addField(fieldName, vector);
}
public Fields getTermVectors(int docID) throws IOException {
ensureOpen();
ParallelFields fields = null;
for (Map.Entry<String, LeafReader> ent : tvFieldToReader.entrySet()) {
String fieldName = ent.getKey();
Terms vector = ent.getValue().getTermVector(docID, fieldName);
if (vector != null) {
if (fields == null) {
fields = new ParallelFields();
}
return fields;
fields.addField(fieldName, vector);
}
};
}
return fields;
}
@Override

View File

@ -57,7 +57,7 @@ final class SegmentCoreReaders {
final NormsProducer normsProducer;
final StoredFieldsReader fieldsReaderOrig;
final TermVectorsReader termVectorsReader;
final TermVectorsReader termVectorsReaderOrig;
final PointsReader pointsReader;
final KnnVectorsReader knnVectorsReader;
final CompoundDirectory cfsReader;
@ -80,6 +80,14 @@ final class SegmentCoreReaders {
}
};
final CloseableThreadLocal<TermVectorsReader> termVectorsLocal =
new CloseableThreadLocal<TermVectorsReader>() {
@Override
protected TermVectorsReader initialValue() {
return (termVectorsReaderOrig == null) ? null : termVectorsReaderOrig.clone();
}
};
private final Set<IndexReader.ClosedListener> coreClosedListeners =
Collections.synchronizedSet(new LinkedHashSet<IndexReader.ClosedListener>());
@ -126,13 +134,13 @@ final class SegmentCoreReaders {
.fieldsReader(cfsDir, si.info, coreFieldInfos, context);
if (coreFieldInfos.hasVectors()) { // open term vector files only as needed
termVectorsReader =
termVectorsReaderOrig =
si.info
.getCodec()
.termVectorsFormat()
.vectorsReader(cfsDir, si.info, coreFieldInfos, context);
} else {
termVectorsReader = null;
termVectorsReaderOrig = null;
}
if (coreFieldInfos.hasPointValues()) {
@ -178,9 +186,10 @@ final class SegmentCoreReaders {
if (ref.decrementAndGet() == 0) {
try (Closeable finalizer = this::notifyCoreClosedListeners) {
IOUtils.close(
termVectorsLocal,
fieldsReaderLocal,
fields,
termVectorsReader,
termVectorsReaderOrig,
fieldsReaderOrig,
cfsReader,
normsProducer,

View File

@ -242,6 +242,12 @@ public final class SegmentReader extends CodecReader {
return si.info.maxDoc();
}
@Override
public TermVectorsReader getTermVectorsReader() {
ensureOpen();
return core.termVectorsLocal.get();
}
@Override
public StoredFieldsReader getFieldsReader() {
ensureOpen();
@ -304,16 +310,6 @@ public final class SegmentReader extends CodecReader {
private final Set<ClosedListener> readerClosedListeners = new CopyOnWriteArraySet<>();
@Override
public TermVectorsReader getTermVectorsReader() {
ensureOpen();
if (core.termVectorsReader != null) {
return core.termVectorsReader.clone();
} else {
return null;
}
}
@Override
void notifyReaderClosedListeners() throws IOException {
synchronized (readerClosedListeners) {

View File

@ -1,33 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
/** Index API to access TermVectors */
public abstract class TermVectors {
/** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
protected TermVectors() {}
/**
* Returns term vectors for this document, or null if term vectors were not indexed. If offsets
* are available they are in an {@link OffsetAttribute} available from the {@link
* org.apache.lucene.index.PostingsEnum}.
*/
public abstract Fields get(int doc) throws IOException;
}

View File

@ -19,14 +19,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.*;
import org.apache.lucene.index.ExitableDirectoryReader.ExitingReaderException;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;

View File

@ -19,7 +19,6 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.lang.reflect.Method;
import java.lang.reflect.Modifier;
import java.util.Objects;
import org.apache.lucene.document.Document;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
@ -57,10 +56,10 @@ public class TestFilterCodecReader extends LuceneTestCase {
final Method subClassMethod =
subClass.getDeclaredMethod(
superClassMethod.getName(), superClassMethod.getParameterTypes());
assertTrue(
"getReturnType() difference and not compatible",
isTypeEqualOrAssignable(
superClassMethod.getReturnType(), subClassMethod.getReturnType()));
assertEquals(
"getReturnType() difference",
superClassMethod.getReturnType(),
subClassMethod.getReturnType());
} catch (
@SuppressWarnings("unused")
NoSuchMethodException e) {
@ -68,8 +67,4 @@ public class TestFilterCodecReader extends LuceneTestCase {
}
}
}
private boolean isTypeEqualOrAssignable(Class<?> superClass, Class<?> subClass) {
return Objects.equals(subClass, superClass) || superClass.isAssignableFrom(subClass);
}
}

View File

@ -24,7 +24,6 @@ import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
@ -67,7 +66,7 @@ public class TestSegmentToThreadMapping extends LuceneTestCase {
}
@Override
public TermVectorsReader getTermVectorsReader() {
public Fields getTermVectors(int doc) {
return null;
}

View File

@ -18,15 +18,11 @@ package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.document.*;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.TermVectors;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
@ -127,13 +123,11 @@ public class TestMultiThreadTermVectors extends LuceneTestCase {
private void testTermVectors() throws Exception {
// check:
int numDocs = reader.numDocs();
TermVectors termVectors = reader.getTermVectorsReader();
for (int docId = 0; docId < numDocs; docId++) {
// reader is StandardDirectoryReader, method impl from BaseCompositeReader
Fields vectors = termVectors.get(docId);
Fields vectors = reader.getTermVectors(docId);
// verify vectors result
verifyVectors(vectors, docId);
Terms vector = termVectors.get(docId).terms("field");
Terms vector = reader.getTermVectors(docId).terms("field");
verifyVector(vector.iterator(), docId);
}
}

View File

@ -33,7 +33,6 @@ import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.TermVectors;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.index.VectorValues;
@ -170,16 +169,11 @@ public class TermVectorLeafReader extends LeafReader {
public void checkIntegrity() throws IOException {}
@Override
public TermVectors getTermVectorsReader() {
return new TermVectors() {
@Override
public Fields get(int docID) {
if (docID != 0) {
return null;
}
return fields;
}
};
public Fields getTermVectors(int docID) throws IOException {
if (docID != 0) {
return null;
}
return fields;
}
@Override

View File

@ -17,9 +17,7 @@
package org.apache.lucene.search.uhighlight;
import java.io.IOException;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.TermVectors;
import org.apache.lucene.index.Terms;
/**
@ -37,21 +35,11 @@ public class PostingsWithTermVectorsOffsetStrategy extends FieldOffsetStrategy {
@Override
public OffsetsEnum getOffsetsEnum(LeafReader leafReader, int docId, String content)
throws IOException {
Terms tvTerms = null;
TermVectors termVectors = leafReader.getTermVectorsReader();
if (termVectors != null) {
Fields vectors = termVectors.get(docId);
if (vectors != null) {
tvTerms = vectors.terms(getField());
}
}
if (tvTerms == null) {
Terms docTerms = leafReader.getTermVector(docId, getField());
if (docTerms == null) {
return OffsetsEnum.EMPTY;
}
leafReader = new TermVectorFilteredLeafReader(leafReader, tvTerms, getField());
leafReader = new TermVectorFilteredLeafReader(leafReader, docTerms, getField());
return createOffsetsEnumFromReader(leafReader, docId);
}

View File

@ -17,9 +17,7 @@
package org.apache.lucene.search.uhighlight;
import java.io.IOException;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.TermVectors;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.highlight.TermVectorLeafReader;
@ -42,16 +40,7 @@ public class TermVectorOffsetStrategy extends FieldOffsetStrategy {
@Override
public OffsetsEnum getOffsetsEnum(LeafReader reader, int docId, String content)
throws IOException {
Terms tvTerms = null;
TermVectors termVectors = reader.getTermVectorsReader();
if (termVectors != null) {
Fields vectors = termVectors.get(docId);
if (vectors != null) {
tvTerms = vectors.terms(getField());
}
}
Terms tvTerms = reader.getTermVector(docId, getField());
if (tvTerms == null) {
return OffsetsEnum.EMPTY;
}

View File

@ -48,7 +48,6 @@ import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermVectors;
import org.apache.lucene.queries.spans.SpanQuery;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
@ -1103,10 +1102,11 @@ public class UnifiedHighlighter {
return this.values;
}
}
/**
* Wraps an IndexReader that remembers/caches the last call to {@link TermVectors#get(int)} so
* that if the next call has the same ID, then it is reused. If TV's were column-stride (like
* doc-values), there would be no need for this.
* Wraps an IndexReader that remembers/caches the last call to {@link
* LeafReader#getTermVectors(int)} so that if the next call has the same ID, then it is reused. If
* TV's were column-stride (like doc-values), there would be no need for this.
*/
private static class TermVectorReusingLeafReader extends FilterLeafReader {
@ -1136,21 +1136,12 @@ public class UnifiedHighlighter {
}
@Override
public TermVectors getTermVectorsReader() {
if (in.getTermVectorsReader() == null) {
return null;
public Fields getTermVectors(int docID) throws IOException {
if (docID != lastDocId) {
lastDocId = docID;
tvFields = in.getTermVectors(docID);
}
return new TermVectors() {
@Override
public Fields get(int docID) throws IOException {
if (docID != lastDocId) {
lastDocId = docID;
tvFields = in.getTermVectorsReader().get(docID);
}
return tvFields;
}
};
return tvFields;
}
@Override

View File

@ -40,7 +40,6 @@ import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.ParallelLeafReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermVectors;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
@ -134,23 +133,20 @@ public class TestUnifiedHighlighterTermVec extends LuceneTestCase {
@Override
public LeafReader wrap(LeafReader reader) {
return new FilterLeafReader(reader) {
BitSet seenDocIDs = new BitSet();
@Override
public TermVectors getTermVectorsReader() {
BitSet seenDocIDs = new BitSet();
return new TermVectors() {
@Override
public Fields get(int docID) throws IOException {
// if we're invoked by ParallelLeafReader then we can't do our assertion. TODO
// see LUCENE-6868
if (callStackContains(ParallelLeafReader.class) == false
&& callStackContains(CheckIndex.class) == false) {
assertFalse(
"Should not request TVs for doc more than once.", seenDocIDs.get(docID));
seenDocIDs.set(docID);
}
return reader.getTermVectorsReader().get(docID);
}
};
public Fields getTermVectors(int docID) throws IOException {
// if we're invoked by ParallelLeafReader then we can't do our assertion. TODO see
// LUCENE-6868
if (callStackContains(ParallelLeafReader.class) == false
&& callStackContains(CheckIndex.class) == false) {
assertFalse(
"Should not request TVs for doc more than once.", seenDocIDs.get(docID));
seenDocIDs.set(docID);
}
return super.getTermVectors(docID);
}
@Override

View File

@ -1235,20 +1235,6 @@ public class MemoryIndex {
fieldInfos = new FieldInfos(fieldInfosArr);
}
@Override
public TermVectors getTermVectorsReader() {
return new TermVectors() {
@Override
public Fields get(int docID) {
if (docID == 0) {
return memoryFields;
} else {
return null;
}
}
};
}
private Info getInfoForExpectedDocValuesType(String fieldName, DocValuesType expectedType) {
if (expectedType == DocValuesType.NONE) {
return null;
@ -1736,6 +1722,15 @@ public class MemoryIndex {
}
}
@Override
public Fields getTermVectors(int docID) {
if (docID == 0) {
return memoryFields;
} else {
return null;
}
}
@Override
public int numDocs() {
if (DEBUG) System.err.println("MemoryIndexReader.numDocs");

View File

@ -86,6 +86,12 @@ public class AssertingLeafReader extends FilterLeafReader {
return terms == null ? null : new AssertingTerms(terms);
}
@Override
public Fields getTermVectors(int docID) throws IOException {
Fields fields = super.getTermVectors(docID);
return fields == null ? null : new AssertingFields(fields);
}
/** Wraps a Fields but with additional asserts */
public static class AssertingFields extends FilterFields {
public AssertingFields(Fields in) {

View File

@ -56,20 +56,15 @@ public final class FieldFilterLeafReader extends FilterLeafReader {
}
@Override
public TermVectors getTermVectorsReader() {
return new TermVectors() {
@Override
public Fields get(int docID) throws IOException {
Fields f = in.getTermVectorsReader().get(docID);
if (f == null) {
return null;
}
f = new FieldFilterFields(f);
// we need to check for emptiness, so we can return
// null:
return f.iterator().hasNext() ? f : null;
}
};
public Fields getTermVectors(int docID) throws IOException {
Fields f = super.getTermVectors(docID);
if (f == null) {
return null;
}
f = new FieldFilterFields(f);
// we need to check for emptiness, so we can return
// null:
return f.iterator().hasNext() ? f : null;
}
@Override

View File

@ -23,9 +23,9 @@ import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.util.List;
import java.util.Random;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafMetaData;
import org.apache.lucene.index.LeafReader;
@ -241,7 +241,7 @@ public class QueryUtils {
public void checkIntegrity() throws IOException {}
@Override
public TermVectorsReader getTermVectorsReader() {
public Fields getTermVectors(int docID) throws IOException {
return null;
}