mirror of https://github.com/apache/lucene.git
LUCENE-8292: Make TermsEnum fully abstract (#574)
This commit is contained in:
parent
249dfdae01
commit
8dfbbec892
|
@ -121,6 +121,8 @@ API Changes
|
|||
* LUCENE-8609: Remove IndexWriter#numDocs() and IndexWriter#maxDoc() in favor
|
||||
of IndexWriter#getDocStats(). (Simon Willnauer)
|
||||
|
||||
* LUCENE-8292: Make TermsEnum fully abstract. (Simon Willnauer)
|
||||
|
||||
Changes in Runtime Behavior
|
||||
|
||||
* LUCENE-8333: Switch MoreLikeThis.setMaxDocFreqPct to use maxDoc instead of
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
# Apache Lucene Migration Guide
|
||||
|
||||
## TermsEnum.seekExact(BytesRef) is abstract (LUCENE-8662) ##
|
||||
## TermsEnum is now fully abstract (LUCENE-8292) ##
|
||||
|
||||
TermsEnum.seekExact has been changed to abstract, so non-abstract subclass must implement it.
|
||||
The default implementation can be seekCeil(text) == SeekStatus.FOUND.
|
||||
This method is performance critical, so subclass SHOULD have its own implementation
|
||||
if possible instead of using the default implementation.
|
||||
TermsEnum has been changed to be fully abstract, so non-abstract subclasses must implement all of its methods.
|
||||
Non-performance-critical TermsEnums can use BaseTermsEnum as a base class instead. The change was motivated
|
||||
by several performance issues with FilterTermsEnum that caused significant slowdowns and massive memory consumption due
|
||||
to not delegating all methods of TermsEnum. See LUCENE-8292 and LUCENE-8662.
|
||||
|
||||
## Similarity.SimScorer.computeXXXFactor methods removed (LUCENE-8014) ##
|
||||
|
||||
|
|
|
@ -23,6 +23,7 @@ import java.util.Map;
|
|||
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.DocValuesProducer;
|
||||
import org.apache.lucene.index.BaseTermsEnum;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
|
@ -984,7 +985,7 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
|
|||
}
|
||||
}
|
||||
|
||||
private static class TermsDict extends TermsEnum {
|
||||
private static class TermsDict extends BaseTermsEnum {
|
||||
|
||||
final TermsDictEntry entry;
|
||||
final LongValues blockAddresses;
|
||||
|
@ -1031,11 +1032,6 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
|
|||
return term;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean seekExact(BytesRef text) throws IOException {
|
||||
return seekCeil(text) == SeekStatus.FOUND;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void seekExact(long ord) throws IOException {
|
||||
if (ord < 0 || ord >= entry.termsDictSize) {
|
||||
|
|
|
@ -29,6 +29,7 @@ import org.apache.lucene.codecs.BlockTermState;
|
|||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.FieldsProducer;
|
||||
import org.apache.lucene.codecs.PostingsReaderBase;
|
||||
import org.apache.lucene.index.BaseTermsEnum;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.ImpactsEnum;
|
||||
|
@ -286,7 +287,7 @@ public class BlockTermsReader extends FieldsProducer {
|
|||
}
|
||||
|
||||
// Iterates through terms in this field
|
||||
private final class SegmentTermsEnum extends TermsEnum {
|
||||
private final class SegmentTermsEnum extends BaseTermsEnum {
|
||||
private final IndexInput in;
|
||||
private final BlockTermState state;
|
||||
private final boolean doOrd;
|
||||
|
@ -685,11 +686,6 @@ public class BlockTermsReader extends FieldsProducer {
|
|||
return ts;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean seekExact(BytesRef text) throws IOException {
|
||||
return seekCeil(text) == SeekStatus.FOUND;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void seekExact(long ord) throws IOException {
|
||||
//System.out.println("BTR.seek by ord ord=" + ord);
|
||||
|
|
|
@ -20,10 +20,10 @@ package org.apache.lucene.codecs.blocktreeords;
|
|||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.codecs.blocktreeords.FSTOrdsOutputs.Output;
|
||||
import org.apache.lucene.index.BaseTermsEnum;
|
||||
import org.apache.lucene.index.ImpactsEnum;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.TermState;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -34,7 +34,7 @@ import org.apache.lucene.util.automaton.RunAutomaton;
|
|||
import org.apache.lucene.util.fst.FST;
|
||||
|
||||
// NOTE: cannot seek!
|
||||
final class OrdsIntersectTermsEnum extends TermsEnum {
|
||||
final class OrdsIntersectTermsEnum extends BaseTermsEnum {
|
||||
final IndexInput in;
|
||||
|
||||
private OrdsIntersectTermsEnumFrame[] stack;
|
||||
|
|
|
@ -25,10 +25,10 @@ import java.io.PrintStream;
|
|||
|
||||
import org.apache.lucene.codecs.BlockTermState;
|
||||
import org.apache.lucene.codecs.blocktreeords.FSTOrdsOutputs.Output;
|
||||
import org.apache.lucene.index.BaseTermsEnum;
|
||||
import org.apache.lucene.index.ImpactsEnum;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.TermState;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.store.ByteArrayDataInput;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
|
@ -41,7 +41,7 @@ import org.apache.lucene.util.fst.FST;
|
|||
import org.apache.lucene.util.fst.Util;
|
||||
|
||||
/** Iterates through terms in this field. */
|
||||
public final class OrdsSegmentTermsEnum extends TermsEnum {
|
||||
public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
||||
|
||||
// Lazy init:
|
||||
IndexInput in;
|
||||
|
|
|
@ -32,6 +32,7 @@ import org.apache.lucene.codecs.FieldsProducer;
|
|||
import org.apache.lucene.codecs.NormsProducer;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.bloom.FuzzySet.ContainsResult;
|
||||
import org.apache.lucene.index.BaseTermsEnum;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.ImpactsEnum;
|
||||
|
@ -291,7 +292,7 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat {
|
|||
}
|
||||
}
|
||||
|
||||
static final class BloomFilteredTermsEnum extends TermsEnum {
|
||||
static final class BloomFilteredTermsEnum extends BaseTermsEnum {
|
||||
private Terms delegateTerms;
|
||||
private TermsEnum delegateTermsEnum;
|
||||
private final FuzzySet filter;
|
||||
|
|
|
@ -27,6 +27,7 @@ import org.apache.lucene.codecs.FieldsConsumer;
|
|||
import org.apache.lucene.codecs.FieldsProducer;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat;
|
||||
import org.apache.lucene.index.BaseTermsEnum;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.ImpactsEnum;
|
||||
|
@ -707,7 +708,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
|
|||
return hasPayloads;
|
||||
}
|
||||
|
||||
private final class DirectTermsEnum extends TermsEnum {
|
||||
private final class DirectTermsEnum extends BaseTermsEnum {
|
||||
|
||||
private final BytesRef scratch = new BytesRef();
|
||||
private int termOrd;
|
||||
|
@ -952,7 +953,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
|
|||
}
|
||||
}
|
||||
|
||||
private final class DirectIntersectTermsEnum extends TermsEnum {
|
||||
private final class DirectIntersectTermsEnum extends BaseTermsEnum {
|
||||
private final RunAutomaton runAutomaton;
|
||||
private final CompiledAutomaton compiledAutomaton;
|
||||
private int termOrd;
|
||||
|
@ -1516,10 +1517,6 @@ public final class DirectPostingsFormat extends PostingsFormat {
|
|||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean seekExact(BytesRef text) throws IOException {
|
||||
return seekCeil(text) == SeekStatus.FOUND;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -305,7 +305,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
|
|||
}
|
||||
|
||||
// Only wraps common operations for PBF interact
|
||||
abstract class BaseTermsEnum extends TermsEnum {
|
||||
abstract class BaseTermsEnum extends org.apache.lucene.index.BaseTermsEnum {
|
||||
|
||||
/* Current term's ord, starts from 0 */
|
||||
long ord;
|
||||
|
@ -626,11 +626,6 @@ public class FSTOrdTermsReader extends FieldsProducer {
|
|||
super.decodeStats();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean seekExact(BytesRef text) throws IOException {
|
||||
return seekCeil(text) == SeekStatus.FOUND;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SeekStatus seekCeil(BytesRef target) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
|
|
|
@ -259,7 +259,7 @@ public class FSTTermsReader extends FieldsProducer {
|
|||
}
|
||||
|
||||
// Only wraps common operations for PBF interact
|
||||
abstract class BaseTermsEnum extends TermsEnum {
|
||||
abstract class BaseTermsEnum extends org.apache.lucene.index.BaseTermsEnum {
|
||||
|
||||
/* Current term stats + decoded metadata (customized by PBF) */
|
||||
final BlockTermState state;
|
||||
|
@ -519,11 +519,6 @@ public class FSTTermsReader extends FieldsProducer {
|
|||
state.totalTermFreq = meta.totalTermFreq;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean seekExact(BytesRef text) throws IOException {
|
||||
return seekCeil(text) == SeekStatus.FOUND;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SeekStatus seekCeil(BytesRef target) throws IOException {
|
||||
decoded = false;
|
||||
|
|
|
@ -27,6 +27,7 @@ import java.util.Map;
|
|||
import java.util.TreeMap;
|
||||
|
||||
import org.apache.lucene.codecs.FieldsProducer;
|
||||
import org.apache.lucene.index.BaseTermsEnum;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.ImpactsEnum;
|
||||
|
@ -111,7 +112,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
}
|
||||
}
|
||||
|
||||
private class SimpleTextTermsEnum extends TermsEnum {
|
||||
private class SimpleTextTermsEnum extends BaseTermsEnum {
|
||||
private final IndexOptions indexOptions;
|
||||
private int docFreq;
|
||||
private long totalTermFreq;
|
||||
|
|
|
@ -25,6 +25,7 @@ import java.util.SortedMap;
|
|||
import java.util.TreeMap;
|
||||
|
||||
import org.apache.lucene.codecs.TermVectorsReader;
|
||||
import org.apache.lucene.index.BaseTermsEnum;
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.ImpactsEnum;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
|
@ -338,7 +339,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
|
|||
private BytesRef payloads[];
|
||||
}
|
||||
|
||||
private static class SimpleTVTermsEnum extends TermsEnum {
|
||||
private static class SimpleTVTermsEnum extends BaseTermsEnum {
|
||||
SortedMap<BytesRef,SimpleTVPostings> terms;
|
||||
Iterator<Map.Entry<BytesRef,SimpleTextTermVectorsReader.SimpleTVPostings>> iterator;
|
||||
Map.Entry<BytesRef,SimpleTextTermVectorsReader.SimpleTVPostings> current;
|
||||
|
@ -358,11 +359,6 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean seekExact(BytesRef text) throws IOException {
|
||||
return seekCeil(text) == SeekStatus.FOUND;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void seekExact(long ord) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
|
|
|
@ -19,11 +19,11 @@ package org.apache.lucene.codecs.blocktree;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.BaseTermsEnum;
|
||||
import org.apache.lucene.index.ImpactsEnum;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.TermState;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -44,7 +44,7 @@ import org.apache.lucene.util.fst.Outputs;
|
|||
* Likewise, in next it scans until it finds a term that matches the
|
||||
* current automaton transition. */
|
||||
|
||||
final class IntersectTermsEnum extends TermsEnum {
|
||||
final class IntersectTermsEnum extends BaseTermsEnum {
|
||||
|
||||
//static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
|
||||
|
||||
|
|
|
@ -21,10 +21,10 @@ import java.io.IOException;
|
|||
import java.io.PrintStream;
|
||||
|
||||
import org.apache.lucene.codecs.BlockTermState;
|
||||
import org.apache.lucene.index.BaseTermsEnum;
|
||||
import org.apache.lucene.index.ImpactsEnum;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.TermState;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.store.ByteArrayDataInput;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
|
@ -36,7 +36,7 @@ import org.apache.lucene.util.fst.Util;
|
|||
|
||||
/** Iterates through terms in this field. */
|
||||
|
||||
final class SegmentTermsEnum extends TermsEnum {
|
||||
final class SegmentTermsEnum extends BaseTermsEnum {
|
||||
|
||||
// Lazy init:
|
||||
IndexInput in;
|
||||
|
|
|
@ -26,6 +26,7 @@ import java.util.NoSuchElementException;
|
|||
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.TermVectorsReader;
|
||||
import org.apache.lucene.index.BaseTermsEnum;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.SlowImpactsEnum;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
|
@ -825,7 +826,7 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
|
|||
|
||||
}
|
||||
|
||||
private static class TVTermsEnum extends TermsEnum {
|
||||
private static class TVTermsEnum extends BaseTermsEnum {
|
||||
|
||||
private int numTerms, startPos, ord;
|
||||
private int[] prefixLengths, suffixLengths, termFreqs, positionIndex, positions, startOffsets, lengths, payloadIndex;
|
||||
|
@ -906,11 +907,6 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean seekExact(BytesRef text) throws IOException {
|
||||
return seekCeil(text) == SeekStatus.FOUND;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void seekExact(long ord) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
|
|
|
@ -23,6 +23,7 @@ import java.util.Map;
|
|||
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.DocValuesProducer;
|
||||
import org.apache.lucene.index.BaseTermsEnum;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
|
@ -926,7 +927,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close
|
|||
}
|
||||
}
|
||||
|
||||
private static class TermsDict extends TermsEnum {
|
||||
private static class TermsDict extends BaseTermsEnum {
|
||||
|
||||
final TermsDictEntry entry;
|
||||
final LongValues blockAddresses;
|
||||
|
@ -973,11 +974,6 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close
|
|||
return term;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean seekExact(BytesRef text) throws IOException {
|
||||
return seekCeil(text) == SeekStatus.FOUND;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void seekExact(long ord) throws IOException {
|
||||
if (ord < 0 || ord >= entry.termsDictSize) {
|
||||
|
|
|
@ -0,0 +1,75 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.index;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
* A base TermsEnum that adds default implementations for
|
||||
* <ul>
|
||||
* <li>{@link #attributes()}</li>
|
||||
* <li>{@link #termState()}</li>
|
||||
* <li>{@link #seekExact(BytesRef)}</li>
|
||||
* <li>{@link #seekExact(BytesRef, TermState)}</li>
|
||||
* </ul>
|
||||
*
|
||||
* In some cases, the default implementation may be slow and consume huge memory, so subclass SHOULD have its own
|
||||
* implementation if possible.
|
||||
*/
|
||||
public abstract class BaseTermsEnum extends TermsEnum {
|
||||
|
||||
private AttributeSource atts = null;
|
||||
|
||||
/** Sole constructor. (For invocation by subclass
|
||||
* constructors, typically implicit.) */
|
||||
protected BaseTermsEnum() {
|
||||
super();
|
||||
}
|
||||
|
||||
@Override
|
||||
public TermState termState() throws IOException {
|
||||
return new TermState() {
|
||||
@Override
|
||||
public void copyFrom(TermState other) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean seekExact(BytesRef text) throws IOException {
|
||||
return seekCeil(text) == SeekStatus.FOUND;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void seekExact(BytesRef term, TermState state) throws IOException {
|
||||
if (!seekExact(term)) {
|
||||
throw new IllegalArgumentException("term=" + term + " does not exist");
|
||||
}
|
||||
}
|
||||
|
||||
public AttributeSource attributes() {
|
||||
if (atts == null) {
|
||||
atts = new AttributeSource();
|
||||
}
|
||||
return atts;
|
||||
}
|
||||
}
|
|
@ -223,6 +223,16 @@ public abstract class FilterLeafReader extends LeafReader {
|
|||
public ImpactsEnum impacts(int flags) throws IOException {
|
||||
return in.impacts(flags);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void seekExact(BytesRef term, TermState state) throws IOException {
|
||||
in.seekExact(term, state);
|
||||
}
|
||||
|
||||
@Override
|
||||
public TermState termState() throws IOException {
|
||||
return in.termState();
|
||||
}
|
||||
}
|
||||
|
||||
/** Base class for filtering {@link PostingsEnum} implementations. */
|
||||
|
|
|
@ -124,7 +124,7 @@ class FreqProxFields extends Fields {
|
|||
}
|
||||
}
|
||||
|
||||
private static class FreqProxTermsEnum extends TermsEnum {
|
||||
private static class FreqProxTermsEnum extends BaseTermsEnum {
|
||||
final FreqProxTermsWriterPerField terms;
|
||||
final int[] sortedTermIDs;
|
||||
final FreqProxPostingsArray postingsArray;
|
||||
|
@ -180,11 +180,6 @@ class FreqProxFields extends Fields {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean seekExact(BytesRef text) throws IOException {
|
||||
return seekCeil(text) == SeekStatus.FOUND;
|
||||
}
|
||||
|
||||
public void seekExact(long ord) {
|
||||
this.ord = (int) ord;
|
||||
int textStart = postingsArray.textStarts[sortedTermIDs[this.ord]];
|
||||
|
|
|
@ -32,7 +32,7 @@ import org.apache.lucene.util.PriorityQueue;
|
|||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public final class MultiTermsEnum extends TermsEnum {
|
||||
public final class MultiTermsEnum extends BaseTermsEnum {
|
||||
|
||||
private static final Comparator<TermsEnumWithSlice> INDEX_COMPARATOR = new Comparator<TermsEnumWithSlice>() {
|
||||
@Override
|
||||
|
|
|
@ -25,7 +25,7 @@ import org.apache.lucene.util.BytesRefBuilder;
|
|||
/** Implements a {@link TermsEnum} wrapping a provided
|
||||
* {@link SortedDocValues}. */
|
||||
|
||||
class SortedDocValuesTermsEnum extends TermsEnum {
|
||||
class SortedDocValuesTermsEnum extends BaseTermsEnum {
|
||||
private final SortedDocValues values;
|
||||
private int currentOrd = -1;
|
||||
private final BytesRefBuilder scratch;
|
||||
|
|
|
@ -25,7 +25,7 @@ import java.io.IOException;
|
|||
/** Implements a {@link TermsEnum} wrapping a provided
|
||||
* {@link SortedSetDocValues}. */
|
||||
|
||||
class SortedSetDocValuesTermsEnum extends TermsEnum {
|
||||
class SortedSetDocValuesTermsEnum extends BaseTermsEnum {
|
||||
private final SortedSetDocValues values;
|
||||
private long currentOrd = -1;
|
||||
private final BytesRefBuilder scratch;
|
||||
|
|
|
@ -42,21 +42,16 @@ import org.apache.lucene.util.BytesRefIterator;
|
|||
* @lucene.experimental */
|
||||
public abstract class TermsEnum implements BytesRefIterator {
|
||||
|
||||
private AttributeSource atts = null;
|
||||
|
||||
/** Sole constructor. (For invocation by subclass
|
||||
* constructors, typically implicit.) */
|
||||
protected TermsEnum() {
|
||||
}
|
||||
|
||||
/** Returns the related attributes. */
|
||||
public AttributeSource attributes() {
|
||||
if (atts == null) atts = new AttributeSource();
|
||||
return atts;
|
||||
}
|
||||
public abstract AttributeSource attributes();
|
||||
|
||||
/** Represents returned result from {@link #seekCeil}. */
|
||||
public static enum SeekStatus {
|
||||
public enum SeekStatus {
|
||||
/** The term was not found, and the end of iteration was hit. */
|
||||
END,
|
||||
/** The precise term was found. */
|
||||
|
@ -70,15 +65,11 @@ public abstract class TermsEnum implements BytesRefIterator {
|
|||
* unpositioned. For some codecs, seekExact may be substantially faster than {@link #seekCeil}.
|
||||
* <p>
|
||||
*
|
||||
* The default implementation can be <code>seekCeil(text) == SeekStatus.FOUND; </code><br>
|
||||
* But this method is performance critical. In some cases, the default implementation may be slow and consume huge memory,
|
||||
* so subclass SHOULD have its own implementation if possible.
|
||||
*
|
||||
*
|
||||
* @return true if the term is found; return false if the enum is unpositioned.
|
||||
*/
|
||||
public abstract boolean seekExact(BytesRef text) throws IOException;
|
||||
|
||||
|
||||
/** Seeks to the specified term, if it exists, or to the
|
||||
* next (ceiling) term. Returns SeekStatus to
|
||||
* indicate whether exact term was found, a different
|
||||
|
@ -114,11 +105,7 @@ public abstract class TermsEnum implements BytesRefIterator {
|
|||
* @param term the term the TermState corresponds to
|
||||
* @param state the {@link TermState}
|
||||
* */
|
||||
public void seekExact(BytesRef term, TermState state) throws IOException {
|
||||
if (!seekExact(term)) {
|
||||
throw new IllegalArgumentException("term=" + term + " does not exist");
|
||||
}
|
||||
}
|
||||
public abstract void seekExact(BytesRef term, TermState state) throws IOException;
|
||||
|
||||
/** Returns current term. Do not call this when the enum
|
||||
* is unpositioned. */
|
||||
|
@ -192,14 +179,7 @@ public abstract class TermsEnum implements BytesRefIterator {
|
|||
* @see TermState
|
||||
* @see #seekExact(BytesRef, TermState)
|
||||
*/
|
||||
public TermState termState() throws IOException {
|
||||
return new TermState() {
|
||||
@Override
|
||||
public void copyFrom(TermState other) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
};
|
||||
}
|
||||
public abstract TermState termState() throws IOException;
|
||||
|
||||
/** An empty TermsEnum for quickly returning an empty instance e.g.
|
||||
* in {@link org.apache.lucene.search.MultiTermQuery}
|
||||
|
@ -208,15 +188,10 @@ public abstract class TermsEnum implements BytesRefIterator {
|
|||
* This should not be a problem, as the enum is always empty and
|
||||
* the existence of unused Attributes does not matter.
|
||||
*/
|
||||
public static final TermsEnum EMPTY = new TermsEnum() {
|
||||
public static final TermsEnum EMPTY = new BaseTermsEnum() {
|
||||
@Override
|
||||
public SeekStatus seekCeil(BytesRef term) { return SeekStatus.END; }
|
||||
|
||||
@Override
|
||||
public boolean seekExact(BytesRef text) throws IOException {
|
||||
return seekCeil(text) == SeekStatus.FOUND;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void seekExact(long ord) {}
|
||||
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
|
||||
import org.apache.lucene.index.BaseTermsEnum;
|
||||
import org.apache.lucene.index.ImpactsEnum;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
@ -44,7 +45,7 @@ import java.util.Arrays;
|
|||
* {@link BytesRef#compareTo}. Each term in the enumeration is
|
||||
* greater than all that precede it.</p>
|
||||
*/
|
||||
public final class FuzzyTermsEnum extends TermsEnum {
|
||||
public final class FuzzyTermsEnum extends BaseTermsEnum {
|
||||
|
||||
// NOTE: we can't subclass FilteredTermsEnum here because we need to sometimes change actualEnum:
|
||||
private TermsEnum actualEnum;
|
||||
|
|
|
@ -613,7 +613,7 @@ public class TestCodecs extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
private static class DataTermsEnum extends TermsEnum {
|
||||
private static class DataTermsEnum extends BaseTermsEnum {
|
||||
final FieldData fieldData;
|
||||
private int upto = -1;
|
||||
|
||||
|
@ -653,11 +653,6 @@ public class TestCodecs extends LuceneTestCase {
|
|||
return SeekStatus.END;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean seekExact(BytesRef text) throws IOException {
|
||||
return seekCeil(text) == SeekStatus.FOUND;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void seekExact(long ord) {
|
||||
throw new UnsupportedOperationException();
|
||||
|
|
|
@ -1332,7 +1332,7 @@ public class MemoryIndex {
|
|||
}
|
||||
}
|
||||
|
||||
private class MemoryTermsEnum extends TermsEnum {
|
||||
private class MemoryTermsEnum extends BaseTermsEnum {
|
||||
private final Info info;
|
||||
private final BytesRef br = new BytesRef();
|
||||
int termUpto = -1;
|
||||
|
|
|
@ -20,11 +20,11 @@ import java.io.IOException;
|
|||
import java.io.PrintStream;
|
||||
|
||||
import org.apache.lucene.codecs.BlockTermState;
|
||||
import org.apache.lucene.index.BaseTermsEnum;
|
||||
import org.apache.lucene.index.ImpactsEnum;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.SlowImpactsEnum;
|
||||
import org.apache.lucene.index.TermState;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.store.ByteArrayDataInput;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
|
@ -39,7 +39,7 @@ import org.apache.lucene.util.fst.Util;
|
|||
* can cast it to call {@link #seekExact(BytesRef, long)} for
|
||||
* optimistic-concurrency, and also {@link #getVersion} to get the
|
||||
* version of the currently seek'd term. */
|
||||
public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
||||
public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
||||
|
||||
// Lazy init:
|
||||
IndexInput in;
|
||||
|
|
|
@ -34,6 +34,7 @@ import org.apache.lucene.codecs.FieldsProducer;
|
|||
import org.apache.lucene.codecs.NormsProducer;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.TermStats;
|
||||
import org.apache.lucene.index.BaseTermsEnum;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.Fields;
|
||||
|
@ -403,7 +404,7 @@ public final class RAMOnlyPostingsFormat extends PostingsFormat {
|
|||
}
|
||||
}
|
||||
|
||||
static class RAMTermsEnum extends TermsEnum {
|
||||
static class RAMTermsEnum extends BaseTermsEnum {
|
||||
Iterator<String> it;
|
||||
String current;
|
||||
private final RAMField ramField;
|
||||
|
@ -444,11 +445,6 @@ public final class RAMOnlyPostingsFormat extends PostingsFormat {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean seekExact(BytesRef text) throws IOException {
|
||||
return seekCeil(text) == SeekStatus.FOUND;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void seekExact(long ord) {
|
||||
throw new UnsupportedOperationException();
|
||||
|
|
|
@ -519,7 +519,7 @@ public class RandomPostingsTester {
|
|||
}
|
||||
}
|
||||
|
||||
private static class SeedTermsEnum extends TermsEnum {
|
||||
private static class SeedTermsEnum extends BaseTermsEnum {
|
||||
final SortedMap<BytesRef,SeedAndOrd> terms;
|
||||
final IndexOptions maxAllowed;
|
||||
final boolean allowPayloads;
|
||||
|
@ -564,11 +564,6 @@ public class RandomPostingsTester {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean seekExact(BytesRef text) throws IOException {
|
||||
return seekCeil(text) == SeekStatus.FOUND;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void seekExact(long ord) {
|
||||
throw new UnsupportedOperationException();
|
||||
|
|
|
@ -20,6 +20,7 @@ import java.io.IOException;
|
|||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.index.BaseTermsEnum;
|
||||
import org.apache.lucene.index.ImpactsEnum;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
|
@ -176,7 +177,7 @@ public final class SolrRangeQuery extends ExtendedQueryBase implements DocSetPro
|
|||
}
|
||||
|
||||
|
||||
private class RangeTermsEnum extends TermsEnum {
|
||||
private class RangeTermsEnum extends BaseTermsEnum {
|
||||
|
||||
TermsEnum te;
|
||||
BytesRef curr;
|
||||
|
|
|
@ -23,6 +23,7 @@ import java.util.List;
|
|||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.index.BaseTermsEnum;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
|
@ -589,7 +590,7 @@ public class DocTermOrds implements Accountable {
|
|||
* "wrap" our own terms index around the original IndexReader.
|
||||
* Only valid if there are terms for this field from the original reader
|
||||
*/
|
||||
private final class OrdWrappedTermsEnum extends TermsEnum {
|
||||
private final class OrdWrappedTermsEnum extends BaseTermsEnum {
|
||||
private final TermsEnum termsEnum;
|
||||
private BytesRef term;
|
||||
private long ord = -indexInterval-1; // force "real" seek
|
||||
|
@ -703,11 +704,6 @@ public class DocTermOrds implements Accountable {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean seekExact(BytesRef text) throws IOException {
|
||||
return seekCeil(text) == SeekStatus.FOUND;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void seekExact(long targetOrd) throws IOException {
|
||||
int delta = (int) (targetOrd - ordBase - ord);
|
||||
|
|
Loading…
Reference in New Issue