diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 76cf7036e23..f7f60091605 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -133,6 +133,8 @@ API Changes * LUCENE-8609: Remove IndexWriter#numDocs() and IndexWriter#maxDoc() in favor of IndexWriter#getDocStats(). (Simon Willnauer) +* LUCENE-8292: Make TermsEnum fully abstract. (Simon Willnauer) + Changes in Runtime Behavior * LUCENE-8333: Switch MoreLikeThis.setMaxDocFreqPct to use maxDoc instead of diff --git a/lucene/MIGRATE.txt b/lucene/MIGRATE.txt index 16e81019316..4e4a16a8f38 100644 --- a/lucene/MIGRATE.txt +++ b/lucene/MIGRATE.txt @@ -1,11 +1,11 @@ # Apache Lucene Migration Guide -## TermsEnum.seekExact(BytesRef) is abstract (LUCENE-8662) ## +## TermsEnum is now fully abstract (LUCENE-8292) ## -TermsEnum.seekExact has been changed to abstract, so non-abstract subclass must implement it. -The default implementation can be seekCeil(text) == SeekStatus.FOUND. -This method is performance critical, so subclass SHOULD have its own implementation -if possible instead of using the default implementation. +TermsEnum has been changed to be fully abstract, so non-abstract subclasses must implement all its methods. +Non-performance-critical TermsEnums can use BaseTermsEnum as a base class instead. The change was motivated +by several performance issues with FilterTermsEnum that caused significant slowdowns and massive memory consumption due +to not delegating all methods of TermsEnum. 
See LUCENE-8292 and LUCENE-8662 ## RAMDirectory, RAMFile, RAMInputStream, RAMOutputStream removed ## diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java index f5d5780583f..113a957b78e 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java @@ -23,6 +23,7 @@ import java.util.Map; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.index.BaseTermsEnum; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.DocValues; @@ -984,7 +985,7 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close } } - private static class TermsDict extends TermsEnum { + private static class TermsDict extends BaseTermsEnum { final TermsDictEntry entry; final LongValues blockAddresses; @@ -1031,11 +1032,6 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close return term; } - @Override - public boolean seekExact(BytesRef text) throws IOException { - return seekCeil(text) == SeekStatus.FOUND; - } - @Override public void seekExact(long ord) throws IOException { if (ord < 0 || ord >= entry.termsDictSize) { diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java index 9ac0e345694..964f616c6ff 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java @@ -29,6 +29,7 @@ import org.apache.lucene.codecs.BlockTermState; import 
org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.FieldsProducer; import org.apache.lucene.codecs.PostingsReaderBase; +import org.apache.lucene.index.BaseTermsEnum; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.ImpactsEnum; @@ -286,7 +287,7 @@ public class BlockTermsReader extends FieldsProducer { } // Iterates through terms in this field - private final class SegmentTermsEnum extends TermsEnum { + private final class SegmentTermsEnum extends BaseTermsEnum { private final IndexInput in; private final BlockTermState state; private final boolean doOrd; @@ -685,11 +686,6 @@ public class BlockTermsReader extends FieldsProducer { return ts; } - @Override - public boolean seekExact(BytesRef text) throws IOException { - return seekCeil(text) == SeekStatus.FOUND; - } - @Override public void seekExact(long ord) throws IOException { //System.out.println("BTR.seek by ord ord=" + ord); diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java index a8925499c52..9434ca8928d 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java @@ -20,10 +20,10 @@ package org.apache.lucene.codecs.blocktreeords; import java.io.IOException; import org.apache.lucene.codecs.blocktreeords.FSTOrdsOutputs.Output; +import org.apache.lucene.index.BaseTermsEnum; import org.apache.lucene.index.ImpactsEnum; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.TermState; -import org.apache.lucene.index.TermsEnum; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; @@ -34,7 +34,7 @@ import org.apache.lucene.util.automaton.RunAutomaton; import 
org.apache.lucene.util.fst.FST; // NOTE: cannot seek! -final class OrdsIntersectTermsEnum extends TermsEnum { +final class OrdsIntersectTermsEnum extends BaseTermsEnum { final IndexInput in; private OrdsIntersectTermsEnumFrame[] stack; diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java index bd67adc2714..f1d930e4ce4 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java @@ -25,10 +25,10 @@ import java.io.PrintStream; import org.apache.lucene.codecs.BlockTermState; import org.apache.lucene.codecs.blocktreeords.FSTOrdsOutputs.Output; +import org.apache.lucene.index.BaseTermsEnum; import org.apache.lucene.index.ImpactsEnum; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.TermState; -import org.apache.lucene.index.TermsEnum; import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.ArrayUtil; @@ -41,7 +41,7 @@ import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.Util; /** Iterates through terms in this field. 
*/ -public final class OrdsSegmentTermsEnum extends TermsEnum { +public final class OrdsSegmentTermsEnum extends BaseTermsEnum { // Lazy init: IndexInput in; diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java index 28febf38ad0..b9a23992281 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java @@ -32,6 +32,7 @@ import org.apache.lucene.codecs.FieldsProducer; import org.apache.lucene.codecs.NormsProducer; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.bloom.FuzzySet.ContainsResult; +import org.apache.lucene.index.BaseTermsEnum; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.Fields; import org.apache.lucene.index.ImpactsEnum; @@ -291,7 +292,7 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat { } } - static final class BloomFilteredTermsEnum extends TermsEnum { + static final class BloomFilteredTermsEnum extends BaseTermsEnum { private Terms delegateTerms; private TermsEnum delegateTermsEnum; private final FuzzySet filter; diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java index 50ab83dabab..d9590e136ad 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java @@ -27,6 +27,7 @@ import org.apache.lucene.codecs.FieldsConsumer; import org.apache.lucene.codecs.FieldsProducer; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat; +import org.apache.lucene.index.BaseTermsEnum; import 
org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.Fields; import org.apache.lucene.index.ImpactsEnum; @@ -699,7 +700,7 @@ public final class DirectPostingsFormat extends PostingsFormat { return hasPayloads; } - private final class DirectTermsEnum extends TermsEnum { + private final class DirectTermsEnum extends BaseTermsEnum { private final BytesRef scratch = new BytesRef(); private int termOrd; @@ -944,7 +945,7 @@ public final class DirectPostingsFormat extends PostingsFormat { } } - private final class DirectIntersectTermsEnum extends TermsEnum { + private final class DirectIntersectTermsEnum extends BaseTermsEnum { private final RunAutomaton runAutomaton; private final CompiledAutomaton compiledAutomaton; private int termOrd; @@ -1508,10 +1509,6 @@ public final class DirectPostingsFormat extends PostingsFormat { throw new UnsupportedOperationException(); } - @Override - public boolean seekExact(BytesRef text) throws IOException { - return seekCeil(text) == SeekStatus.FOUND; - } } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java index 12f51f919ea..12110d9a811 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java @@ -305,7 +305,7 @@ public class FSTOrdTermsReader extends FieldsProducer { } // Only wraps common operations for PBF interact - abstract class BaseTermsEnum extends TermsEnum { + abstract class BaseTermsEnum extends org.apache.lucene.index.BaseTermsEnum { /* Current term's ord, starts from 0 */ long ord; @@ -626,11 +626,6 @@ public class FSTOrdTermsReader extends FieldsProducer { super.decodeStats(); } - @Override - public boolean seekExact(BytesRef text) throws IOException { - return seekCeil(text) == SeekStatus.FOUND; - } - @Override public SeekStatus seekCeil(BytesRef target) throws IOException 
{ throw new UnsupportedOperationException(); diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java index a8cd0ff868c..43528ced1f0 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java @@ -259,7 +259,7 @@ public class FSTTermsReader extends FieldsProducer { } // Only wraps common operations for PBF interact - abstract class BaseTermsEnum extends TermsEnum { + abstract class BaseTermsEnum extends org.apache.lucene.index.BaseTermsEnum { /* Current term stats + decoded metadata (customized by PBF) */ final BlockTermState state; @@ -519,11 +519,6 @@ public class FSTTermsReader extends FieldsProducer { state.totalTermFreq = meta.totalTermFreq; } - @Override - public boolean seekExact(BytesRef text) throws IOException { - return seekCeil(text) == SeekStatus.FOUND; - } - @Override public SeekStatus seekCeil(BytesRef target) throws IOException { decoded = false; diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java index 743dc4fa0e1..1dec0c869d2 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java @@ -27,6 +27,7 @@ import java.util.Map; import java.util.TreeMap; import org.apache.lucene.codecs.FieldsProducer; +import org.apache.lucene.index.BaseTermsEnum; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.ImpactsEnum; @@ -111,7 +112,7 @@ class SimpleTextFieldsReader extends FieldsProducer { } } - private class SimpleTextTermsEnum extends TermsEnum { + private class SimpleTextTermsEnum extends BaseTermsEnum { private 
final IndexOptions indexOptions; private int docFreq; private long totalTermFreq; diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java index ecbf5cb43c4..d859a053446 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java @@ -25,6 +25,7 @@ import java.util.SortedMap; import java.util.TreeMap; import org.apache.lucene.codecs.TermVectorsReader; +import org.apache.lucene.index.BaseTermsEnum; import org.apache.lucene.index.Fields; import org.apache.lucene.index.ImpactsEnum; import org.apache.lucene.index.IndexFileNames; @@ -338,7 +339,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader { private BytesRef payloads[]; } - private static class SimpleTVTermsEnum extends TermsEnum { + private static class SimpleTVTermsEnum extends BaseTermsEnum { SortedMap terms; Iterator> iterator; Map.Entry current; @@ -358,11 +359,6 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader { } } - @Override - public boolean seekExact(BytesRef text) throws IOException { - return seekCeil(text) == SeekStatus.FOUND; - } - @Override public void seekExact(long ord) throws IOException { throw new UnsupportedOperationException(); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java index 934b5f64a37..848bb0b7528 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java @@ -19,11 +19,11 @@ package org.apache.lucene.codecs.blocktree; import java.io.IOException; +import org.apache.lucene.index.BaseTermsEnum; import 
org.apache.lucene.index.ImpactsEnum; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermsEnum; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; @@ -44,7 +44,7 @@ import org.apache.lucene.util.fst.Outputs; * Likewise, in next it scans until it finds a term that matches the * current automaton transition. */ -final class IntersectTermsEnum extends TermsEnum { +final class IntersectTermsEnum extends BaseTermsEnum { //static boolean DEBUG = BlockTreeTermsWriter.DEBUG; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java index 8e01275378c..c9d0ddf6419 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java @@ -21,10 +21,10 @@ import java.io.IOException; import java.io.PrintStream; import org.apache.lucene.codecs.BlockTermState; +import org.apache.lucene.index.BaseTermsEnum; import org.apache.lucene.index.ImpactsEnum; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.TermState; -import org.apache.lucene.index.TermsEnum; import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.ArrayUtil; @@ -36,7 +36,7 @@ import org.apache.lucene.util.fst.Util; /** Iterates through terms in this field. 
*/ -final class SegmentTermsEnum extends TermsEnum { +final class SegmentTermsEnum extends BaseTermsEnum { // Lazy init: IndexInput in; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java index 5dd3a6431b8..866899c34cb 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java @@ -26,6 +26,7 @@ import java.util.NoSuchElementException; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.TermVectorsReader; +import org.apache.lucene.index.BaseTermsEnum; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.SlowImpactsEnum; import org.apache.lucene.index.PostingsEnum; @@ -825,7 +826,7 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem } - private static class TVTermsEnum extends TermsEnum { + private static class TVTermsEnum extends BaseTermsEnum { private int numTerms, startPos, ord; private int[] prefixLengths, suffixLengths, termFreqs, positionIndex, positions, startOffsets, lengths, payloadIndex; @@ -906,11 +907,6 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem } } - @Override - public boolean seekExact(BytesRef text) throws IOException { - return seekCeil(text) == SeekStatus.FOUND; - } - @Override public void seekExact(long ord) throws IOException { throw new UnsupportedOperationException(); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesProducer.java index bbef30a7952..f660cbcbf75 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesProducer.java +++ 
b/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesProducer.java @@ -23,6 +23,7 @@ import java.util.Map; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.index.BaseTermsEnum; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.DocValues; @@ -926,7 +927,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close } } - private static class TermsDict extends TermsEnum { + private static class TermsDict extends BaseTermsEnum { final TermsDictEntry entry; final LongValues blockAddresses; @@ -973,11 +974,6 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close return term; } - @Override - public boolean seekExact(BytesRef text) throws IOException { - return seekCeil(text) == SeekStatus.FOUND; - } - @Override public void seekExact(long ord) throws IOException { if (ord < 0 || ord >= entry.termsDictSize) { diff --git a/lucene/core/src/java/org/apache/lucene/index/BaseTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/BaseTermsEnum.java new file mode 100644 index 00000000000..0b0d0940350 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/index/BaseTermsEnum.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.index; + +import java.io.IOException; + +import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.BytesRef; + +/** + * A base TermsEnum that adds default implementations for + *
<ul> + * <li>{@link #attributes()}</li> + * <li>{@link #termState()}</li> + * <li>{@link #seekExact(BytesRef)}</li> + * <li>{@link #seekExact(BytesRef, TermState)}</li> + * </ul>
+ * + * In some cases, the default implementation may be slow and consume huge memory, so subclass SHOULD have its own + * implementation if possible. + */ +public abstract class BaseTermsEnum extends TermsEnum { + + private AttributeSource atts = null; + + /** Sole constructor. (For invocation by subclass + * constructors, typically implicit.) */ + protected BaseTermsEnum() { + super(); + } + + @Override + public TermState termState() throws IOException { + return new TermState() { + @Override + public void copyFrom(TermState other) { + throw new UnsupportedOperationException(); + } + }; + } + + @Override + public boolean seekExact(BytesRef text) throws IOException { + return seekCeil(text) == SeekStatus.FOUND; + } + + @Override + public void seekExact(BytesRef term, TermState state) throws IOException { + if (!seekExact(term)) { + throw new IllegalArgumentException("term=" + term + " does not exist"); + } + } + + public AttributeSource attributes() { + if (atts == null) { + atts = new AttributeSource(); + } + return atts; + } +} diff --git a/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java b/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java index 5d1276690e3..1d26d17f5fd 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java @@ -223,6 +223,16 @@ public abstract class FilterLeafReader extends LeafReader { public ImpactsEnum impacts(int flags) throws IOException { return in.impacts(flags); } + + @Override + public void seekExact(BytesRef term, TermState state) throws IOException { + in.seekExact(term, state); + } + + @Override + public TermState termState() throws IOException { + return in.termState(); + } } /** Base class for filtering {@link PostingsEnum} implementations. 
*/ diff --git a/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java b/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java index b1615c300d8..4ec9fd5662b 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java +++ b/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java @@ -124,7 +124,7 @@ class FreqProxFields extends Fields { } } - private static class FreqProxTermsEnum extends TermsEnum { + private static class FreqProxTermsEnum extends BaseTermsEnum { final FreqProxTermsWriterPerField terms; final int[] sortedTermIDs; final FreqProxPostingsArray postingsArray; @@ -180,11 +180,6 @@ class FreqProxFields extends Fields { } } - @Override - public boolean seekExact(BytesRef text) throws IOException { - return seekCeil(text) == SeekStatus.FOUND; - } - public void seekExact(long ord) { this.ord = (int) ord; int textStart = postingsArray.textStarts[sortedTermIDs[this.ord]]; diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java index d20c6c15f1e..57eb42f6921 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java @@ -32,7 +32,7 @@ import org.apache.lucene.util.PriorityQueue; * * @lucene.experimental */ -public final class MultiTermsEnum extends TermsEnum { +public final class MultiTermsEnum extends BaseTermsEnum { private static final Comparator INDEX_COMPARATOR = new Comparator() { @Override diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java index 5fe9a0d643a..2a06de61d57 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java @@ -25,7 +25,7 @@ import org.apache.lucene.util.BytesRefBuilder; /** Implements a 
{@link TermsEnum} wrapping a provided * {@link SortedDocValues}. */ -class SortedDocValuesTermsEnum extends TermsEnum { +class SortedDocValuesTermsEnum extends BaseTermsEnum { private final SortedDocValues values; private int currentOrd = -1; private final BytesRefBuilder scratch; diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java index bbeb5c2e3d6..a07af607c7c 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java @@ -25,7 +25,7 @@ import java.io.IOException; /** Implements a {@link TermsEnum} wrapping a provided * {@link SortedSetDocValues}. */ -class SortedSetDocValuesTermsEnum extends TermsEnum { +class SortedSetDocValuesTermsEnum extends BaseTermsEnum { private final SortedSetDocValues values; private long currentOrd = -1; private final BytesRefBuilder scratch; diff --git a/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java index 7750bbbe59e..2cca1df84ec 100644 --- a/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java @@ -42,21 +42,16 @@ import org.apache.lucene.util.BytesRefIterator; * @lucene.experimental */ public abstract class TermsEnum implements BytesRefIterator { - private AttributeSource atts = null; - /** Sole constructor. (For invocation by subclass * constructors, typically implicit.) */ protected TermsEnum() { } /** Returns the related attributes. */ - public AttributeSource attributes() { - if (atts == null) atts = new AttributeSource(); - return atts; - } + public abstract AttributeSource attributes(); /** Represents returned result from {@link #seekCeil}. 
*/ - public static enum SeekStatus { + public enum SeekStatus { /** The term was not found, and the end of iteration was hit. */ END, /** The precise term was found. */ @@ -70,15 +65,11 @@ public abstract class TermsEnum implements BytesRefIterator { * unpositioned. For some codecs, seekExact may be substantially faster than {@link #seekCeil}. *

* - * The default implementation can be seekCeil(text) == SeekStatus.FOUND;
- * But this method is performance critical. In some cases, the default implementation may be slow and consume huge memory, - * so subclass SHOULD have its own implementation if possible. - * + * * @return true if the term is found; return false if the enum is unpositioned. */ public abstract boolean seekExact(BytesRef text) throws IOException; - /** Seeks to the specified term, if it exists, or to the * next (ceiling) term. Returns SeekStatus to * indicate whether exact term was found, a different @@ -114,11 +105,7 @@ public abstract class TermsEnum implements BytesRefIterator { * @param term the term the TermState corresponds to * @param state the {@link TermState} * */ - public void seekExact(BytesRef term, TermState state) throws IOException { - if (!seekExact(term)) { - throw new IllegalArgumentException("term=" + term + " does not exist"); - } - } + public abstract void seekExact(BytesRef term, TermState state) throws IOException; /** Returns current term. Do not call this when the enum * is unpositioned. */ @@ -192,14 +179,7 @@ public abstract class TermsEnum implements BytesRefIterator { * @see TermState * @see #seekExact(BytesRef, TermState) */ - public TermState termState() throws IOException { - return new TermState() { - @Override - public void copyFrom(TermState other) { - throw new UnsupportedOperationException(); - } - }; - } + public abstract TermState termState() throws IOException; /** An empty TermsEnum for quickly returning an empty instance e.g. * in {@link org.apache.lucene.search.MultiTermQuery} @@ -208,15 +188,10 @@ public abstract class TermsEnum implements BytesRefIterator { * This should not be a problem, as the enum is always empty and * the existence of unused Attributes does not matter. 
*/ - public static final TermsEnum EMPTY = new TermsEnum() { + public static final TermsEnum EMPTY = new BaseTermsEnum() { @Override public SeekStatus seekCeil(BytesRef term) { return SeekStatus.END; } - @Override - public boolean seekExact(BytesRef text) throws IOException { - return seekCeil(text) == SeekStatus.FOUND; - } - @Override public void seekExact(long ord) {} diff --git a/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java b/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java index 375d3c22d5d..a6d56e7ce82 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java @@ -17,6 +17,7 @@ package org.apache.lucene.search; +import org.apache.lucene.index.BaseTermsEnum; import org.apache.lucene.index.ImpactsEnum; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; @@ -44,7 +45,7 @@ import java.util.Arrays; * {@link BytesRef#compareTo}. Each term in the enumeration is * greater than all that precede it.

*/ -public final class FuzzyTermsEnum extends TermsEnum { +public final class FuzzyTermsEnum extends BaseTermsEnum { // NOTE: we can't subclass FilteredTermsEnum here because we need to sometimes change actualEnum: private TermsEnum actualEnum; diff --git a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java index 6955d65ccb4..6094ce67cde 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java @@ -613,7 +613,7 @@ public class TestCodecs extends LuceneTestCase { } } - private static class DataTermsEnum extends TermsEnum { + private static class DataTermsEnum extends BaseTermsEnum { final FieldData fieldData; private int upto = -1; @@ -653,11 +653,6 @@ public class TestCodecs extends LuceneTestCase { return SeekStatus.END; } - @Override - public boolean seekExact(BytesRef text) throws IOException { - return seekCeil(text) == SeekStatus.FOUND; - } - @Override public void seekExact(long ord) { throw new UnsupportedOperationException(); diff --git a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java index d0dae237a07..433727668a3 100644 --- a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java +++ b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java @@ -1333,7 +1333,7 @@ public class MemoryIndex { } } - private class MemoryTermsEnum extends TermsEnum { + private class MemoryTermsEnum extends BaseTermsEnum { private final Info info; private final BytesRef br = new BytesRef(); int termUpto = -1; diff --git a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java index 1f1b7ec74b8..7f43b3f89f0 100644 --- 
a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java +++ b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java @@ -20,11 +20,11 @@ import java.io.IOException; import java.io.PrintStream; import org.apache.lucene.codecs.BlockTermState; +import org.apache.lucene.index.BaseTermsEnum; import org.apache.lucene.index.ImpactsEnum; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.SlowImpactsEnum; import org.apache.lucene.index.TermState; -import org.apache.lucene.index.TermsEnum; import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.ArrayUtil; @@ -39,7 +39,7 @@ import org.apache.lucene.util.fst.Util; * can cast it to call {@link #seekExact(BytesRef, long)} for * optimistic-concurrency, and also {@link #getVersion} to get the * version of the currently seek'd term. */ -public final class IDVersionSegmentTermsEnum extends TermsEnum { +public final class IDVersionSegmentTermsEnum extends BaseTermsEnum { // Lazy init: IndexInput in; diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java index e519ce037e6..0e46b3b1ea2 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java @@ -34,6 +34,7 @@ import org.apache.lucene.codecs.FieldsProducer; import org.apache.lucene.codecs.NormsProducer; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.TermStats; +import org.apache.lucene.index.BaseTermsEnum; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.Fields; @@ -403,7 +404,7 @@ public final class RAMOnlyPostingsFormat extends PostingsFormat 
{ } } - static class RAMTermsEnum extends TermsEnum { + static class RAMTermsEnum extends BaseTermsEnum { Iterator it; String current; private final RAMField ramField; @@ -444,11 +445,6 @@ public final class RAMOnlyPostingsFormat extends PostingsFormat { } } - @Override - public boolean seekExact(BytesRef text) throws IOException { - return seekCeil(text) == SeekStatus.FOUND; - } - @Override public void seekExact(long ord) { throw new UnsupportedOperationException(); diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java b/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java index e4f1e4c9d51..d5cd53c3b14 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java @@ -519,7 +519,7 @@ public class RandomPostingsTester { } } - private static class SeedTermsEnum extends TermsEnum { + private static class SeedTermsEnum extends BaseTermsEnum { final SortedMap terms; final IndexOptions maxAllowed; final boolean allowPayloads; @@ -564,11 +564,6 @@ public class RandomPostingsTester { } } - @Override - public boolean seekExact(BytesRef text) throws IOException { - return seekCeil(text) == SeekStatus.FOUND; - } - @Override public void seekExact(long ord) { throw new UnsupportedOperationException(); diff --git a/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java b/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java index 21ccf032b99..be58bbc38f1 100644 --- a/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java +++ b/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; +import org.apache.lucene.index.BaseTermsEnum; import org.apache.lucene.index.ImpactsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; @@ -176,7 
+177,7 @@ public final class SolrRangeQuery extends ExtendedQueryBase implements DocSetPro } - private class RangeTermsEnum extends TermsEnum { + private class RangeTermsEnum extends BaseTermsEnum { TermsEnum te; BytesRef curr; diff --git a/solr/core/src/java/org/apache/solr/uninverting/DocTermOrds.java b/solr/core/src/java/org/apache/solr/uninverting/DocTermOrds.java index 4ce085c369f..8b5cd5cc577 100644 --- a/solr/core/src/java/org/apache/solr/uninverting/DocTermOrds.java +++ b/solr/core/src/java/org/apache/solr/uninverting/DocTermOrds.java @@ -23,6 +23,7 @@ import java.util.List; import java.util.concurrent.TimeUnit; import org.apache.lucene.codecs.PostingsFormat; +import org.apache.lucene.index.BaseTermsEnum; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.FieldInfo; @@ -589,7 +590,7 @@ public class DocTermOrds implements Accountable { * "wrap" our own terms index around the original IndexReader. * Only valid if there are terms for this field rom the original reader */ - private final class OrdWrappedTermsEnum extends TermsEnum { + private final class OrdWrappedTermsEnum extends BaseTermsEnum { private final TermsEnum termsEnum; private BytesRef term; private long ord = -indexInterval-1; // force "real" seek @@ -703,11 +704,6 @@ public class DocTermOrds implements Accountable { } } - @Override - public boolean seekExact(BytesRef text) throws IOException { - return seekCeil(text) == SeekStatus.FOUND; - } - @Override public void seekExact(long targetOrd) throws IOException { int delta = (int) (targetOrd - ordBase - ord);