diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/autoprefix/AutoPrefixPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/autoprefix/AutoPrefixPostingsFormat.java
deleted file mode 100644
index b83866a8175..00000000000
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/autoprefix/AutoPrefixPostingsFormat.java
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.codecs.autoprefix;
-
-
-import java.io.IOException;
-
-import org.apache.lucene.codecs.FieldsConsumer;
-import org.apache.lucene.codecs.FieldsProducer;
-import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.PostingsReaderBase;
-import org.apache.lucene.codecs.PostingsWriterBase;
-import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader;
-import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
-import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat;
-import org.apache.lucene.codecs.lucene50.Lucene50PostingsReader;
-import org.apache.lucene.codecs.lucene50.Lucene50PostingsWriter;
-import org.apache.lucene.index.SegmentReadState;
-import org.apache.lucene.index.SegmentWriteState;
-import org.apache.lucene.util.IOUtils;
-
-/**
- * Just like {@link Lucene50PostingsFormat} except this format
- * exposes the experimental auto-prefix terms.
- *
- * @lucene.experimental
- */
-
-public final class AutoPrefixPostingsFormat extends PostingsFormat {
-
- private final int minItemsInBlock;
- private final int maxItemsInBlock;
- private final int minItemsInAutoPrefix;
- private final int maxItemsInAutoPrefix;
-
- /** Creates {@code AutoPrefixPostingsFormat} with default settings. */
- public AutoPrefixPostingsFormat() {
- this(BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE,
- BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE,
- 25, 48);
- }
-
-  /** Creates {@code AutoPrefixPostingsFormat} with custom
-   * values for {@code minItemsInAutoPrefix} and {@code
-   * maxItemsInAutoPrefix} passed to the block tree terms dictionary, using
-   * default block sizes.
-   * @see BlockTreeTermsWriter#BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int,int,int) */
- public AutoPrefixPostingsFormat(int minItemsInAutoPrefix, int maxItemsInAutoPrefix) {
- this(BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE,
- BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE,
- minItemsInAutoPrefix,
- maxItemsInAutoPrefix);
- }
-
-  /** Creates {@code AutoPrefixPostingsFormat} with custom
- * values for {@code minBlockSize}, {@code
- * maxBlockSize}, {@code minItemsInAutoPrefix} and {@code maxItemsInAutoPrefix}, passed
- * to block tree terms dictionary.
- * @see BlockTreeTermsWriter#BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int,int,int) */
- public AutoPrefixPostingsFormat(int minItemsInBlock, int maxItemsInBlock, int minItemsInAutoPrefix, int maxItemsInAutoPrefix) {
- super("AutoPrefix");
- BlockTreeTermsWriter.validateSettings(minItemsInBlock,
- maxItemsInBlock);
- BlockTreeTermsWriter.validateAutoPrefixSettings(minItemsInAutoPrefix,
- maxItemsInAutoPrefix);
- this.minItemsInBlock = minItemsInBlock;
- this.maxItemsInBlock = maxItemsInBlock;
- this.minItemsInAutoPrefix = minItemsInAutoPrefix;
- this.maxItemsInAutoPrefix = maxItemsInAutoPrefix;
- }
-
- @Override
- public String toString() {
- return getName();
- }
-
- @Override
- public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
- PostingsWriterBase postingsWriter = new Lucene50PostingsWriter(state);
-
- boolean success = false;
- try {
- FieldsConsumer ret = new BlockTreeTermsWriter(state,
- postingsWriter,
- minItemsInBlock,
- maxItemsInBlock,
- minItemsInAutoPrefix,
- maxItemsInAutoPrefix);
- success = true;
- return ret;
- } finally {
- if (!success) {
- IOUtils.closeWhileHandlingException(postingsWriter);
- }
- }
- }
-
- @Override
- public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
- PostingsReaderBase postingsReader = new Lucene50PostingsReader(state);
- boolean success = false;
- try {
- FieldsProducer ret = new BlockTreeTermsReader(postingsReader, state);
- success = true;
- return ret;
- } finally {
- if (!success) {
- IOUtils.closeWhileHandlingException(postingsReader);
- }
- }
- }
-}
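
For context, the removed format was a thin wrapper around the auto-prefix
flavor of BlockTreeTermsWriter; a minimal sketch of the old opt-in, mirroring
the deleted tests below (TestUtil, MockAnalyzer and dir are assumed to come
from the Lucene test framework):

    Codec codec = TestUtil.alwaysPostingsFormat(new AutoPrefixPostingsFormat(25, 48));
    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
    iwc.setCodec(codec);
    IndexWriter w = new IndexWriter(dir, iwc); // auto-prefix terms were built at flush/merge time
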
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/autoprefix/package-info.java b/lucene/codecs/src/java/org/apache/lucene/codecs/autoprefix/package-info.java
deleted file mode 100644
index b15f2fad7b2..00000000000
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/autoprefix/package-info.java
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * An experimental postings format that automatically indexes appropriate
- * prefix terms for fast range and prefix queries.
- */
-package org.apache.lucene.codecs.autoprefix;
diff --git a/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat b/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
index 753b6d7b3ca..710c6050c67 100644
--- a/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
+++ b/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
@@ -19,4 +19,3 @@ org.apache.lucene.codecs.memory.DirectPostingsFormat
org.apache.lucene.codecs.memory.FSTOrdPostingsFormat
org.apache.lucene.codecs.memory.FSTPostingsFormat
org.apache.lucene.codecs.memory.MemoryPostingsFormat
-org.apache.lucene.codecs.autoprefix.AutoPrefixPostingsFormat
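
With the SPI entry gone, name-based lookup of the format fails at runtime; a
small sketch of the lookup that used to succeed (PostingsFormat.forName is the
standard SPI entry point):

    // After this change the name is no longer registered, so this throws
    // IllegalArgumentException instead of returning the codecs-module format:
    PostingsFormat pf = PostingsFormat.forName("AutoPrefix");
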
diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/autoprefix/TestAutoPrefixPostingsFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/autoprefix/TestAutoPrefixPostingsFormat.java
deleted file mode 100644
index 3c7059edb71..00000000000
--- a/lucene/codecs/src/test/org/apache/lucene/codecs/autoprefix/TestAutoPrefixPostingsFormat.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.codecs.autoprefix;
-
-
-import org.apache.lucene.index.IndexOptions;
-import org.apache.lucene.index.RandomPostingsTester;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.TestUtil;
-
-/**
- * Tests AutoPrefix's postings
- */
-
-// NOTE: we don't extend BasePostingsFormatTestCase because we can only handle DOCS_ONLY fields:
-
-public class TestAutoPrefixPostingsFormat extends LuceneTestCase {
- public void test() throws Exception {
- new RandomPostingsTester(random()).testFull(TestUtil.alwaysPostingsFormat(new AutoPrefixPostingsFormat()),
- createTempDir("autoprefix"),
- IndexOptions.DOCS,
- false);
- }
-}
diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/autoprefix/TestAutoPrefixTerms.java b/lucene/codecs/src/test/org/apache/lucene/codecs/autoprefix/TestAutoPrefixTerms.java
deleted file mode 100644
index 316fc9a5709..00000000000
--- a/lucene/codecs/src/test/org/apache/lucene/codecs/autoprefix/TestAutoPrefixTerms.java
+++ /dev/null
@@ -1,941 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.codecs.autoprefix;
-
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Set;
-
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.document.BinaryDocValuesField;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.FieldType;
-import org.apache.lucene.document.NumericDocValuesField;
-import org.apache.lucene.document.StringField;
-import org.apache.lucene.index.BinaryDocValues;
-import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.IndexOptions;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.MultiDocValues;
-import org.apache.lucene.index.MultiFields;
-import org.apache.lucene.index.NumericDocValues;
-import org.apache.lucene.index.PostingsEnum;
-import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.SerialMergeScheduler;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.PrefixQuery;
-import org.apache.lucene.search.TermRangeQuery;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.AttributeSource;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.FixedBitSet;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.MathUtil;
-import org.apache.lucene.util.StringHelper;
-import org.apache.lucene.util.TestUtil;
-import org.apache.lucene.util.automaton.Automata;
-import org.apache.lucene.util.automaton.CompiledAutomaton;
-
-public class TestAutoPrefixTerms extends LuceneTestCase {
-
- private int minItemsPerBlock = TestUtil.nextInt(random(), 2, 100);
- private int maxItemsPerBlock = 2*(Math.max(2, minItemsPerBlock-1)) + random().nextInt(100);
- private int minTermsAutoPrefix = TestUtil.nextInt(random(), 2, 100);
- private int maxTermsAutoPrefix = random().nextBoolean() ? Math.max(2, (minTermsAutoPrefix-1)*2 + random().nextInt(100)) : Integer.MAX_VALUE;
-
- public TestAutoPrefixTerms() {
- if (LuceneTestCase.VERBOSE) {
- System.out.println("TEST: using minItemsPerBlock=" + minItemsPerBlock + " maxItemsPerBlock=" + maxItemsPerBlock);
- System.out.println("TEST: using minTermsAutoPrefix=" + minTermsAutoPrefix + " maxTermsAutoPrefix=" + maxTermsAutoPrefix);
- }
- }
-
- private final Codec codec = TestUtil.alwaysPostingsFormat(new AutoPrefixPostingsFormat(minItemsPerBlock, maxItemsPerBlock,
- minTermsAutoPrefix, maxTermsAutoPrefix));
-
- // Numbers in a restricted range, encoded in decimal, left-0-padded:
- public void testBasicNumericRanges() throws Exception {
- Directory dir = newDirectory();
- IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
- iwc.setCodec(codec);
- IndexWriter w = new IndexWriter(dir, iwc);
- int numTerms = TestUtil.nextInt(random(), 3000, 50000);
-    Set<String> terms = new HashSet<>();
-    int digits = TestUtil.nextInt(random(), 5, 10);
-    int maxValue = 1;
-    for(int i=0;i<digits;i++) {
-      maxValue *= 10;
-    }
-    String format = "%0" + digits + "d";
-    while (terms.size() < numTerms) {
-      terms.add(String.format(Locale.ROOT, format, random().nextInt(maxValue)));
-    }
-
-    for(String term : terms) {
-      Document doc = new Document();
-      doc.add(new StringField("field", term, Field.Store.NO));
-      doc.add(new NumericDocValuesField("field", Long.parseLong(term)));
-      w.addDocument(doc);
-    }
-
-    if (random().nextBoolean()) {
-      if (VERBOSE) System.out.println("TEST: now force merge");
-      w.forceMerge(1);
-    }
-
-    IndexReader r = DirectoryReader.open(w);
-
-    List<String> sortedTerms = new ArrayList<>(terms);
- Collections.sort(sortedTerms);
-
- if (VERBOSE) {
- System.out.println("TEST: sorted terms:");
- int idx = 0;
- for(String term : sortedTerms) {
- System.out.println(idx + ": " + term);
- idx++;
- }
- }
-
- int iters = atLeast(100);
-    for(int iter=0;iter<iters;iter++) {
-      int min, max;
-      while (true) {
-        min = random().nextInt(maxValue);
-        max = random().nextInt(maxValue);
-        if (min == max) {
-          continue;
-        } else if (min > max) {
-          int x = min;
-          min = max;
-          max = x;
-        }
-        break;
-      }
-
- if (VERBOSE) {
- System.out.println("\nTEST: iter=" + iter + " min=" + min + " max=" + max);
- }
-
- boolean minInclusive = random().nextBoolean();
- boolean maxInclusive = random().nextBoolean();
- BytesRef minTerm = new BytesRef(String.format(Locale.ROOT, format, min));
- BytesRef maxTerm = new BytesRef(String.format(Locale.ROOT, format, max));
- CompiledAutomaton ca = new CompiledAutomaton(Automata.makeBinaryInterval(minTerm, minInclusive, maxTerm, maxInclusive),
- true, false, Integer.MAX_VALUE, true);
-
- TermsEnum te = ca.getTermsEnum(MultiFields.getTerms(r, "field"));
- NumericDocValues docValues = MultiDocValues.getNumericValues(r, "field");
- PostingsEnum postingsEnum = null;
-
- VerifyAutoPrefixTerms verifier = new VerifyAutoPrefixTerms(r.maxDoc(), minTerm, maxTerm);
-
- while (te.next() != null) {
- if (VERBOSE) {
- System.out.println(" got term=" + te.term().utf8ToString());
- }
- verifier.sawTerm(te.term());
- postingsEnum = te.postings(postingsEnum);
- int docID;
- while ((docID = postingsEnum.nextDoc()) != PostingsEnum.NO_MORE_DOCS) {
- long v = docValues.get(docID);
- assert v >= min && v <= max: "docID=" + docID + " v=" + v;
- // The auto-prefix terms should never "overlap" one another, so we should only ever see a given docID one time:
- if (VERBOSE) {
- System.out.println(" got docID=" + docID + " v=" + v);
- }
- verifier.sawDoc(docID);
- }
- }
-
- int startLoc = Collections.binarySearch(sortedTerms, String.format(Locale.ROOT, format, min));
- if (startLoc < 0) {
- startLoc = -startLoc-1;
- } else if (minInclusive == false) {
- startLoc++;
- }
- int endLoc = Collections.binarySearch(sortedTerms, String.format(Locale.ROOT, format, max));
- if (endLoc < 0) {
- endLoc = -endLoc-2;
- } else if (maxInclusive == false) {
- endLoc--;
- }
- verifier.finish(endLoc-startLoc+1, maxTermsAutoPrefix);
- }
-
- r.close();
- w.close();
- dir.close();
- }
-
- private static BytesRef intToBytes(int v) {
- int sortableBits = v ^ 0x80000000;
- BytesRef token = new BytesRef(4);
- token.length = 4;
- int index = 3;
- while (index >= 0) {
- token.bytes[index] = (byte) (sortableBits & 0xff);
- index--;
- sortableBits >>>= 8;
- }
- return token;
- }
-
- // Numbers are encoded in full binary (4 byte ints):
- public void testBinaryNumericRanges() throws Exception {
- if (VERBOSE) {
- System.out.println("TEST: minItemsPerBlock=" + minItemsPerBlock);
- System.out.println("TEST: maxItemsPerBlock=" + maxItemsPerBlock);
- System.out.println("TEST: minTermsAutoPrefix=" + minTermsAutoPrefix);
- System.out.println("TEST: maxTermsAutoPrefix=" + maxTermsAutoPrefix);
- }
- Directory dir = newDirectory();
- IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
- iwc.setCodec(codec);
- IndexWriter w = new IndexWriter(dir, iwc);
- int numTerms = TestUtil.nextInt(random(), 3000, 50000);
-    Set<Integer> terms = new HashSet<>();
- while (terms.size() < numTerms) {
- terms.add(random().nextInt());
- }
-
- for(Integer term : terms) {
- Document doc = new Document();
- doc.add(newStringField("field", intToBytes(term), Field.Store.NO));
- doc.add(new NumericDocValuesField("field", term));
- w.addDocument(doc);
- }
-
- if (random().nextBoolean()) {
- if (VERBOSE) System.out.println("TEST: now force merge");
- w.forceMerge(1);
- }
-
- IndexReader r = DirectoryReader.open(w);
-
-    List<Integer> sortedTerms = new ArrayList<>(terms);
- Collections.sort(sortedTerms);
-
- if (VERBOSE) {
- System.out.println("TEST: sorted terms:");
- int idx = 0;
- for(Integer term : sortedTerms) {
- System.out.println(idx + ": " + term);
- idx++;
- }
- }
-
- int iters = atLeast(100);
-    for(int iter=0;iter<iters;iter++) {
-      int min, max;
-      while (true) {
-        min = random().nextInt();
-        max = random().nextInt();
-        if (min == max) {
-          continue;
-        } else if (min > max) {
-          int x = min;
-          min = max;
-          max = x;
-        }
-        break;
-      }
-
- if (VERBOSE) {
- System.out.println("\nTEST: iter=" + iter + " min=" + min + " (" + intToBytes(min) + ") max=" + max + " (" + intToBytes(max) + ")");
- }
-
- boolean minInclusive = random().nextBoolean();
- BytesRef minTerm = intToBytes(min);
- boolean maxInclusive = random().nextBoolean();
- BytesRef maxTerm = intToBytes(max);
- CompiledAutomaton ca = new CompiledAutomaton(Automata.makeBinaryInterval(minTerm, minInclusive, maxTerm, maxInclusive),
- true, false, Integer.MAX_VALUE, true);
-
- TermsEnum te = ca.getTermsEnum(MultiFields.getTerms(r, "field"));
- NumericDocValues docValues = MultiDocValues.getNumericValues(r, "field");
- PostingsEnum postingsEnum = null;
- VerifyAutoPrefixTerms verifier = new VerifyAutoPrefixTerms(r.maxDoc(), minTerm, maxTerm);
- while (te.next() != null) {
- if (VERBOSE) {
- System.out.println(" got term=" + te.term() + " docFreq=" + te.docFreq());
- }
- verifier.sawTerm(te.term());
- postingsEnum = te.postings(postingsEnum);
- int docID;
- while ((docID = postingsEnum.nextDoc()) != PostingsEnum.NO_MORE_DOCS) {
- long v = docValues.get(docID);
- assert v >= min && v <= max: "docID=" + docID + " v=" + v;
- verifier.sawDoc(docID);
- }
- }
-
- int startLoc = Collections.binarySearch(sortedTerms, min);
- if (startLoc < 0) {
- startLoc = -startLoc-1;
- } else if (minInclusive == false) {
- startLoc++;
- }
- int endLoc = Collections.binarySearch(sortedTerms, max);
- if (endLoc < 0) {
- endLoc = -endLoc-2;
- } else if (maxInclusive == false) {
- endLoc--;
- }
- int expectedHits = endLoc-startLoc+1;
- try {
- verifier.finish(expectedHits, maxTermsAutoPrefix);
- } catch (AssertionError ae) {
-        for(int i=0;i<r.maxDoc();i++) {
-          int v = (int) docValues.get(i);
-          boolean accept = (v > min || (v == min && minInclusive)) &&
-                           (v < max || (v == max && maxInclusive));
-          if (accept) {
-            System.out.println("MISSING: docID=" + i + " v=" + v + " term=" + intToBytes(v));
-          }
-        }
-
-        throw ae;
-      }
-    }
-
- r.close();
- w.close();
- dir.close();
- }
-
- // Non-numeric, simple prefix query
- public void testBasicPrefixTerms() throws Exception {
- Directory dir = newDirectory();
- IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
- iwc.setCodec(codec);
- iwc.setMergeScheduler(new SerialMergeScheduler());
- IndexWriter w = new IndexWriter(dir, iwc);
- int numTerms = TestUtil.nextInt(random(), 3000, 50000);
- Set terms = new HashSet<>();
- while (terms.size() < numTerms) {
- terms.add(TestUtil.randomSimpleString(random()));
- }
-
- for(String term : terms) {
- Document doc = new Document();
- doc.add(new StringField("field", term, Field.Store.NO));
- doc.add(new BinaryDocValuesField("field", new BytesRef(term)));
- w.addDocument(doc);
- }
-
- if (random().nextBoolean()) {
- if (VERBOSE) {
- System.out.println("TEST: now force merge");
- }
- w.forceMerge(1);
- }
-
- IndexReader r = DirectoryReader.open(w);
-
- List sortedTerms = new ArrayList<>(terms);
- Collections.sort(sortedTerms);
-
- if (VERBOSE) {
- System.out.println("TEST: sorted terms:");
- int idx = 0;
- for(String term : sortedTerms) {
- System.out.println(idx + ": " + term);
- idx++;
- }
- }
-
- if (VERBOSE) {
- System.out.println("TEST: r=" + r);
- }
-
- int iters = atLeast(100);
-    for(int iter=0;iter<iters;iter++) {
-      // each iteration picks a random prefix, intersects the terms dictionary
-      // and verifies the result with VerifyAutoPrefixTerms below (body elided)
-    }
-
-    r.close();
-    w.close();
-    dir.close();
-  }
-
-  private static class VerifyAutoPrefixTerms {
-    private final FixedBitSet allHits;
-    private final Map<BytesRef,Integer> prefixCounts = new HashMap<>();
- private int totPrefixCount;
- private final BytesRef[] bounds;
- private int totTermCount;
- private BytesRef lastTerm;
-
- public VerifyAutoPrefixTerms(int maxDoc, BytesRef... bounds) {
- allHits = new FixedBitSet(maxDoc);
- assert bounds.length > 0;
- this.bounds = bounds;
- }
-
- public void sawTerm(BytesRef term) {
- //System.out.println("saw term=" + term);
- if (lastTerm != null) {
- assertTrue(lastTerm.compareTo(term) < 0);
- }
- lastTerm = BytesRef.deepCopyOf(term);
- totTermCount++;
- totPrefixCount += term.length;
- for(int i=1;i<=term.length;i++) {
- BytesRef prefix = BytesRef.deepCopyOf(term);
- prefix.length = i;
- Integer count = prefixCounts.get(prefix);
- if (count == null) {
- count = 1;
- } else {
- count += 1;
- }
- prefixCounts.put(prefix, count);
- }
- }
-
- public void sawDoc(int docID) {
- // The auto-prefix terms should never "overlap" one another, so we should only ever see a given docID one time:
- assertFalse(allHits.getAndSet(docID));
- }
-
- public void finish(int expectedNumHits, int maxPrefixCount) {
-
- if (maxPrefixCount != -1) {
- // Auto-prefix terms were used in this test
- long allowedMaxTerms;
-
- if (bounds.length == 1) {
- // Simple prefix query: we should never see more than maxPrefixCount terms, except for the empty string:
- if (bounds[0].length == 0) {
- allowedMaxTerms = Integer.MAX_VALUE;
- } else {
- allowedMaxTerms = maxPrefixCount;
- }
- } else {
- // Trickier: we need to allow for maxPrefixTerms for each different leading byte in the min and max:
- assert bounds.length == 2;
- BytesRef minTerm = bounds[0];
- BytesRef maxTerm = bounds[1];
-
- int commonPrefix = 0;
-        for(int i=0;i<Math.min(minTerm.length, maxTerm.length);i++) {
-          if (minTerm.bytes[i] != maxTerm.bytes[i]) {
-            break;
-          }
-          commonPrefix++;
-        }
-
-        allowedMaxTerms = maxPrefixCount * (long) ((minTerm.length-commonPrefix) + (maxTerm.length-commonPrefix));
-
-        int min = minTerm.bytes[commonPrefix] & 0xff;
-        int max = maxTerm.bytes[commonPrefix] & 0xff;
-        if (max > min) {
- // When maxPrefixCount is small (< 16), each byte of the term can require more than one "level" of auto-prefixing:
- // NOTE: this is still only approximate ... it's tricky to get a closed form max bound that's "tight"
- allowedMaxTerms += MathUtil.log(max-min, maxPrefixCount);
- }
- }
- }
-
- assertTrue("totTermCount=" + totTermCount + " is > allowedMaxTerms=" + allowedMaxTerms, totTermCount <= allowedMaxTerms);
- }
-
- assertEquals(expectedNumHits, allHits.cardinality());
- int sum = 0;
-    for(Map.Entry<BytesRef,Integer> ent : prefixCounts.entrySet()) {
-
- BytesRef prefix = ent.getKey();
- if (VERBOSE) {
- System.out.println(" verify prefix=" + TestUtil.bytesRefToString(prefix) + " count=" + ent.getValue());
- }
-
- if (maxPrefixCount != -1) {
- // Auto-prefix terms were used in this test
-
- int sumLeftoverSuffix = 0;
- for(BytesRef bound : bounds) {
-
- int minSharedLength = Math.min(bound.length, prefix.length);
- int commonPrefix = minSharedLength;
-          for(int i=0;i<minSharedLength;i++) {
-            if (prefix.bytes[i] != bound.bytes[i]) {
-              commonPrefix = i;
-              break;
-            }
-          }
-          sumLeftoverSuffix += bound.length - commonPrefix;
-        }
-      }
-      sum += ent.getValue();
-    }
-    assertEquals(totPrefixCount, sum);
-  }
-  }
-
-  /** Make sure you get clear exc. if you try to use this within anything but IndexOptions.DOCS fields. */
-  public void testWithFreqs() throws Exception {
-    Directory dir = newDirectory();
-    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
-    iwc.setCodec(codec);
-    IndexWriter w = new IndexWriter(dir, iwc);
-    FieldType ft = new FieldType(StringField.TYPE_NOT_STORED);
-    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
-    Document doc = new Document();
-    doc.add(new Field("foo", "bar bar", ft));
-    w.addDocument(doc);
-    IllegalStateException expected = expectThrows(IllegalStateException.class, () -> {
- w.commit();
- });
- assertEquals("ranges can only be indexed with IndexOptions.DOCS (field: foo)", expected.getMessage());
- w.close();
- dir.close();
- }
-
- /** Make sure you get clear exc. if you try to use this within anything but IndexOptions.DOCS fields. */
- public void testWithPositions() throws Exception {
- Directory dir = newDirectory();
- IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
- iwc.setCodec(codec);
- IndexWriter w = new IndexWriter(dir, iwc);
- FieldType ft = new FieldType(StringField.TYPE_NOT_STORED);
- ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
- Document doc = new Document();
- doc.add(new Field("foo", "bar bar", ft));
- w.addDocument(doc);
- IllegalStateException expected = expectThrows(IllegalStateException.class, () -> {
- w.commit();
- });
- assertEquals("ranges can only be indexed with IndexOptions.DOCS (field: foo)", expected.getMessage());
-
- w.close();
- dir.close();
- }
-
- /** Make sure you get clear exc. if you try to use this within anything but IndexOptions.DOCS fields. */
- public void testWithOffsets() throws Exception {
- Directory dir = newDirectory();
- IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
- iwc.setCodec(codec);
- IndexWriter w = new IndexWriter(dir, iwc);
- FieldType ft = new FieldType(StringField.TYPE_NOT_STORED);
- ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
- Document doc = new Document();
- doc.add(new Field("foo", "bar bar", ft));
- w.addDocument(doc);
- IllegalStateException expected = expectThrows(IllegalStateException.class, () -> {
- w.commit();
- });
- assertEquals("ranges can only be indexed with IndexOptions.DOCS (field: foo)", expected.getMessage());
-
- w.close();
- dir.close();
- }
-
- /** Make sure auto prefix terms are used with TermRangeQuery */
- public void testTermRange() throws Exception {
-
-    List<String> prefixes = new ArrayList<>();
- for(int i=1;i<5;i++) {
- char[] chars = new char[i];
- Arrays.fill(chars, 'a');
- prefixes.add(new String(chars));
- }
-
-    Set<String> randomTerms = new HashSet<>();
- int numTerms = atLeast(10000);
- while (randomTerms.size() < numTerms) {
- for(String prefix : prefixes) {
- randomTerms.add(prefix + TestUtil.randomSimpleString(random()));
- }
- }
-
-    // We make term range aa<start> - aa<end>:
- char start;
- char end;
-
- int actualCount;
- boolean startInclusive = random().nextBoolean();
- boolean endInclusive = random().nextBoolean();
- String startTerm;
- String endTerm;
-
- while (true) {
- start = (char) TestUtil.nextInt(random(), 'a', 'm');
- end = (char) TestUtil.nextInt(random(), start+1, 'z');
-
- actualCount = 0;
-
- startTerm = "aa" + start;
- endTerm = "aa" + end;
-
- for(String term : randomTerms) {
- int cmpStart = startTerm.compareTo(term);
- int cmpEnd = endTerm.compareTo(term);
- if ((cmpStart < 0 || (startInclusive && cmpStart == 0)) &&
- (cmpEnd > 0 || (endInclusive && cmpEnd == 0))) {
- actualCount++;
- }
- }
-
- if (actualCount > 2000) {
- break;
- }
- }
-
- if (VERBOSE) {
- System.out.println("start " + startTerm + " inclusive? " + startInclusive);
- System.out.println("end " + endTerm + " inclusive? " + endInclusive);
- System.out.println("actual count " + actualCount);
- }
-
- Directory dir = newDirectory();
- IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
- int minTermsInBlock = TestUtil.nextInt(random(), 2, 100);
- int maxTermsInBlock = Math.max(2, (minTermsInBlock-1)*2 + random().nextInt(100));
-
- int minTermsAutoPrefix = TestUtil.nextInt(random(), 2, 100);
- int maxTermsAutoPrefix = random().nextBoolean() ? Math.max(2, (minTermsAutoPrefix-1)*2 + random().nextInt(100)) : Integer.MAX_VALUE;
-
- if (VERBOSE) {
- System.out.println("minTermsAutoPrefix " + minTermsAutoPrefix);
- System.out.println("maxTermsAutoPrefix " + maxTermsAutoPrefix);
- }
-
- iwc.setCodec(TestUtil.alwaysPostingsFormat(new AutoPrefixPostingsFormat(minTermsInBlock, maxTermsInBlock,
- minTermsAutoPrefix, maxTermsAutoPrefix)));
- RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
-
- if (VERBOSE) {
- System.out.println("TEST: index terms");
- }
- for (String term : randomTerms) {
- Document doc = new Document();
- doc.add(new StringField("field", term, Field.Store.NO));
- w.addDocument(doc);
- if (VERBOSE) {
- System.out.println(" " + term);
- }
- }
-
- if (VERBOSE) {
- System.out.println("TEST: now force merge");
- }
-
- w.forceMerge(1);
- IndexReader r = w.getReader();
- final Terms terms = MultiFields.getTerms(r, "field");
- IndexSearcher s = new IndexSearcher(r);
- final int finalActualCount = actualCount;
- if (VERBOSE) {
- System.out.println("start=" + startTerm + " end=" + endTerm + " startIncl=" + startInclusive + " endIncl=" + endInclusive);
- }
- TermRangeQuery q = new TermRangeQuery("field", new BytesRef(startTerm), new BytesRef(endTerm), startInclusive, endInclusive) {
- public TermRangeQuery checkTerms() throws IOException {
- TermsEnum termsEnum = getTermsEnum(terms, new AttributeSource());
- int count = 0;
- while (termsEnum.next() != null) {
- if (VERBOSE) {
- System.out.println("got term: " + termsEnum.term().utf8ToString());
- }
- count++;
- }
- if (VERBOSE) {
- System.out.println("count " + count + " vs finalActualCount=" + finalActualCount);
- }
-
- // Auto-prefix term(s) should have kicked in, so we should have visited fewer than the total number of aa* terms:
- assertTrue(count < finalActualCount);
-
- return this;
- }
- }.checkTerms();
-
- int maxClauseCount = BooleanQuery.getMaxClauseCount();
-
- try {
-
- // TODO test with boolean rewrite as well once we can create term
- // queries on fake terms
- /*if (random().nextBoolean()) {
- q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE);
- BooleanQuery.setMaxClauseCount(actualCount);
- } else if (random().nextBoolean()) {
- q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE);
- BooleanQuery.setMaxClauseCount(actualCount);
- }*/
-
- if (VERBOSE) {
- System.out.println("TEST: use rewrite method " + q.getRewriteMethod());
- }
- assertEquals(actualCount, s.search(q, 1).totalHits);
- } finally {
- BooleanQuery.setMaxClauseCount(maxClauseCount);
- }
-
- // Test when min == max:
-    List<String> randomTermsList = new ArrayList<>(randomTerms);
- for(int iter=0;iter<100*RANDOM_MULTIPLIER;iter++) {
- String term = randomTermsList.get(random().nextInt(randomTermsList.size()));
- q = new TermRangeQuery("field", new BytesRef(term), new BytesRef(term), true, true);
- assertEquals(1, s.search(q, 1).totalHits);
- }
-
- r.close();
- w.close();
- dir.close();
- }
-
-
- /** Make sure auto prefix terms are used with PrefixQuery. */
- public void testPrefixQuery() throws Exception {
-
-    List<String> prefixes = new ArrayList<>();
- for(int i=1;i<5;i++) {
- char[] chars = new char[i];
- Arrays.fill(chars, 'a');
- prefixes.add(new String(chars));
- }
-
-    Set<String> randomTerms = new HashSet<>();
- int numTerms = atLeast(10000);
- while (randomTerms.size() < numTerms) {
- for(String prefix : prefixes) {
- randomTerms.add(prefix + TestUtil.randomRealisticUnicodeString(random()));
- }
- }
-
- int actualCount = 0;
- for(String term : randomTerms) {
- if (term.startsWith("aa")) {
- actualCount++;
- }
- }
-
- //System.out.println("actual count " + actualCount);
-
- Directory dir = newDirectory();
- IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
- int minTermsInBlock = TestUtil.nextInt(random(), 2, 100);
- int maxTermsInBlock = Math.max(2, (minTermsInBlock-1)*2 + random().nextInt(100));
-
- // As long as this is never > actualCount, aa should always see at least one auto-prefix term:
- int minTermsAutoPrefix = TestUtil.nextInt(random(), 2, actualCount);
- int maxTermsAutoPrefix = random().nextBoolean() ? Math.max(2, (minTermsAutoPrefix-1)*2 + random().nextInt(100)) : Integer.MAX_VALUE;
-
- iwc.setCodec(TestUtil.alwaysPostingsFormat(new AutoPrefixPostingsFormat(minTermsInBlock, maxTermsInBlock,
- minTermsAutoPrefix, maxTermsAutoPrefix)));
- RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
-
- for (String term : randomTerms) {
- Document doc = new Document();
- doc.add(new StringField("field", term, Field.Store.NO));
- w.addDocument(doc);
- }
-
- w.forceMerge(1);
- IndexReader r = w.getReader();
- final Terms terms = MultiFields.getTerms(r, "field");
- IndexSearcher s = new IndexSearcher(r);
- final int finalActualCount = actualCount;
- PrefixQuery q = new PrefixQuery(new Term("field", "aa")) {
- public PrefixQuery checkTerms() throws IOException {
- TermsEnum termsEnum = getTermsEnum(terms, new AttributeSource());
- int count = 0;
- while (termsEnum.next() != null) {
- //System.out.println("got term: " + termsEnum.term().utf8ToString());
- count++;
- }
-
- // Auto-prefix term(s) should have kicked in, so we should have visited fewer than the total number of aa* terms:
- assertTrue(count < finalActualCount);
-
- return this;
- }
- }.checkTerms();
-
- int x = BooleanQuery.getMaxClauseCount();
- try {
- // TODO test with boolean rewrite as well once we can create term
- // queries on fake terms
- /*BooleanQuery.setMaxClauseCount(randomTerms.size());
- if (random().nextBoolean()) {
- q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE);
- } else if (random().nextBoolean()) {
- q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE);
- }*/
-
- assertEquals(actualCount, s.search(q, 1).totalHits);
- } finally {
- BooleanQuery.setMaxClauseCount(x);
- }
-
- r.close();
- w.close();
- dir.close();
- }
-}
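
The deleted range tests drive the terms dictionary through a binary-interval
automaton; the essential pattern, lifted from the tests above (reader, minTerm
and maxTerm are assumed to exist):

    CompiledAutomaton ca = new CompiledAutomaton(
        Automata.makeBinaryInterval(minTerm, true, maxTerm, true),
        true, false, Integer.MAX_VALUE, true);
    TermsEnum te = ca.getTermsEnum(MultiFields.getTerms(reader, "field"));
    while (te.next() != null) {
      // pre-removal this enum could return auto-prefix terms;
      // afterwards it only ever visits real terms
    }
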
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java b/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java
index 595bfbb8ad5..c317668a12c 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java
@@ -16,7 +16,6 @@
*/
package org.apache.lucene.codecs;
-import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader; // javadocs
import org.apache.lucene.index.OrdTermState;
import org.apache.lucene.index.TermState;
@@ -39,11 +38,6 @@ public class BlockTermState extends OrdTermState {
// TODO: update BTR to nuke this
public long blockFilePointer;
- /** True if this term is "real" (e.g., not an auto-prefix term or
- * some other "secret" term; currently only {@link BlockTreeTermsReader}
- * sets this). */
- public boolean isRealTerm = true;
-
/** Sole constructor. (For invocation by subclass
* constructors, typically implicit.) */
protected BlockTermState() {
@@ -58,16 +52,10 @@ public class BlockTermState extends OrdTermState {
totalTermFreq = other.totalTermFreq;
termBlockOrd = other.termBlockOrd;
blockFilePointer = other.blockFilePointer;
- isRealTerm = other.isRealTerm;
- }
-
- @Override
- public boolean isRealTerm() {
- return isRealTerm;
}
@Override
public String toString() {
- return "docFreq=" + docFreq + " totalTermFreq=" + totalTermFreq + " termBlockOrd=" + termBlockOrd + " blockFP=" + blockFilePointer + " isRealTerm=" + isRealTerm;
+ return "docFreq=" + docFreq + " totalTermFreq=" + totalTermFreq + " termBlockOrd=" + termBlockOrd + " blockFP=" + blockFilePointer;
}
}
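
The removed isRealTerm flag let consumers detect auto-prefix ("fake") terms; a
sketch of the typical guard, as deleted from MultiTermQueryConstantScoreWrapper
and SolrRangeQuery further down:

    TermState state = termsEnum.termState();
    if (state.isRealTerm() == false) {
      // an auto-prefix term: it may share bytes with a real term,
      // so it cannot back a TermQuery
      return false;
    }
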
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/AutoPrefixTermsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/AutoPrefixTermsWriter.java
deleted file mode 100644
index 501068d627a..00000000000
--- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/AutoPrefixTermsWriter.java
+++ /dev/null
@@ -1,438 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.codecs.blocktree;
-
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-
-import org.apache.lucene.index.FilteredTermsEnum;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefBuilder;
-import org.apache.lucene.util.StringHelper;
-
-// TODO: instead of inlining auto-prefix terms with normal terms,
-// we could write them into their own virtual/private field. This
-// would make search time a bit more complex, since we'd need to
-// merge sort between two TermEnums, but it would also make stats
-// API (used by CheckIndex -verbose) easier to implement since we could
-// just walk this virtual field and gather its stats)
-
-/** Used in the first pass when writing a segment to locate
- * "appropriate" auto-prefix terms to pre-compile into the index.
- * This visits every term in the index to find prefixes that
- * match {@code >= min} and {@code <= max} number of terms. */
-
-class AutoPrefixTermsWriter {
-
- //static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
- //static boolean DEBUG = false;
- //static boolean DEBUG2 = BlockTreeTermsWriter.DEBUG2;
- //static boolean DEBUG2 = true;
-
- /** Describes a range of term-space to match, either a simple prefix
- * (foo*) or a floor-block range of a prefix (e.g. foo[a-m]*,
- * foo[n-z]*) when there are too many terms starting with foo*. */
-  public static final class PrefixTerm implements Comparable<PrefixTerm> {
- /** Common prefix */
- public final byte[] prefix;
-
-    /** If this is -2, this is a normal prefix (foo*), else it's the minimum lead byte of the suffix (e.g. 'd' in foo[d-m]*). */
- public final int floorLeadStart;
-
-    /** The lead byte (inclusive) of the suffix for the term range we match (e.g. 'm' in foo[d-m]*); this is ignored when
- * floorLeadStart is -2. */
- public final int floorLeadEnd;
-
- public final BytesRef term;
-
- /** Sole constructor. */
- public PrefixTerm(byte[] prefix, int floorLeadStart, int floorLeadEnd) {
- this.prefix = prefix;
- this.floorLeadStart = floorLeadStart;
- this.floorLeadEnd = floorLeadEnd;
- this.term = toBytesRef(prefix, floorLeadStart);
-
- assert floorLeadEnd >= floorLeadStart;
- assert floorLeadEnd >= 0;
- assert floorLeadStart == -2 || floorLeadStart >= 0;
-
- // We should never create empty-string prefix term:
- assert prefix.length > 0 || floorLeadStart != -2 || floorLeadEnd != 0xff;
- }
-
- @Override
- public String toString() {
- String s = brToString(new BytesRef(prefix));
- if (floorLeadStart == -2) {
- s += "[-" + Integer.toHexString(floorLeadEnd) + "]";
- } else {
- s += "[" + Integer.toHexString(floorLeadStart) + "-" + Integer.toHexString(floorLeadEnd) + "]";
- }
- return s;
- }
-
- @Override
- public int compareTo(PrefixTerm other) {
- int cmp = term.compareTo(other.term);
- if (cmp == 0) {
- if (prefix.length != other.prefix.length) {
- return prefix.length - other.prefix.length;
- }
-
-      // On tie, sort the bigger floorLeadEnd earlier, since it
- // spans more terms, so during intersect, we want to encounter this one
- // first so we can use it if the automaton accepts the larger range:
- cmp = other.floorLeadEnd - floorLeadEnd;
- }
-
- return cmp;
- }
-
- /** Returns the leading term for this prefix term, e.g. "foo" (for
- * the foo* prefix) or "foom" (for the foo[m-z]* case). */
- private static BytesRef toBytesRef(byte[] prefix, int floorLeadStart) {
- BytesRef br;
- if (floorLeadStart != -2) {
- assert floorLeadStart >= 0;
- br = new BytesRef(prefix.length+1);
- } else {
- br = new BytesRef(prefix.length);
- }
- System.arraycopy(prefix, 0, br.bytes, 0, prefix.length);
- br.length = prefix.length;
- if (floorLeadStart != -2) {
- assert floorLeadStart >= 0;
- br.bytes[br.length++] = (byte) floorLeadStart;
- }
-
- return br;
- }
-
- public int compareTo(BytesRef term) {
- return this.term.compareTo(term);
- }
-
- public TermsEnum getTermsEnum(TermsEnum in) {
-
- final BytesRef prefixRef = new BytesRef(prefix);
-
- return new FilteredTermsEnum(in) {
- {
- setInitialSeekTerm(term);
- }
-
- @Override
- protected AcceptStatus accept(BytesRef term) {
- if (StringHelper.startsWith(term, prefixRef) &&
- (floorLeadEnd == -1 || term.length == prefixRef.length || (term.bytes[term.offset + prefixRef.length] & 0xff) <= floorLeadEnd)) {
- return AcceptStatus.YES;
- } else {
- return AcceptStatus.END;
- }
- }
- };
- }
- }
-
- // for debugging
- static String brToString(BytesRef b) {
- try {
- return b.utf8ToString() + " " + b;
- } catch (Throwable t) {
- // If BytesRef isn't actually UTF8, or it's eg a
- // prefix of UTF8 that ends mid-unicode-char, we
- // fallback to hex:
- return b.toString();
- }
- }
-
-  final List<PrefixTerm> prefixes = new ArrayList<>();
- private final int minItemsInPrefix;
- private final int maxItemsInPrefix;
-
- // Records index into pending where the current prefix at that
- // length "started"; for example, if current term starts with 't',
- // startsByPrefix[0] is the index into pending for the first
- // term/sub-block starting with 't'. We use this to figure out when
- // to write a new block:
- private final BytesRefBuilder lastTerm = new BytesRefBuilder();
- private int[] prefixStarts = new int[8];
-  private List<Object> pending = new ArrayList<>();
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java
--- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java
 *    <li>Header is a {@link CodecUtil#writeHeader CodecHeader} storing the version information
 *        for the BlockTree implementation.
- *    <li>HasAutoPrefixTerms is a single byte; 1 means there may be auto-prefix terms and 0 means there are none.
 *    <li>DirOffset is a pointer to the FieldSummary section.
 *    <li>DocFreq is the count of documents which contain the term.
 *    <li>TotalTermFreq is the total number of occurrences of the term. This is encoded
@@ -223,8 +211,6 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
final int maxDoc;
final int minItemsInBlock;
final int maxItemsInBlock;
- final int minItemsInAutoPrefix;
- final int maxItemsInAutoPrefix;
final PostingsWriterBase postingsWriter;
final FieldInfos fieldInfos;
@@ -260,43 +246,14 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
 private final List<FieldMetaData> fields = new ArrayList<>();
- // private final String segment;
- final FixedBitSet prefixDocs;
-
- /** Reused in getAutoPrefixTermsEnum: */
- final BitSetTermsEnum prefixFixedBitsTermsEnum;
-
- /** Reused in getAutoPrefixTermsEnum: */
- private TermsEnum prefixTermsEnum;
-
- /** Reused in getAutoPrefixTermsEnum: */
- private PostingsEnum prefixDocsEnum;
-
- /** Create a new writer, using default values for auto-prefix terms. */
- public BlockTreeTermsWriter(SegmentWriteState state,
- PostingsWriterBase postingsWriter,
- int minItemsInBlock,
- int maxItemsInBlock) throws IOException {
- this(state, postingsWriter, minItemsInBlock, maxItemsInBlock, 0, 0);
- }
-
/** Create a new writer. The number of items (terms or
* sub-blocks) per block will aim to be between
* minItemsPerBlock and maxItemsPerBlock, though in some
- * cases the blocks may be smaller than the min.
- * For DOCS_ONLY fields, this terms dictionary will
- * insert automatically generated prefix terms for common
- * prefixes, as long as each prefix matches at least
- * {@code minItemsInAutoPrefix} other terms or prefixes,
- * and at most {@code maxItemsInAutoPrefix} other terms
- * or prefixes. Set {@code minItemsInAutoPrefix} to 0
- * to disable auto-prefix terms. */
+ * cases the blocks may be smaller than the min. */
public BlockTreeTermsWriter(SegmentWriteState state,
PostingsWriterBase postingsWriter,
int minItemsInBlock,
- int maxItemsInBlock,
- int minItemsInAutoPrefix,
- int maxItemsInAutoPrefix)
+ int maxItemsInBlock)
throws IOException
{
validateSettings(minItemsInBlock,
@@ -305,21 +262,6 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
this.minItemsInBlock = minItemsInBlock;
this.maxItemsInBlock = maxItemsInBlock;
- validateAutoPrefixSettings(minItemsInAutoPrefix,
- maxItemsInAutoPrefix);
-
- if (minItemsInAutoPrefix != 0) {
-      // TODO: can we use a compressed bitset instead? that auto-upgrades if it's dense enough...
- prefixDocs = new FixedBitSet(state.segmentInfo.maxDoc());
- prefixFixedBitsTermsEnum = new BitSetTermsEnum(prefixDocs);
- } else {
- prefixDocs = null;
- prefixFixedBitsTermsEnum = null;
- }
-
- this.minItemsInAutoPrefix = minItemsInAutoPrefix;
- this.maxItemsInAutoPrefix = maxItemsInAutoPrefix;
-
this.maxDoc = state.segmentInfo.maxDoc();
this.fieldInfos = state.fieldInfos;
this.postingsWriter = postingsWriter;
@@ -332,13 +274,6 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
CodecUtil.writeIndexHeader(termsOut, BlockTreeTermsReader.TERMS_CODEC_NAME, BlockTreeTermsReader.VERSION_CURRENT,
state.segmentInfo.getId(), state.segmentSuffix);
- // So at read time we know, globally, that there will be no auto-prefix terms:
- if (minItemsInAutoPrefix == 0) {
- termsOut.writeByte((byte) 0);
- } else {
- termsOut.writeByte((byte) 1);
- }
-
final String indexName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BlockTreeTermsReader.TERMS_INDEX_EXTENSION);
indexOut = state.directory.createOutput(indexName, state.context);
CodecUtil.writeIndexHeader(indexOut, BlockTreeTermsReader.TERMS_INDEX_CODEC_NAME, BlockTreeTermsReader.VERSION_CURRENT,
@@ -380,25 +315,6 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
}
}
- /** Throws {@code IllegalArgumentException} if any of these settings
- * is invalid. */
- public static void validateAutoPrefixSettings(int minItemsInAutoPrefix,
- int maxItemsInAutoPrefix) {
- if (minItemsInAutoPrefix != 0) {
- if (minItemsInAutoPrefix < 2) {
- throw new IllegalArgumentException("minItemsInAutoPrefix must be at least 2; got minItemsInAutoPrefix=" + minItemsInAutoPrefix);
- }
- if (minItemsInAutoPrefix > maxItemsInAutoPrefix) {
- throw new IllegalArgumentException("maxItemsInAutoPrefix must be >= minItemsInAutoPrefix; got maxItemsInAutoPrefix=" + maxItemsInAutoPrefix + " minItemsInAutoPrefix=" + minItemsInAutoPrefix);
- }
- if (2*(minItemsInAutoPrefix-1) > maxItemsInAutoPrefix) {
- throw new IllegalArgumentException("maxItemsInAutoPrefix must be at least 2*(minItemsInAutoPrefix-1); got maxItemsInAutoPrefix=" + maxItemsInAutoPrefix + " minItemsInAutoPrefix=" + minItemsInAutoPrefix);
- }
- } else if (maxItemsInAutoPrefix != 0) {
- throw new IllegalArgumentException("maxItemsInAutoPrefix must be 0 (disabled) when minItemsInAutoPrefix is 0");
- }
- }
-
@Override
public void write(Fields fields) throws IOException {
//if (DEBUG) System.out.println("\nBTTW.write seg=" + segment);
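
For reference, the validation removed above required minItemsInAutoPrefix >= 2,
minItemsInAutoPrefix <= maxItemsInAutoPrefix, and
maxItemsInAutoPrefix >= 2*(minItemsInAutoPrefix-1), which is why the deleted
AutoPrefixPostingsFormat defaulted to (25, 48); a quick sketch:

    BlockTreeTermsWriter.validateAutoPrefixSettings(25, 48); // ok: 48 >= 2*(25-1)
    BlockTreeTermsWriter.validateAutoPrefixSettings(25, 40); // threw IllegalArgumentException
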
@@ -413,75 +329,26 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
if (terms == null) {
continue;
}
- FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
-
- // First pass to find all prefix terms we should compile into the index:
-    List<PrefixTerm> prefixTerms;
- if (minItemsInAutoPrefix != 0) {
- if (fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
- throw new IllegalStateException("ranges can only be indexed with IndexOptions.DOCS (field: " + fieldInfo.name + ")");
- }
- prefixTerms = new AutoPrefixTermsWriter(terms, minItemsInAutoPrefix, maxItemsInAutoPrefix).prefixes;
- //if (DEBUG) {
- // for(PrefixTerm term : prefixTerms) {
- // System.out.println("field=" + fieldInfo.name + " PREFIX TERM: " + term);
- // }
- //}
- } else {
- prefixTerms = null;
- }
TermsEnum termsEnum = terms.iterator();
TermsWriter termsWriter = new TermsWriter(fieldInfos.fieldInfo(field));
- int prefixTermUpto = 0;
while (true) {
BytesRef term = termsEnum.next();
//if (DEBUG) System.out.println("BTTW: next term " + term);
- // Insert (merge sort) next prefix term(s):
- if (prefixTerms != null) {
- while (prefixTermUpto < prefixTerms.size() && (term == null || prefixTerms.get(prefixTermUpto).compareTo(term) <= 0)) {
- PrefixTerm prefixTerm = prefixTerms.get(prefixTermUpto);
- //if (DEBUG) System.out.println("seg=" + segment + " field=" + fieldInfo.name + " NOW INSERT prefix=" + prefixTerm);
- termsWriter.write(prefixTerm.term, getAutoPrefixTermsEnum(terms, prefixTerm), prefixTerm);
- prefixTermUpto++;
- }
- }
-
if (term == null) {
break;
}
//if (DEBUG) System.out.println("write field=" + fieldInfo.name + " term=" + brToString(term));
- termsWriter.write(term, termsEnum, null);
+ termsWriter.write(term, termsEnum);
}
- assert prefixTerms == null || prefixTermUpto == prefixTerms.size();
-
termsWriter.finish();
//if (DEBUG) System.out.println("\nBTTW.write done seg=" + segment + " field=" + field);
}
}
-
- private TermsEnum getAutoPrefixTermsEnum(Terms terms, final PrefixTerm prefix) throws IOException {
- assert prefixDocs != null;
- prefixDocs.clear(0, prefixDocs.length());
-
- prefixTermsEnum = prefix.getTermsEnum(terms.iterator());
-
- //System.out.println("BTTW.getAutoPrefixTE: prefix=" + prefix);
- while (prefixTermsEnum.next() != null) {
- //System.out.println(" got term=" + prefixTermsEnum.term().utf8ToString());
- //termCount++;
- prefixDocsEnum = prefixTermsEnum.postings(prefixDocsEnum, 0);
- //System.out.println(" " + prefixDocsEnum + " doc=" + prefixDocsEnum.docID());
- prefixDocs.or(prefixDocsEnum);
- }
-
- //System.out.println(" done terms: " + prefixDocs.cardinality() + " doc seen; " + termCount + " terms seen");
- return prefixFixedBitsTermsEnum;
- }
static long encodeOutput(long fp, boolean hasTerms, boolean isFloor) {
assert fp < (1L << 62);
@@ -500,16 +367,12 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
public final byte[] termBytes;
// stats + metadata
public final BlockTermState state;
- // Non-null if this is an auto-prefix-term:
- public final PrefixTerm prefixTerm;
- public PendingTerm other;
- public PendingTerm(BytesRef term, BlockTermState state, PrefixTerm prefixTerm) {
+ public PendingTerm(BytesRef term, BlockTermState state) {
super(true);
this.termBytes = new byte[term.length];
System.arraycopy(term.bytes, term.offset, termBytes, 0, term.length);
this.state = state;
- this.prefixTerm = prefixTerm;
}
@Override
@@ -698,7 +561,6 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
// only points to sub-blocks in the terms index so we can avoid seeking
// to it when we are looking for a term):
boolean hasTerms = false;
- boolean hasPrefixTerms = false;
boolean hasSubBlocks = false;
int start = pending.size()-count;
@@ -739,11 +601,10 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
// block as soon as we have at least minItemsInBlock. This is not always best: it often produces
// a too-small block as the final block:
boolean isFloor = itemsInBlock < count;
- newBlocks.add(writeBlock(prefixLength, isFloor, nextFloorLeadLabel, nextBlockStart, i, hasTerms, hasPrefixTerms, hasSubBlocks));
+ newBlocks.add(writeBlock(prefixLength, isFloor, nextFloorLeadLabel, nextBlockStart, i, hasTerms, hasSubBlocks));
hasTerms = false;
hasSubBlocks = false;
- hasPrefixTerms = false;
nextFloorLeadLabel = suffixLeadLabel;
nextBlockStart = i;
}
@@ -753,7 +614,6 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
if (ent.isTerm) {
hasTerms = true;
- hasPrefixTerms |= ((PendingTerm) ent).prefixTerm != null;
} else {
hasSubBlocks = true;
}
@@ -763,7 +623,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
if (nextBlockStart < end) {
int itemsInBlock = end - nextBlockStart;
boolean isFloor = itemsInBlock < count;
- newBlocks.add(writeBlock(prefixLength, isFloor, nextFloorLeadLabel, nextBlockStart, end, hasTerms, hasPrefixTerms, hasSubBlocks));
+ newBlocks.add(writeBlock(prefixLength, isFloor, nextFloorLeadLabel, nextBlockStart, end, hasTerms, hasSubBlocks));
}
assert newBlocks.isEmpty() == false;
@@ -789,7 +649,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
* same prefix, and so we broke it into multiple floor blocks where
* we record the starting label of the suffix of each floor block. */
private PendingBlock writeBlock(int prefixLength, boolean isFloor, int floorLeadLabel, int start, int end,
- boolean hasTerms, boolean hasPrefixTerms, boolean hasSubBlocks) throws IOException {
+ boolean hasTerms, boolean hasSubBlocks) throws IOException {
assert end > start;
@@ -823,7 +683,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
// We optimize the leaf block case (block has only terms), writing a more
// compact format in this case:
- boolean isLeafBlock = hasSubBlocks == false && hasPrefixTerms == false;
+ boolean isLeafBlock = hasSubBlocks == false;
//System.out.println(" isLeaf=" + isLeafBlock);
@@ -839,7 +699,6 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
assert ent.isTerm: "i=" + i;
PendingTerm term = (PendingTerm) ent;
- assert term.prefixTerm == null;
assert StringHelper.startsWith(term.termBytes, prefix): "term.term=" + term.termBytes + " prefix=" + prefix;
BlockTermState state = term.state;
@@ -876,7 +735,6 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
} else {
// Block has at least one prefix term or a sub block:
subIndices = new ArrayList<>();
- boolean sawAutoPrefixTerm = false;
for (int i=start;i 0;
- sawAutoPrefixTerm = true;
- PrefixTerm prefixTerm = term.prefixTerm;
- floorLeadEnd = prefixTerm.floorLeadEnd;
- assert floorLeadEnd != -1;
-
- if (prefixTerm.floorLeadStart == -2) {
- // Starts with empty string
- code |= 2;
- } else {
- code |= 3;
- }
- }
- suffixWriter.writeVInt(code);
- suffixWriter.writeBytes(term.termBytes, prefixLength, suffix);
- if (floorLeadEnd != -1) {
- suffixWriter.writeByte((byte) floorLeadEnd);
- }
- assert floorLeadLabel == -1 || (term.termBytes[prefixLength] & 0xff) >= floorLeadLabel;
- }
+ suffixWriter.writeVInt(suffix << 1);
+ suffixWriter.writeBytes(term.termBytes, prefixLength, suffix);
// Write term stats, to separate byte[] blob:
statsWriter.writeVInt(state.docFreq);
@@ -961,13 +791,8 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
assert suffix > 0;
// For non-leaf block we borrow 1 bit to record
- // if entry is term or sub-block, and 1 bit (unset here) to
- // record if it's a prefix term:
- if (minItemsInAutoPrefix == 0) {
- suffixWriter.writeVInt((suffix<<1)|1);
- } else {
- suffixWriter.writeVInt((suffix<<2)|1);
- }
+      // if entry is term or sub-block:
+ suffixWriter.writeVInt((suffix<<1)|1);
suffixWriter.writeBytes(block.prefix.bytes, prefixLength, suffix);
//if (DEBUG2) {
@@ -985,7 +810,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
}
}
- assert subIndices.size() != 0 || sawAutoPrefixTerm;
+ assert subIndices.size() != 0;
}
// TODO: we could block-write the term suffix pointers;
@@ -1029,7 +854,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
}
/** Writes one term's worth of postings. */
- public void write(BytesRef text, TermsEnum termsEnum, PrefixTerm prefixTerm) throws IOException {
+ public void write(BytesRef text, TermsEnum termsEnum) throws IOException {
/*
if (DEBUG) {
int[] tmp = new int[lastTerm.length];
@@ -1045,20 +870,17 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
assert fieldInfo.getIndexOptions() == IndexOptions.DOCS || state.totalTermFreq >= state.docFreq: "postingsWriter=" + postingsWriter;
pushTerm(text);
- PendingTerm term = new PendingTerm(text, state, prefixTerm);
+ PendingTerm term = new PendingTerm(text, state);
pending.add(term);
//if (DEBUG) System.out.println(" add pending term = " + text + " pending.size()=" + pending.size());
- if (prefixTerm == null) {
- // Only increment stats for real terms:
- sumDocFreq += state.docFreq;
- sumTotalTermFreq += state.totalTermFreq;
- numTerms++;
- if (firstPendingTerm == null) {
- firstPendingTerm = term;
- }
- lastPendingTerm = term;
+ sumDocFreq += state.docFreq;
+ sumTotalTermFreq += state.totalTermFreq;
+ numTerms++;
+ if (firstPendingTerm == null) {
+ firstPendingTerm = term;
}
+ lastPendingTerm = term;
}
}
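
With the auto-prefix arguments gone, wiring up the terms dictionary reduces to
the single four-argument constructor; a minimal fieldsConsumer sketch,
mirroring the deleted AutoPrefixPostingsFormat above minus the prefix settings
(state is an available SegmentWriteState):

    PostingsWriterBase postingsWriter = new Lucene50PostingsWriter(state);
    FieldsConsumer ret = new BlockTreeTermsWriter(state, postingsWriter,
                                                  BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE,
                                                  BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
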
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java
index 4d5b4cebffa..3934c553d9b 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java
@@ -348,7 +348,6 @@ final class IntersectTermsEnum extends TermsEnum {
private boolean skipPastLastAutoPrefixTerm() throws IOException {
assert currentFrame.isAutoPrefixTerm;
useAutoPrefixTerm = false;
- currentFrame.termState.isRealTerm = true;
// If we last returned an auto-prefix term, we must now skip all
// actual terms sharing that prefix. At most, that skipping
@@ -694,7 +693,6 @@ final class IntersectTermsEnum extends TermsEnum {
if (useAutoPrefixTerm) {
// All suffixes of this auto-prefix term are accepted by the automaton, so we can use it:
copyTerm();
- currentFrame.termState.isRealTerm = false;
return term;
} else {
// We move onto the next term
diff --git a/lucene/core/src/java/org/apache/lucene/index/TermContext.java b/lucene/core/src/java/org/apache/lucene/index/TermContext.java
index ee5163eba6e..e55aeba7c75 100644
--- a/lucene/core/src/java/org/apache/lucene/index/TermContext.java
+++ b/lucene/core/src/java/org/apache/lucene/index/TermContext.java
@@ -177,18 +177,6 @@ public final class TermContext {
return totalTermFreq;
}
- /** Returns true if all terms stored here are real (e.g., not auto-prefix terms).
- *
- * @lucene.internal */
- public boolean hasOnlyRealTerms() {
- for (TermState termState : states) {
- if (termState != null && termState.isRealTerm() == false) {
- return false;
- }
- }
- return true;
- }
-
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
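
TermContext still carries the per-segment states and aggregated stats; a quick
sketch of the surviving API (an open IndexReader and a Term are assumed):

    TermContext ctx = TermContext.build(reader.getContext(), term);
    int df = ctx.docFreq();         // document frequency summed across leaves
    long ttf = ctx.totalTermFreq(); // -1 when frequencies are not indexed
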
diff --git a/lucene/core/src/java/org/apache/lucene/index/TermState.java b/lucene/core/src/java/org/apache/lucene/index/TermState.java
index d8e9be4c619..7d3f1330b57 100644
--- a/lucene/core/src/java/org/apache/lucene/index/TermState.java
+++ b/lucene/core/src/java/org/apache/lucene/index/TermState.java
@@ -48,12 +48,6 @@ public abstract class TermState implements Cloneable {
// should not happen
throw new RuntimeException(cnse);
}
- }
-
- /** Returns true if this term is real (e.g., not an auto-prefix term).
- * @lucene.internal */
- public boolean isRealTerm() {
- return true;
}
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java b/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java
index 66577570143..ed4c43073ec 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java
@@ -122,10 +122,6 @@ final class MultiTermQueryConstantScoreWrapper<Q extends MultiTermQuery> extends Query {
return true;
}
TermState state = termsEnum.termState();
- if (state.isRealTerm() == false) {
- // TermQuery does not accept fake terms for now
- return false;
- }
terms.add(new TermAndState(BytesRef.deepCopyOf(term), state, termsEnum.docFreq(), termsEnum.totalTermFreq()));
}
return termsEnum.next() == null;
diff --git a/lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java b/lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java
index 3a62e1599f4..38345b30a04 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java
@@ -115,7 +115,7 @@ public abstract class ScoringRewrite<B> extends TermCollectingRewrite<B> {
for (int i = 0; i < size; i++) {
final int pos = sort[i];
final Term term = new Term(query.getField(), col.terms.get(pos, new BytesRef()));
- assert termStates[pos].hasOnlyRealTerms() == false || reader.docFreq(term) == termStates[pos].docFreq();
+ assert reader.docFreq(term) == termStates[pos].docFreq();
addClause(builder, term, termStates[pos].docFreq(), boost[pos], termStates[pos]);
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
index e815ff61a8a..6547b10439a 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
@@ -53,8 +53,6 @@ public class TermQuery extends Query {
super(TermQuery.this);
this.needsScores = needsScores;
assert termStates != null : "TermContext must not be null";
- // checked with a real exception in TermQuery constructor
- assert termStates.hasOnlyRealTerms();
this.termStates = termStates;
this.similarity = searcher.getSimilarity(needsScores);
@@ -166,12 +164,6 @@ public class TermQuery extends Query {
public TermQuery(Term t, TermContext states) {
assert states != null;
term = Objects.requireNonNull(t);
- if (states.hasOnlyRealTerms() == false) {
- // The reason for this is that fake terms might have the same bytes as
- // real terms, and this confuses query caching because they don't match
- // the same documents
- throw new IllegalArgumentException("Term queries must be created on real terms");
- }
perReaderTermState = Objects.requireNonNull(states);
}
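
After this removal the expert constructor accepts any non-null TermContext
without inspecting its states; a minimal sketch (reader assumed open):

    Term t = new Term("field", "value");
    TermContext states = TermContext.build(reader.getContext(), t);
    TermQuery q = new TermQuery(t, states); // previously threw for "fake" terms
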
diff --git a/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java b/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java
index 132dbceff11..ee6340d9f7e 100644
--- a/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java
+++ b/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java
@@ -345,10 +345,6 @@ public final class SolrRangeQuery extends ExtendedQueryBase implements DocSetProducer {
return -count - 1;
}
TermState state = termsEnum.termState();
- if (state.isRealTerm() == false) {
- // TermQuery does not accept fake terms for now
- return count;
- }
int df = termsEnum.docFreq();
count += df;
terms.add(new TermAndState(BytesRef.deepCopyOf(term), state, df, termsEnum.totalTermFreq()));