mirror of
https://github.com/apache/lucene.git
synced 2025-02-24 11:16:35 +00:00
LUCENE-6491: Forbid term queries on fake terms for now.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1680856 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ae31fb3373
commit
f5cf37e4f3
@ -17,7 +17,9 @@ package org.apache.lucene.codecs.autoprefix;
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
@ -46,14 +48,19 @@ import org.apache.lucene.index.MultiDocValues;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.SerialMergeScheduler;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MultiTermQuery;
|
||||
import org.apache.lucene.search.PrefixQuery;
|
||||
import org.apache.lucene.search.TermRangeQuery;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.AttributeImpl;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
@ -494,7 +501,6 @@ public class TestAutoPrefixTerms extends LuceneTestCase {
|
||||
// 1 document has exactly "a", and 30 documents had "a?"
|
||||
verifier.finish(31, maxTermsAutoPrefix);
|
||||
PrefixQuery q = new PrefixQuery(new Term("field", "a"));
|
||||
q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE);
|
||||
assertEquals(31, newSearcher(r).search(q, 1).totalHits);
|
||||
r.close();
|
||||
w.close();
|
||||
@ -746,4 +752,247 @@ public class TestAutoPrefixTerms extends LuceneTestCase {
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
/** Make sure auto prefix terms are used with TermRangeQuery */
|
||||
public void testTermRange() throws Exception {
|
||||
|
||||
List<String> prefixes = new ArrayList<>();
|
||||
for(int i=1;i<5;i++) {
|
||||
char[] chars = new char[i];
|
||||
Arrays.fill(chars, 'a');
|
||||
prefixes.add(new String(chars));
|
||||
}
|
||||
|
||||
Set<String> randomTerms = new HashSet<>();
|
||||
int numTerms = atLeast(10000);
|
||||
while (randomTerms.size() < numTerms) {
|
||||
for(String prefix : prefixes) {
|
||||
randomTerms.add(prefix + TestUtil.randomSimpleString(random()));
|
||||
}
|
||||
}
|
||||
|
||||
// We make term range aa<start> - aa<end>
|
||||
char start;
|
||||
char end;
|
||||
|
||||
int actualCount;
|
||||
boolean startInclusive = random().nextBoolean();
|
||||
boolean endInclusive = random().nextBoolean();
|
||||
String startTerm;
|
||||
String endTerm;
|
||||
|
||||
while (true) {
|
||||
start = (char) TestUtil.nextInt(random(), 'a', 'm');
|
||||
end = (char) TestUtil.nextInt(random(), start+1, 'z');
|
||||
|
||||
actualCount = 0;
|
||||
|
||||
startTerm = "aa" + start;
|
||||
endTerm = "aa" + end;
|
||||
|
||||
for(String term : randomTerms) {
|
||||
int cmpStart = startTerm.compareTo(term);
|
||||
int cmpEnd = endTerm.compareTo(term);
|
||||
if ((cmpStart < 0 || (startInclusive && cmpStart == 0)) &&
|
||||
(cmpEnd > 0 || (endInclusive && cmpEnd == 0))) {
|
||||
actualCount++;
|
||||
}
|
||||
}
|
||||
|
||||
if (actualCount > 2000) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("start " + startTerm + " inclusive? " + startInclusive);
|
||||
System.out.println("end " + endTerm + " inclusive? " + endInclusive);
|
||||
System.out.println("actual count " + actualCount);
|
||||
}
|
||||
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||
int minTermsInBlock = TestUtil.nextInt(random(), 2, 100);
|
||||
int maxTermsInBlock = Math.max(2, (minTermsInBlock-1)*2 + random().nextInt(100));
|
||||
|
||||
int minTermsAutoPrefix = TestUtil.nextInt(random(), 2, 100);
|
||||
int maxTermsAutoPrefix = random().nextBoolean() ? Math.max(2, (minTermsAutoPrefix-1)*2 + random().nextInt(100)) : Integer.MAX_VALUE;
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("minTermsAutoPrefix " + minTermsAutoPrefix);
|
||||
System.out.println("maxTermsAutoPrefix " + maxTermsAutoPrefix);
|
||||
}
|
||||
|
||||
iwc.setCodec(TestUtil.alwaysPostingsFormat(new AutoPrefixPostingsFormat(minTermsInBlock, maxTermsInBlock,
|
||||
minTermsAutoPrefix, maxTermsAutoPrefix)));
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: index terms");
|
||||
}
|
||||
for (String term : randomTerms) {
|
||||
Document doc = new Document();
|
||||
doc.add(new StringField("field", term, Field.Store.NO));
|
||||
w.addDocument(doc);
|
||||
if (VERBOSE) {
|
||||
System.out.println(" " + term);
|
||||
}
|
||||
}
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: now force merge");
|
||||
}
|
||||
|
||||
w.forceMerge(1);
|
||||
IndexReader r = w.getReader();
|
||||
final Terms terms = MultiFields.getTerms(r, "field");
|
||||
IndexSearcher s = new IndexSearcher(r);
|
||||
final int finalActualCount = actualCount;
|
||||
if (VERBOSE) {
|
||||
System.out.println("start=" + startTerm + " end=" + endTerm + " startIncl=" + startInclusive + " endIncl=" + endInclusive);
|
||||
}
|
||||
TermRangeQuery q = new TermRangeQuery("field", new BytesRef(startTerm), new BytesRef(endTerm), startInclusive, endInclusive) {
|
||||
public TermRangeQuery checkTerms() throws IOException {
|
||||
TermsEnum termsEnum = getTermsEnum(terms, new AttributeSource());
|
||||
int count = 0;
|
||||
while (termsEnum.next() != null) {
|
||||
if (VERBOSE) {
|
||||
System.out.println("got term: " + termsEnum.term().utf8ToString());
|
||||
}
|
||||
count++;
|
||||
}
|
||||
if (VERBOSE) {
|
||||
System.out.println("count " + count + " vs finalActualCount=" + finalActualCount);
|
||||
}
|
||||
|
||||
// Auto-prefix term(s) should have kicked in, so we should have visited fewer than the total number of aa* terms:
|
||||
assertTrue(count < finalActualCount);
|
||||
|
||||
return this;
|
||||
}
|
||||
}.checkTerms();
|
||||
|
||||
int maxClauseCount = BooleanQuery.getMaxClauseCount();
|
||||
|
||||
try {
|
||||
|
||||
// TODO test with boolean rewrite as well once we can create term
|
||||
// queries on fake terms
|
||||
/*if (random().nextBoolean()) {
|
||||
q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE);
|
||||
BooleanQuery.setMaxClauseCount(actualCount);
|
||||
} else if (random().nextBoolean()) {
|
||||
q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE);
|
||||
BooleanQuery.setMaxClauseCount(actualCount);
|
||||
}*/
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: use rewrite method " + q.getRewriteMethod());
|
||||
}
|
||||
assertEquals(actualCount, s.search(q, 1).totalHits);
|
||||
} finally {
|
||||
BooleanQuery.setMaxClauseCount(maxClauseCount);
|
||||
}
|
||||
|
||||
// Test when min == max:
|
||||
List<String> randomTermsList = new ArrayList<>(randomTerms);
|
||||
for(int iter=0;iter<100*RANDOM_MULTIPLIER;iter++) {
|
||||
String term = randomTermsList.get(random().nextInt(randomTermsList.size()));
|
||||
q = new TermRangeQuery("field", new BytesRef(term), new BytesRef(term), true, true);
|
||||
assertEquals(1, s.search(q, 1).totalHits);
|
||||
}
|
||||
|
||||
r.close();
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
|
||||
/** Make sure auto prefix terms are used with PrefixQuery. */
|
||||
public void testPrefixQuery() throws Exception {
|
||||
|
||||
List<String> prefixes = new ArrayList<>();
|
||||
for(int i=1;i<5;i++) {
|
||||
char[] chars = new char[i];
|
||||
Arrays.fill(chars, 'a');
|
||||
prefixes.add(new String(chars));
|
||||
}
|
||||
|
||||
Set<String> randomTerms = new HashSet<>();
|
||||
int numTerms = atLeast(10000);
|
||||
while (randomTerms.size() < numTerms) {
|
||||
for(String prefix : prefixes) {
|
||||
randomTerms.add(prefix + TestUtil.randomRealisticUnicodeString(random()));
|
||||
}
|
||||
}
|
||||
|
||||
int actualCount = 0;
|
||||
for(String term : randomTerms) {
|
||||
if (term.startsWith("aa")) {
|
||||
actualCount++;
|
||||
}
|
||||
}
|
||||
|
||||
//System.out.println("actual count " + actualCount);
|
||||
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||
int minTermsInBlock = TestUtil.nextInt(random(), 2, 100);
|
||||
int maxTermsInBlock = Math.max(2, (minTermsInBlock-1)*2 + random().nextInt(100));
|
||||
|
||||
// As long as this is never > actualCount, aa should always see at least one auto-prefix term:
|
||||
int minTermsAutoPrefix = TestUtil.nextInt(random(), 2, actualCount);
|
||||
int maxTermsAutoPrefix = random().nextBoolean() ? Math.max(2, (minTermsAutoPrefix-1)*2 + random().nextInt(100)) : Integer.MAX_VALUE;
|
||||
|
||||
iwc.setCodec(TestUtil.alwaysPostingsFormat(new AutoPrefixPostingsFormat(minTermsInBlock, maxTermsInBlock,
|
||||
minTermsAutoPrefix, maxTermsAutoPrefix)));
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
|
||||
|
||||
for (String term : randomTerms) {
|
||||
Document doc = new Document();
|
||||
doc.add(new StringField("field", term, Field.Store.NO));
|
||||
w.addDocument(doc);
|
||||
}
|
||||
|
||||
w.forceMerge(1);
|
||||
IndexReader r = w.getReader();
|
||||
final Terms terms = MultiFields.getTerms(r, "field");
|
||||
IndexSearcher s = new IndexSearcher(r);
|
||||
final int finalActualCount = actualCount;
|
||||
PrefixQuery q = new PrefixQuery(new Term("field", "aa")) {
|
||||
public PrefixQuery checkTerms() throws IOException {
|
||||
TermsEnum termsEnum = getTermsEnum(terms, new AttributeSource());
|
||||
int count = 0;
|
||||
while (termsEnum.next() != null) {
|
||||
//System.out.println("got term: " + termsEnum.term().utf8ToString());
|
||||
count++;
|
||||
}
|
||||
|
||||
// Auto-prefix term(s) should have kicked in, so we should have visited fewer than the total number of aa* terms:
|
||||
assertTrue(count < finalActualCount);
|
||||
|
||||
return this;
|
||||
}
|
||||
}.checkTerms();
|
||||
|
||||
int x = BooleanQuery.getMaxClauseCount();
|
||||
try {
|
||||
// TODO test with boolean rewrite as well once we can create term
|
||||
// queries on fake terms
|
||||
/*BooleanQuery.setMaxClauseCount(randomTerms.size());
|
||||
if (random().nextBoolean()) {
|
||||
q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE);
|
||||
} else if (random().nextBoolean()) {
|
||||
q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE);
|
||||
}*/
|
||||
|
||||
assertEquals(actualCount, s.search(q, 1).totalHits);
|
||||
} finally {
|
||||
BooleanQuery.setMaxClauseCount(x);
|
||||
}
|
||||
|
||||
r.close();
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
@ -42,7 +42,7 @@ public class BlockTermState extends OrdTermState {
|
||||
/** True if this term is "real" (e.g., not an auto-prefix term or
|
||||
* some other "secret" term; currently only {@link BlockTreeTermsReader}
|
||||
* sets this). */
|
||||
public boolean isRealTerm;
|
||||
public boolean isRealTerm = true;
|
||||
|
||||
/** Sole constructor. (For invocation by subclass
|
||||
* constructors, typically implicit.) */
|
||||
@ -61,6 +61,11 @@ public class BlockTermState extends OrdTermState {
|
||||
isRealTerm = other.isRealTerm;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isRealTerm() {
|
||||
return isRealTerm;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "docFreq=" + docFreq + " totalTermFreq=" + totalTermFreq + " termBlockOrd=" + termBlockOrd + " blockFP=" + blockFilePointer + " isRealTerm=" + isRealTerm;
|
||||
|
@ -182,12 +182,11 @@ public final class TermContext {
|
||||
*
|
||||
* @lucene.internal */
|
||||
public boolean hasOnlyRealTerms() {
|
||||
for(TermState termState : states) {
|
||||
if (termState instanceof BlockTermState && ((BlockTermState) termState).isRealTerm == false) {
|
||||
for (TermState termState : states) {
|
||||
if (termState != null && termState.isRealTerm() == false) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -50,6 +50,12 @@ public abstract class TermState implements Cloneable {
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns true if this term is real (e.g., not an auto-prefix term).
|
||||
* @lucene.internal */
|
||||
public boolean isRealTerm() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "TermState";
|
||||
|
@ -106,7 +106,12 @@ final class MultiTermQueryConstantScoreWrapper<Q extends MultiTermQuery> extends
|
||||
if (term == null) {
|
||||
return true;
|
||||
}
|
||||
terms.add(new TermAndState(BytesRef.deepCopyOf(term), termsEnum.termState(), termsEnum.docFreq(), termsEnum.totalTermFreq()));
|
||||
TermState state = termsEnum.termState();
|
||||
if (state.isRealTerm() == false) {
|
||||
// TermQuery does not accept fake terms for now
|
||||
return false;
|
||||
}
|
||||
terms.add(new TermAndState(BytesRef.deepCopyOf(term), state, termsEnum.docFreq(), termsEnum.totalTermFreq()));
|
||||
}
|
||||
return termsEnum.next() == null;
|
||||
}
|
||||
|
@ -54,6 +54,8 @@ public class TermQuery extends Query {
|
||||
super(TermQuery.this);
|
||||
this.needsScores = needsScores;
|
||||
assert termStates != null : "TermContext must not be null";
|
||||
// checked with a real exception in TermQuery constructor
|
||||
assert termStates.hasOnlyRealTerms();
|
||||
this.termStates = termStates;
|
||||
this.similarity = searcher.getSimilarity();
|
||||
|
||||
@ -164,6 +166,12 @@ public class TermQuery extends Query {
|
||||
public TermQuery(Term t, TermContext states) {
|
||||
assert states != null;
|
||||
term = Objects.requireNonNull(t);
|
||||
if (states.hasOnlyRealTerms() == false) {
|
||||
// The reason for this is that fake terms might have the same bytes as
|
||||
// real terms, and this confuses query caching because they don't match
|
||||
// the same documents
|
||||
throw new IllegalArgumentException("Term queries must be created on real terms");
|
||||
}
|
||||
perReaderTermState = Objects.requireNonNull(states);
|
||||
}
|
||||
|
||||
|
@ -17,32 +17,23 @@ package org.apache.lucene.search;
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
||||
import org.apache.lucene.codecs.autoprefix.AutoPrefixPostingsFormat;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.AttributeImpl;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
@ -84,92 +75,6 @@ public class TestPrefixQuery extends LuceneTestCase {
|
||||
directory.close();
|
||||
}
|
||||
|
||||
/** Make sure auto prefix terms are used with PrefixQuery. */
|
||||
public void testAutoPrefixTermsKickIn() throws Exception {
|
||||
|
||||
List<String> prefixes = new ArrayList<>();
|
||||
for(int i=1;i<5;i++) {
|
||||
char[] chars = new char[i];
|
||||
Arrays.fill(chars, 'a');
|
||||
prefixes.add(new String(chars));
|
||||
}
|
||||
|
||||
Set<String> randomTerms = new HashSet<>();
|
||||
int numTerms = atLeast(10000);
|
||||
while (randomTerms.size() < numTerms) {
|
||||
for(String prefix : prefixes) {
|
||||
randomTerms.add(prefix + TestUtil.randomRealisticUnicodeString(random()));
|
||||
}
|
||||
}
|
||||
|
||||
int actualCount = 0;
|
||||
for(String term : randomTerms) {
|
||||
if (term.startsWith("aa")) {
|
||||
actualCount++;
|
||||
}
|
||||
}
|
||||
|
||||
//System.out.println("actual count " + actualCount);
|
||||
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||
int minTermsInBlock = TestUtil.nextInt(random(), 2, 100);
|
||||
int maxTermsInBlock = Math.max(2, (minTermsInBlock-1)*2 + random().nextInt(100));
|
||||
|
||||
// As long as this is never > actualCount, aa should always see at least one auto-prefix term:
|
||||
int minTermsAutoPrefix = TestUtil.nextInt(random(), 2, actualCount);
|
||||
int maxTermsAutoPrefix = random().nextBoolean() ? Math.max(2, (minTermsAutoPrefix-1)*2 + random().nextInt(100)) : Integer.MAX_VALUE;
|
||||
|
||||
iwc.setCodec(TestUtil.alwaysPostingsFormat(new AutoPrefixPostingsFormat(minTermsInBlock, maxTermsInBlock,
|
||||
minTermsAutoPrefix, maxTermsAutoPrefix)));
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
|
||||
|
||||
for (String term : randomTerms) {
|
||||
Document doc = new Document();
|
||||
doc.add(new StringField("field", term, Field.Store.NO));
|
||||
w.addDocument(doc);
|
||||
}
|
||||
|
||||
w.forceMerge(1);
|
||||
IndexReader r = w.getReader();
|
||||
final Terms terms = MultiFields.getTerms(r, "field");
|
||||
IndexSearcher s = new IndexSearcher(r);
|
||||
final int finalActualCount = actualCount;
|
||||
PrefixQuery q = new PrefixQuery(new Term("field", "aa")) {
|
||||
public PrefixQuery checkTerms() throws IOException {
|
||||
TermsEnum termsEnum = getTermsEnum(terms, new AttributeSource());
|
||||
int count = 0;
|
||||
while (termsEnum.next() != null) {
|
||||
//System.out.println("got term: " + termsEnum.term().utf8ToString());
|
||||
count++;
|
||||
}
|
||||
|
||||
// Auto-prefix term(s) should have kicked in, so we should have visited fewer than the total number of aa* terms:
|
||||
assertTrue(count < finalActualCount);
|
||||
|
||||
return this;
|
||||
}
|
||||
}.checkTerms();
|
||||
|
||||
int x = BooleanQuery.getMaxClauseCount();
|
||||
try {
|
||||
BooleanQuery.setMaxClauseCount(randomTerms.size());
|
||||
if (random().nextBoolean()) {
|
||||
q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE);
|
||||
} else if (random().nextBoolean()) {
|
||||
q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE);
|
||||
}
|
||||
|
||||
assertEquals(actualCount, s.search(q, 1).totalHits);
|
||||
} finally {
|
||||
BooleanQuery.setMaxClauseCount(x);
|
||||
}
|
||||
|
||||
r.close();
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testMatchAll() throws Exception {
|
||||
Directory directory = newDirectory();
|
||||
|
||||
|
@ -18,32 +18,21 @@ package org.apache.lucene.search;
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.*;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.codecs.autoprefix.AutoPrefixPostingsFormat;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
|
||||
public class TestTermRangeQuery extends LuceneTestCase {
|
||||
@ -348,155 +337,4 @@ public class TestTermRangeQuery extends LuceneTestCase {
|
||||
reader.close();
|
||||
}
|
||||
|
||||
/** Make sure auto prefix terms are used with TermRangeQuery */
|
||||
public void testAutoPrefixTermsKickIn() throws Exception {
|
||||
|
||||
List<String> prefixes = new ArrayList<>();
|
||||
for(int i=1;i<5;i++) {
|
||||
char[] chars = new char[i];
|
||||
Arrays.fill(chars, 'a');
|
||||
prefixes.add(new String(chars));
|
||||
}
|
||||
|
||||
Set<String> randomTerms = new HashSet<>();
|
||||
int numTerms = atLeast(10000);
|
||||
while (randomTerms.size() < numTerms) {
|
||||
for(String prefix : prefixes) {
|
||||
randomTerms.add(prefix + TestUtil.randomSimpleString(random()));
|
||||
}
|
||||
}
|
||||
|
||||
// We make term range aa<start> - aa<end>
|
||||
char start;
|
||||
char end;
|
||||
|
||||
int actualCount;
|
||||
boolean startInclusive = random().nextBoolean();
|
||||
boolean endInclusive = random().nextBoolean();
|
||||
String startTerm;
|
||||
String endTerm;
|
||||
|
||||
while (true) {
|
||||
start = (char) TestUtil.nextInt(random(), 'a', 'm');
|
||||
end = (char) TestUtil.nextInt(random(), start+1, 'z');
|
||||
|
||||
actualCount = 0;
|
||||
|
||||
startTerm = "aa" + start;
|
||||
endTerm = "aa" + end;
|
||||
|
||||
for(String term : randomTerms) {
|
||||
int cmpStart = startTerm.compareTo(term);
|
||||
int cmpEnd = endTerm.compareTo(term);
|
||||
if ((cmpStart < 0 || (startInclusive && cmpStart == 0)) &&
|
||||
(cmpEnd > 0 || (endInclusive && cmpEnd == 0))) {
|
||||
actualCount++;
|
||||
}
|
||||
}
|
||||
|
||||
if (actualCount > 2000) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("start " + startTerm + " inclusive? " + startInclusive);
|
||||
System.out.println("end " + endTerm + " inclusive? " + endInclusive);
|
||||
System.out.println("actual count " + actualCount);
|
||||
}
|
||||
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||
int minTermsInBlock = TestUtil.nextInt(random(), 2, 100);
|
||||
int maxTermsInBlock = Math.max(2, (minTermsInBlock-1)*2 + random().nextInt(100));
|
||||
|
||||
int minTermsAutoPrefix = TestUtil.nextInt(random(), 2, 100);
|
||||
int maxTermsAutoPrefix = random().nextBoolean() ? Math.max(2, (minTermsAutoPrefix-1)*2 + random().nextInt(100)) : Integer.MAX_VALUE;
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("minTermsAutoPrefix " + minTermsAutoPrefix);
|
||||
System.out.println("maxTermsAutoPrefix " + maxTermsAutoPrefix);
|
||||
}
|
||||
|
||||
iwc.setCodec(TestUtil.alwaysPostingsFormat(new AutoPrefixPostingsFormat(minTermsInBlock, maxTermsInBlock,
|
||||
minTermsAutoPrefix, maxTermsAutoPrefix)));
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: index terms");
|
||||
}
|
||||
for (String term : randomTerms) {
|
||||
Document doc = new Document();
|
||||
doc.add(new StringField("field", term, Field.Store.NO));
|
||||
w.addDocument(doc);
|
||||
if (VERBOSE) {
|
||||
System.out.println(" " + term);
|
||||
}
|
||||
}
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: now force merge");
|
||||
}
|
||||
|
||||
w.forceMerge(1);
|
||||
IndexReader r = w.getReader();
|
||||
final Terms terms = MultiFields.getTerms(r, "field");
|
||||
IndexSearcher s = new IndexSearcher(r);
|
||||
final int finalActualCount = actualCount;
|
||||
if (VERBOSE) {
|
||||
System.out.println("start=" + startTerm + " end=" + endTerm + " startIncl=" + startInclusive + " endIncl=" + endInclusive);
|
||||
}
|
||||
TermRangeQuery q = new TermRangeQuery("field", new BytesRef(startTerm), new BytesRef(endTerm), startInclusive, endInclusive) {
|
||||
public TermRangeQuery checkTerms() throws IOException {
|
||||
TermsEnum termsEnum = getTermsEnum(terms, new AttributeSource());
|
||||
int count = 0;
|
||||
while (termsEnum.next() != null) {
|
||||
if (VERBOSE) {
|
||||
System.out.println("got term: " + termsEnum.term().utf8ToString());
|
||||
}
|
||||
count++;
|
||||
}
|
||||
if (VERBOSE) {
|
||||
System.out.println("count " + count + " vs finalActualCount=" + finalActualCount);
|
||||
}
|
||||
|
||||
// Auto-prefix term(s) should have kicked in, so we should have visited fewer than the total number of aa* terms:
|
||||
assertTrue(count < finalActualCount);
|
||||
|
||||
return this;
|
||||
}
|
||||
}.checkTerms();
|
||||
|
||||
int maxClauseCount = BooleanQuery.getMaxClauseCount();
|
||||
|
||||
try {
|
||||
|
||||
if (random().nextBoolean()) {
|
||||
q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE);
|
||||
BooleanQuery.setMaxClauseCount(actualCount);
|
||||
} else if (random().nextBoolean()) {
|
||||
q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE);
|
||||
BooleanQuery.setMaxClauseCount(actualCount);
|
||||
}
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: use rewrite method " + q.getRewriteMethod());
|
||||
}
|
||||
assertEquals(actualCount, s.search(q, 1).totalHits);
|
||||
} finally {
|
||||
BooleanQuery.setMaxClauseCount(maxClauseCount);
|
||||
}
|
||||
|
||||
// Test when min == max:
|
||||
List<String> randomTermsList = new ArrayList<>(randomTerms);
|
||||
for(int iter=0;iter<100*RANDOM_MULTIPLIER;iter++) {
|
||||
String term = randomTermsList.get(random().nextInt(randomTermsList.size()));
|
||||
q = new TermRangeQuery("field", new BytesRef(term), new BytesRef(term), true, true);
|
||||
assertEquals(1, s.search(q, 1).totalHits);
|
||||
}
|
||||
|
||||
r.close();
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user