mirror of https://github.com/apache/lucene.git
LUCENE-5667: add test case
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1594846 13f79535-47bb-0310-9956-ffa450edef68
parent db8a1c7338
commit 83d150aa21
@@ -27,6 +27,7 @@ import org.apache.lucene.document.IntField;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.FieldCache;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LineFileDocs;
 import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
@@ -884,4 +885,155 @@ public class TestTermsEnum extends LuceneTestCase {
     r.close();
     dir.close();
   }
+
+  // LUCENE-5667
+  public void testCommonPrefixTerms() throws Exception {
+    Directory d = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), d);
+    Set<String> terms = new HashSet<String>();
+    //String prefix = TestUtil.randomSimpleString(random(), 1, 20);
+    String prefix = TestUtil.randomRealisticUnicodeString(random(), 1, 20);
+    int numTerms = atLeast(1000);
+    if (VERBOSE) {
+      System.out.println("TEST: " + numTerms + " terms; prefix=" + prefix);
+    }
+    while (terms.size() < numTerms) {
+      //terms.add(prefix + TestUtil.randomSimpleString(random(), 1, 20));
+      terms.add(prefix + TestUtil.randomRealisticUnicodeString(random(), 1, 20));
+    }
+    for(String term : terms) {
+      Document doc = new Document();
+      doc.add(newStringField("id", term, Field.Store.YES));
+      w.addDocument(doc);
+    }
+    IndexReader r = w.getReader();
+    if (VERBOSE) {
+      System.out.println("\nTEST: reader=" + r);
+    }
+
+    TermsEnum termsEnum = MultiFields.getTerms(r, "id").iterator(null);
+    DocsEnum docsEnum = null;
+    PerThreadPKLookup pkLookup = new PerThreadPKLookup(r, "id");
+
+    int iters = atLeast(numTerms*3);
+    List<String> termsList = new ArrayList<>(terms);
+    for(int iter=0;iter<iters;iter++) {
+      String term;
+      boolean shouldExist;
+      if (random().nextBoolean()) {
+        term = termsList.get(random().nextInt(terms.size()));
+        shouldExist = true;
+      } else {
+        term = prefix + TestUtil.randomSimpleString(random(), 1, 20);
+        shouldExist = terms.contains(term);
+      }
+
+      if (VERBOSE) {
+        System.out.println("\nTEST: try term=" + term);
+        System.out.println("  shouldExist?=" + shouldExist);
+      }
+
+      BytesRef termBytesRef = new BytesRef(term);
+
+      boolean actualResult = termsEnum.seekExact(termBytesRef);
+      assertEquals(shouldExist, actualResult);
+      if (shouldExist) {
+        docsEnum = termsEnum.docs(null, docsEnum, 0);
+        int docID = docsEnum.nextDoc();
+        assertTrue(docID != DocsEnum.NO_MORE_DOCS);
+        assertEquals(docID, pkLookup.lookup(termBytesRef));
+        StoredDocument doc = r.document(docID);
+        assertEquals(term, doc.get("id"));
+
+        if (random().nextInt(7) == 1) {
+          termsEnum.next();
+        }
+      } else {
+        assertEquals(-1, pkLookup.lookup(termBytesRef));
+      }
+
+      if (random().nextInt(7) == 1) {
+        TermsEnum.SeekStatus status = termsEnum.seekCeil(termBytesRef);
+        if (shouldExist) {
+          assertEquals(TermsEnum.SeekStatus.FOUND, status);
+        } else {
+          assertNotSame(TermsEnum.SeekStatus.FOUND, status);
+        }
+      }
+    }
+
+    r.close();
+    w.close();
+    d.close();
+  }
+
+  /** Utility class to do efficient primary-key (only 1 doc contains the
+   *  given term) lookups by segment, re-using the enums.  This class is
+   *  not thread safe, so it is the caller's job to create and use one
+   *  instance of this per thread.  Do not use this if a term may appear
+   *  in more than one document!  It will only return the first one it
+   *  finds. */
+  static class PerThreadPKLookup {
+
+    private final TermsEnum[] termsEnums;
+    private final DocsEnum[] docsEnums;
+    private final Bits[] liveDocs;
+    private final int[] docBases;
+    private final int numSegs;
+    private final boolean hasDeletions;
+
+    public PerThreadPKLookup(IndexReader r, String idFieldName) throws IOException {
+
+      List<AtomicReaderContext> leaves = new ArrayList<>(r.leaves());
+
+      // Larger segments are more likely to have the id, so we sort largest to smallest by numDocs:
+      Collections.sort(leaves, new Comparator<AtomicReaderContext>() {
+          @Override
+          public int compare(AtomicReaderContext c1, AtomicReaderContext c2) {
+            return c2.reader().numDocs() - c1.reader().numDocs();
+          }
+        });
+
+      termsEnums = new TermsEnum[leaves.size()];
+      docsEnums = new DocsEnum[leaves.size()];
+      liveDocs = new Bits[leaves.size()];
+      docBases = new int[leaves.size()];
+      int numSegs = 0;
+      boolean hasDeletions = false;
+      for(int i=0;i<leaves.size();i++) {
+        Fields fields = leaves.get(i).reader().fields();
+        if (fields != null) {
+          Terms terms = fields.terms(idFieldName);
+          if (terms != null) {
+            termsEnums[numSegs] = terms.iterator(null);
+            assert termsEnums[numSegs] != null;
+            docBases[numSegs] = leaves.get(i).docBase;
+            liveDocs[numSegs] = leaves.get(i).reader().getLiveDocs();
+            hasDeletions |= leaves.get(i).reader().hasDeletions();
+            numSegs++;
+          }
+        }
+      }
+      this.numSegs = numSegs;
+      this.hasDeletions = hasDeletions;
+    }
+
+    /** Returns docID if found, else -1. */
+    public int lookup(BytesRef id) throws IOException {
+      for(int seg=0;seg<numSegs;seg++) {
+        if (termsEnums[seg].seekExact(id)) {
+          docsEnums[seg] = termsEnums[seg].docs(liveDocs[seg], docsEnums[seg], 0);
+          int docID = docsEnums[seg].nextDoc();
+          if (docID != DocsEnum.NO_MORE_DOCS) {
+            return docBases[seg] + docID;
+          }
+          assert hasDeletions;
+        }
+      }
+
+      return -1;
+    }
+
+    // TODO: add reopen method to carry over re-used enums...?
+  }
 }
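The PerThreadPKLookup utility added here is the piece most likely to be reused outside the test, so a minimal usage sketch follows. It assumes the class has been lifted out of TestTermsEnum into its own file, a Lucene 4.x on-disk index whose unique "id" field was indexed as a StringField, and an illustrative index path and id value; none of that setup is part of the commit itself.

import java.io.File;
import java.io.IOException;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;

public class PKLookupExample {
  public static void main(String[] args) throws IOException {
    Directory dir = FSDirectory.open(new File("/path/to/index")); // hypothetical index path
    IndexReader reader = DirectoryReader.open(dir);
    try {
      // One instance per thread: the class re-uses TermsEnum/DocsEnum state
      // across lookups, so sharing an instance between threads is not safe.
      PerThreadPKLookup pkLookup = new PerThreadPKLookup(reader, "id");
      int docID = pkLookup.lookup(new BytesRef("doc-42")); // hypothetical id value
      if (docID != -1) {
        System.out.println("found: " + reader.document(docID).get("id"));
      } else {
        System.out.println("not found");
      }
    } finally {
      reader.close();
      dir.close();
    }
  }
}

Note the design choice visible in the constructor: segments are sorted largest-first by numDocs, so when the id is present the per-segment loop tends to hit it in an early (large) segment and return without probing the smaller ones.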