From 83d150aa21bb0c5e49bf939995263c34114f93f8 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Thu, 15 May 2014 10:12:14 +0000 Subject: [PATCH] LUCENE-5667: add test case git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1594846 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/index/TestTermsEnum.java | 152 ++++++++++++++++++ 1 file changed, 152 insertions(+) diff --git a/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java b/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java index c35e90c3e1a..2bea6cc4625 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java @@ -27,6 +27,7 @@ import org.apache.lucene.document.IntField; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.FieldCache; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LineFileDocs; import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; @@ -884,4 +885,155 @@ public class TestTermsEnum extends LuceneTestCase { r.close(); dir.close(); } + + // LUCENE-5667 + public void testCommonPrefixTerms() throws Exception { + Directory d = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), d); + Set terms = new HashSet(); + //String prefix = TestUtil.randomSimpleString(random(), 1, 20); + String prefix = TestUtil.randomRealisticUnicodeString(random(), 1, 20); + int numTerms = atLeast(1000); + if (VERBOSE) { + System.out.println("TEST: " + numTerms + " terms; prefix=" + prefix); + } + while (terms.size() < numTerms) { + //terms.add(prefix + TestUtil.randomSimpleString(random(), 1, 20)); + terms.add(prefix + TestUtil.randomRealisticUnicodeString(random(), 1, 20)); + } + for(String term : terms) { + Document doc = new Document(); + doc.add(newStringField("id", term, Field.Store.YES)); + w.addDocument(doc); + } + IndexReader r = w.getReader(); + if (VERBOSE) { + System.out.println("\nTEST: reader=" + r); + } + + TermsEnum termsEnum = MultiFields.getTerms(r, "id").iterator(null); + DocsEnum docsEnum = null; + PerThreadPKLookup pkLookup = new PerThreadPKLookup(r, "id"); + + int iters = atLeast(numTerms*3); + List termsList = new ArrayList<>(terms); + for(int iter=0;iter leaves = new ArrayList<>(r.leaves()); + + // Larger segments are more likely to have the id, so we sort largest to smallest by numDocs: + Collections.sort(leaves, new Comparator() { + @Override + public int compare(AtomicReaderContext c1, AtomicReaderContext c2) { + return c2.reader().numDocs() - c1.reader().numDocs(); + } + }); + + termsEnums = new TermsEnum[leaves.size()]; + docsEnums = new DocsEnum[leaves.size()]; + liveDocs = new Bits[leaves.size()]; + docBases = new int[leaves.size()]; + int numSegs = 0; + boolean hasDeletions = false; + for(int i=0;i