From 2e1c25e6cefa29bc4181073480ce7dc45a952d0b Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Wed, 8 Apr 2015 09:19:12 +0000 Subject: [PATCH] LUCENE-5879: fix corner case in auto-prefix intersect git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1672042 13f79535-47bb-0310-9956-ffa450edef68 --- .../blocktree/IntersectTermsEnumFrame.java | 8 ++-- .../lucene/search/TestTermRangeQuery.java | 42 ++++++++++++++----- 2 files changed, 35 insertions(+), 15 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnumFrame.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnumFrame.java index 299e901d2a0..6ff52292c01 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnumFrame.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnumFrame.java @@ -119,7 +119,7 @@ final class IntersectTermsEnumFrame { } void loadNextFloorBlock() throws IOException { - assert numFollowFloorBlocks > 0; + assert numFollowFloorBlocks > 0: "nextFloorLabel=" + nextFloorLabel; //if (DEBUG) System.out.println(" loadNextFloorBlock transition.min=" + transition.min); do { @@ -157,7 +157,7 @@ final class IntersectTermsEnumFrame { //xif (DEBUG) System.out.println(" load fp=" + fp + " fpOrig=" + fpOrig + " frameIndexData=" + frameIndexData + " trans=" + (transitions.length != 0 ? transitions[0] : "n/a" + " state=" + state)); - if (frameIndexData != null && transitionCount != 0) { + if (frameIndexData != null) { // Floor frame if (floorData.length < frameIndexData.length) { this.floorData = new byte[ArrayUtil.oversize(frameIndexData.length, 1)]; @@ -172,9 +172,9 @@ final class IntersectTermsEnumFrame { nextFloorLabel = floorDataReader.readByte() & 0xff; //if (DEBUG) System.out.println(" numFollowFloorBlocks=" + numFollowFloorBlocks + " nextFloorLabel=" + nextFloorLabel); - // If current state is accept, we must process + // If current state is not accept, and has transitions, we must process // first block in case it has empty suffix: - if (!ite.runAutomaton.isAccept(state)) { + if (ite.runAutomaton.isAccept(state) == false && transitionCount != 0) { // Maybe skip floor blocks: assert transitionIndex == 0: "transitionIndex=" + transitionIndex; while (numFollowFloorBlocks != 0 && nextFloorLabel <= transition.min) { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java index 100d5ce283a..551546ef4d2 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java @@ -399,9 +399,11 @@ public class TestTermRangeQuery extends LuceneTestCase { } } - //System.out.println("start " + startTerm + " inclusive? " + startInclusive); - //System.out.println("end " + endTerm + " inclusive? " + endInclusive); - //System.out.println("actual count " + actualCount); + if (VERBOSE) { + System.out.println("start " + startTerm + " inclusive? " + startInclusive); + System.out.println("end " + endTerm + " inclusive? " + endInclusive); + System.out.println("actual count " + actualCount); + } Directory dir = newDirectory(); IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random())); @@ -411,37 +413,52 @@ public class TestTermRangeQuery extends LuceneTestCase { int minTermsAutoPrefix = TestUtil.nextInt(random(), 2, 100); int maxTermsAutoPrefix = random().nextBoolean() ? Math.max(2, (minTermsAutoPrefix-1)*2 + random().nextInt(100)) : Integer.MAX_VALUE; - //System.out.println("minTermsAutoPrefix " + minTermsAutoPrefix); - //System.out.println("maxTermsAutoPrefix " + maxTermsAutoPrefix); + if (VERBOSE) { + System.out.println("minTermsAutoPrefix " + minTermsAutoPrefix); + System.out.println("maxTermsAutoPrefix " + maxTermsAutoPrefix); + } iwc.setCodec(TestUtil.alwaysPostingsFormat(new AutoPrefixPostingsFormat(minTermsInBlock, maxTermsInBlock, minTermsAutoPrefix, maxTermsAutoPrefix))); RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); - //System.out.println("TEST: index terms"); + if (VERBOSE) { + System.out.println("TEST: index terms"); + } for (String term : randomTerms) { Document doc = new Document(); doc.add(new StringField("field", term, Field.Store.NO)); w.addDocument(doc); - //System.out.println(" " + term); + if (VERBOSE) { + System.out.println(" " + term); + } + } + + if (VERBOSE) { + System.out.println("TEST: now force merge"); } - //System.out.println("TEST: now force merge"); w.forceMerge(1); IndexReader r = w.getReader(); final Terms terms = MultiFields.getTerms(r, "field"); IndexSearcher s = new IndexSearcher(r); final int finalActualCount = actualCount; - //System.out.println("start=" + startTerm + " end=" + endTerm + " startIncl=" + startInclusive + " endIncl=" + endInclusive); + if (VERBOSE) { + System.out.println("start=" + startTerm + " end=" + endTerm + " startIncl=" + startInclusive + " endIncl=" + endInclusive); + } TermRangeQuery q = new TermRangeQuery("field", new BytesRef(startTerm), new BytesRef(endTerm), startInclusive, endInclusive) { public TermRangeQuery checkTerms() throws IOException { TermsEnum termsEnum = getTermsEnum(terms, new AttributeSource()); int count = 0; while (termsEnum.next() != null) { - //System.out.println("got term: " + termsEnum.term().utf8ToString()); + if (VERBOSE) { + System.out.println("got term: " + termsEnum.term().utf8ToString()); + } count++; } - //System.out.println("count " + count + " vs finalActualCount=" + finalActualCount); + if (VERBOSE) { + System.out.println("count " + count + " vs finalActualCount=" + finalActualCount); + } // Auto-prefix term(s) should have kicked in, so we should have visited fewer than the total number of aa* terms: assertTrue(count < finalActualCount); @@ -456,6 +473,9 @@ public class TestTermRangeQuery extends LuceneTestCase { q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE); } + if (VERBOSE) { + System.out.println("TEST: use rewrite method " + q.getRewriteMethod()); + } assertEquals(actualCount, s.search(q, 1).totalHits); // Test when min == max: