From 5dc30b9527e9db155d3bf2b5534f422ac524f723 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Sat, 17 May 2014 21:57:37 +0000 Subject: [PATCH] LUCENE-5675: testRandom seems to be passing git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5675@1595548 13f79535-47bb-0310-9956-ffa450edef68 --- .../codecs/idversion/IDVersionSegmentTermsEnum.java | 11 ++++++----- .../lucene/codecs/blocktree/SegmentTermsEnum.java | 3 +-- .../test/org/apache/lucene/index/TestTermsEnum.java | 9 ++++----- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java b/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java index 54f30471ac3..705d8b56525 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java @@ -210,8 +210,6 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum { return true; } - // nocommit we need a seekExact(BytesRef target, long minVersion) API? - @Override public boolean seekExact(final BytesRef target) throws IOException { return seekExact(target, 0); @@ -230,7 +228,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum { } } - /** Returns false if the term deos not exist, or it exists but its version is < minIDVersion. */ + /** Returns false if the term deos not exist, or it exists but its version is too old (< minIDVersion). */ public boolean seekExact(final BytesRef target, long minIDVersion) throws IOException { if (fr.index == null) { @@ -258,6 +256,8 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum { targetBeforeCurrentLength = currentFrame.ord; + boolean rewind = false; + // nocommit we could stop earlier w/ the version check, every time we traverse an index arc we can check? if (currentFrame != staticFrame) { @@ -351,6 +351,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum { // keep the currentFrame but we must rewind it // (so we scan from the start) targetBeforeCurrentLength = 0; + rewind = true; if (DEBUG) { System.out.println(" target is before current (shares prefixLen=" + targetUpto + "); rewind frame ord=" + lastFrame.ord); } @@ -459,7 +460,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum { if (currentFrame.maxIDVersion < minIDVersion) { // The max version for all terms in this block is lower than the minVersion - if (currentFrame.fp != startFrameFP) { + if (currentFrame.fp != startFrameFP || rewind) { //if (targetUpto+1 > term.length) { termExists = false; term.bytes[targetUpto] = (byte) targetLabel; @@ -473,7 +474,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum { //termExists = false; //} if (DEBUG) { - System.out.println(" FAST version NOT_FOUND term=" + brToString(term) + " targetUpto=" + targetUpto + " currentFrame.maxIDVersion=" + currentFrame.maxIDVersion + " validIndexPrefix=" + validIndexPrefix); + System.out.println(" FAST version NOT_FOUND term=" + brToString(term) + " targetUpto=" + targetUpto + " currentFrame.maxIDVersion=" + currentFrame.maxIDVersion + " validIndexPrefix=" + validIndexPrefix + " startFrameFP=" + startFrameFP + " vs " + currentFrame.fp); } return false; } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java index bdab3520390..04dfa83be33 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java @@ -51,7 +51,7 @@ final class SegmentTermsEnum extends TermsEnum { private int targetBeforeCurrentLength; - static boolean DEBUG = true; + static boolean DEBUG = false; private final ByteArrayDataInput scratchReader = new ByteArrayDataInput(); @@ -69,7 +69,6 @@ final class SegmentTermsEnum extends TermsEnum { public SegmentTermsEnum(FieldReader fr) throws IOException { this.fr = fr; - System.out.println("STE: init"); //if (DEBUG) System.out.println("BTTR.init seg=" + segment); stack = new SegmentTermsEnumFrame[0]; diff --git a/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java b/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java index 02f72b4c071..aef62a661d0 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java @@ -967,20 +967,19 @@ public class TestTermsEnum extends LuceneTestCase { d.close(); } - // nocommit mark slow/nigthly: O(N^2)!! - // Stresses out many-terms-in-root-block case: + @Nightly public void testVaryingTermsPerSegment() throws Exception { Directory dir = newDirectory(); Set terms = new HashSet(); - int MAX_TERMS = 10000; + int MAX_TERMS = atLeast(1000); while (terms.size() < MAX_TERMS) { - terms.add(new BytesRef(TestUtil.randomSimpleString(random()))); + terms.add(new BytesRef(TestUtil.randomSimpleString(random(), 1, 40))); } List termsList = new ArrayList<>(terms); StringBuilder sb = new StringBuilder(); for(int termCount=0;termCount<10000;termCount++) { - System.out.println("\nTEST: termCount=" + termCount); + System.out.println("\nTEST: termCount=" + termCount + " add term=" + termsList.get(termCount).utf8ToString()); sb.append(' '); sb.append(termsList.get(termCount).utf8ToString()); IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));