LUCENE-3260: fix wrong result from MultiTermsEnum.next() after seekExact

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1141593 13f79535-47bb-0310-9956-ffa450edef68
2011-06-30 16:05:42 +00:00 · 2011-06-30 16:05:42 +00:00 · 9285e08bce
parent b5be90974b
commit 9285e08bce
4 changed files with 161 additions and 13 deletions
--- a/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java
+++ b/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java
@ -43,6 +43,7 @@ public final class MultiTermsEnum extends TermsEnum {
  private final MultiDocsAndPositionsEnum.EnumWithSlice[] subDocsAndPositions;
  private BytesRef lastSeek;
  private boolean lastSeekExact;
  private final BytesRef lastSeekScratch = new BytesRef();
  private int numTop;
@ -149,6 +150,7 @@ public final class MultiTermsEnum extends TermsEnum {
    }
    lastSeek = null;
    lastSeekExact = true;
    for(int i=0;i<numSubs;i++) {
      final boolean status;
@ -179,6 +181,7 @@ public final class MultiTermsEnum extends TermsEnum {
      if (status) {
        top[numTop++] = currentSubs[i];
        current = currentSubs[i].current = currentSubs[i].terms.term();
        assert term.equals(currentSubs[i].current);
      }
    }
@ -191,6 +194,7 @@ public final class MultiTermsEnum extends TermsEnum {
  public SeekStatus seekCeil(BytesRef term, boolean useCache) throws IOException {
    queue.clear();
    numTop = 0;
    lastSeekExact = false;
    boolean seekOpt = false;
    if (lastSeek != null && termComp.compare(lastSeek, term) <= 0) {
@ -293,6 +297,17 @@ public final class MultiTermsEnum extends TermsEnum {
  @Override
  public BytesRef next() throws IOException {
    if (lastSeekExact) {
      // Must seekCeil at this point, so those subs that
      // didn't have the term can find the following term.
      // NOTE: we could save some CPU by only seekCeil the
      // subs that didn't match the last exact seek... but
      // most impls short-circuit if you seekCeil to term
      // they are already on.
      final SeekStatus status = seekCeil(current);
      assert status == SeekStatus.FOUND;
      lastSeekExact = false;
    }
    lastSeek = null;
    // restore queue
--- a/lucene/src/test/org/apache/lucene/index/TestTermsEnum.java
+++ b/lucene/src/test/org/apache/lucene/index/TestTermsEnum.java
@ -0,0 +1,143 @@
 package org.apache.lucene.index;
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LineFileDocs;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
 public class TestTermsEnum extends LuceneTestCase {
  public void test() throws Exception {
    final LineFileDocs docs = new LineFileDocs(random);
    final Directory d = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random, d);
    final int numDocs = atLeast(10);
    for(int docCount=0;docCount<numDocs;docCount++) {
      w.addDocument(docs.nextDoc());
    }
    final IndexReader r = w.getReader();
    w.close();
    final List<BytesRef> terms = new ArrayList<BytesRef>();
    final TermsEnum termsEnum = MultiFields.getTerms(r, "body").iterator();
    BytesRef term;
    while((term = termsEnum.next()) != null) {
      terms.add(new BytesRef(term));
    }
    if (VERBOSE) {
      System.out.println("TEST: " + terms.size() + " terms");
    }
    int upto = -1;
    final int iters = atLeast(200);
    for(int iter=0;iter<iters;iter++) {
      final boolean isEnd;
      if (upto != -1 && random.nextBoolean()) {
        // next
        if (VERBOSE) {
          System.out.println("TEST: iter next");
        }
        isEnd = termsEnum.next() == null;
        upto++;
        if (isEnd) {
          if (VERBOSE) {
            System.out.println("  end");
          }
          assertEquals(upto, terms.size());
          upto = -1;
        } else {
          if (VERBOSE) {
            System.out.println("  got term=" + termsEnum.term().utf8ToString() + " expected=" + terms.get(upto).utf8ToString());
          }
          assertTrue(upto < terms.size());
          assertEquals(terms.get(upto), termsEnum.term());
        }
      } else {
        final BytesRef target;
        final String exists;
        if (random.nextBoolean()) {
          // likely fake term
          if (random.nextBoolean()) {
            target = new BytesRef(_TestUtil.randomSimpleString(random));
          } else {
            target = new BytesRef(_TestUtil.randomRealisticUnicodeString(random));
          }
          exists = "likely not";
        } else {
          // real term
          target = terms.get(random.nextInt(terms.size()));
          exists = "yes";
        }
        upto = Collections.binarySearch(terms, target);
        if (random.nextBoolean()) {
          if (VERBOSE) {
            System.out.println("TEST: iter seekCeil target=" + target.utf8ToString() + " exists=" + exists);
          }
          // seekCeil
          final TermsEnum.SeekStatus status = termsEnum.seekCeil(target, random.nextBoolean());
          if (VERBOSE) {
            System.out.println("  got " + status);
          }
          if (upto < 0) {
            upto = -(upto+1);
            if (upto >= terms.size()) {
              assertEquals(TermsEnum.SeekStatus.END, status);
              upto = -1;
            } else {
              assertEquals(TermsEnum.SeekStatus.NOT_FOUND, status);
              assertEquals(terms.get(upto), termsEnum.term());
            }
          } else {
            assertEquals(TermsEnum.SeekStatus.FOUND, status);
            assertEquals(terms.get(upto), termsEnum.term());
          }
        } else {
          if (VERBOSE) {
            System.out.println("TEST: iter seekExact target=" + target.utf8ToString() + " exists=" + exists);
          }
          // seekExact
          final boolean result = termsEnum.seekExact(target, false);
          if (VERBOSE) {
            System.out.println("  got " + result);
          }
          if (upto < 0) {
            assertFalse(result);
            upto = -1;
          } else {
            assertTrue(result);
            assertEquals(target, termsEnum.term());
          }
        }
      }
    }
    r.close();
    d.close();
  }
 }
--- a/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java
+++ b/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java
@ -714,12 +714,12 @@ public class TestFSTs extends LuceneTestCase {
            if (random.nextBoolean()) {
              if (VERBOSE) {
-                System.out.println("  do advanceCeil(" + inputToString(inputMode, pairs.get(upto).input) + ")");
+                System.out.println("  do seekCeil(" + inputToString(inputMode, pairs.get(upto).input) + ")");
              }
              isDone = fstEnum.seekCeil(pairs.get(upto).input) == null;
            } else {
              if (VERBOSE) {
-                System.out.println("  do advanceFloor(" + inputToString(inputMode, pairs.get(upto).input) + ")");
+                System.out.println("  do seekFloor(" + inputToString(inputMode, pairs.get(upto).input) + ")");
              }
              isDone = fstEnum.seekFloor(pairs.get(upto).input) == null;
            }
--- a/modules/facet/src/java/org/apache/lucene/facet/taxonomy/lucene/LuceneTaxonomyWriter.java
+++ b/modules/facet/src/java/org/apache/lucene/facet/taxonomy/lucene/LuceneTaxonomyWriter.java
@ -27,10 +27,8 @@ import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.index.LogByteSizeMergePolicy;
 import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.TermsEnum.SeekStatus;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.LockObtainFailedException;
@ -798,6 +796,7 @@ public class LuceneTaxonomyWriter implements TaxonomyWriter {
    // per step)
    while (otherTaxonomiesLeft>0) {
      // TODO: use a pq here
      String first=null;
      for (int i=0; i<taxonomies.length; i++) {
        if (currentOthers[i]==null) continue;
@ -819,7 +818,6 @@ public class LuceneTaxonomyWriter implements TaxonomyWriter {
        int newordinal = internalAddCategory(cp, cp.length());
        // TODO (Facet): we already had this term in our hands before, in nextTE...
        // // TODO (Facet): no need to make this term?
        Term t = new Term(Consts.FULL, first);
        for (int i=0; i<taxonomies.length; i++) {
          if (first.equals(currentOthers[i])) {
            // remember the remapping of this ordinal. Note how
@ -828,8 +826,6 @@ public class LuceneTaxonomyWriter implements TaxonomyWriter {
            // like Lucene's merge works, we hope there are few seeks.
            // TODO (Facet): is there a quicker way? E.g., not specifying the
            // next term by name every time?
            SeekStatus result = othertes[i].seekCeil(t.bytes(), false);
            assert result == SeekStatus.FOUND;
            otherdocsEnum[i] = othertes[i].docs(MultiFields.getDeletedDocs(otherreaders[i]), otherdocsEnum[i]);
            otherdocsEnum[i].nextDoc(); // TODO (Facet): check?
            int origordinal = otherdocsEnum[i].docID();
@ -847,10 +843,6 @@ public class LuceneTaxonomyWriter implements TaxonomyWriter {
        // to be added because it already existed in the main taxonomy.
        // TODO (Facet): Again, is there a quicker way?
        Term t = new Term(Consts.FULL, first);
        // TODO: fix bug in MTE seekExact and use that instead.
        SeekStatus result = mainte.seekCeil(t.bytes(), false);
        assert result == SeekStatus.FOUND; // // TODO (Facet): explicit check / throw exception?
        mainde = mainte.docs(MultiFields.getDeletedDocs(mainreader), mainde);
        mainde.nextDoc(); // TODO (Facet): check?
        int newordinal = mainde.docID();
@ -859,8 +851,6 @@ public class LuceneTaxonomyWriter implements TaxonomyWriter {
        for (int i=0; i<taxonomies.length; i++) {
          if (first.equals(currentOthers[i])) {
            // TODO (Facet): again, is there a quicker way?
            result = othertes[i].seekCeil(t.bytes(), false);
            assert result == SeekStatus.FOUND; // TODO (Facet): explicit check / throw exception?
            otherdocsEnum[i] = othertes[i].docs(MultiFields.getDeletedDocs(otherreaders[i]), otherdocsEnum[i]);
            otherdocsEnum[i].nextDoc(); // TODO (Facet): check?
            int origordinal = otherdocsEnum[i].docID();