mirror of https://github.com/apache/lucene.git
LUCENE-3260: fix wrong result from MultiTermsEnum.next() after seekExact
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1141593 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent b5be90974b
commit 9285e08bce
@@ -43,6 +43,7 @@ public final class MultiTermsEnum extends TermsEnum {
  private final MultiDocsAndPositionsEnum.EnumWithSlice[] subDocsAndPositions;

  private BytesRef lastSeek;
  private boolean lastSeekExact;
  private final BytesRef lastSeekScratch = new BytesRef();

  private int numTop;
@@ -149,6 +150,7 @@ public final class MultiTermsEnum extends TermsEnum {
    }

    lastSeek = null;
    lastSeekExact = true;

    for(int i=0;i<numSubs;i++) {
      final boolean status;
@@ -179,6 +181,7 @@ public final class MultiTermsEnum extends TermsEnum {
      if (status) {
        top[numTop++] = currentSubs[i];
        current = currentSubs[i].current = currentSubs[i].terms.term();
        assert term.equals(currentSubs[i].current);
      }
    }

@@ -191,6 +194,7 @@ public final class MultiTermsEnum extends TermsEnum {
  public SeekStatus seekCeil(BytesRef term, boolean useCache) throws IOException {
    queue.clear();
    numTop = 0;
    lastSeekExact = false;

    boolean seekOpt = false;
    if (lastSeek != null && termComp.compare(lastSeek, term) <= 0) {
@@ -293,6 +297,17 @@ public final class MultiTermsEnum extends TermsEnum {

  @Override
  public BytesRef next() throws IOException {
    if (lastSeekExact) {
      // Must seekCeil at this point, so those subs that
      // didn't have the term can find the following term.
      // NOTE: we could save some CPU by only seekCeil the
      // subs that didn't match the last exact seek... but
      // most impls short-circuit if you seekCeil to term
      // they are already on.
      final SeekStatus status = seekCeil(current);
      assert status == SeekStatus.FOUND;
      lastSeekExact = false;
    }
    lastSeek = null;

    // restore queue
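The hunk above is the core of the fix: after a successful exact seek, next() now re-seeks the subs with seekCeil(current) so that subs which did not contain the exact term are positioned on the following term. A minimal usage sketch of the sequence this repairs (not part of the commit; the reader variable, field name and term are illustrative):

// Sketch only: the seekExact-then-next sequence that LUCENE-3260 fixes.
// Assumes "reader" is an IndexReader over a multi-segment index with a "body" field.
TermsEnum te = MultiFields.getTerms(reader, "body").iterator();
if (te.seekExact(new BytesRef("apple"), false)) {
  // Before this change, subs that lacked "apple" were left unpositioned, so the
  // merged next() could return the wrong term; now next() first restores the queue.
  BytesRef following = te.next(); // the term immediately after "apple" across all subs
}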
@@ -0,0 +1,143 @@
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;

public class TestTermsEnum extends LuceneTestCase {

  public void test() throws Exception {
    final LineFileDocs docs = new LineFileDocs(random);
    final Directory d = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random, d);
    final int numDocs = atLeast(10);
    for(int docCount=0;docCount<numDocs;docCount++) {
      w.addDocument(docs.nextDoc());
    }
    final IndexReader r = w.getReader();
    w.close();

    final List<BytesRef> terms = new ArrayList<BytesRef>();
    final TermsEnum termsEnum = MultiFields.getTerms(r, "body").iterator();
    BytesRef term;
    while((term = termsEnum.next()) != null) {
      terms.add(new BytesRef(term));
    }
    if (VERBOSE) {
      System.out.println("TEST: " + terms.size() + " terms");
    }

    int upto = -1;
    final int iters = atLeast(200);
    for(int iter=0;iter<iters;iter++) {
      final boolean isEnd;
      if (upto != -1 && random.nextBoolean()) {
        // next
        if (VERBOSE) {
          System.out.println("TEST: iter next");
        }
        isEnd = termsEnum.next() == null;
        upto++;
        if (isEnd) {
          if (VERBOSE) {
            System.out.println(" end");
          }
          assertEquals(upto, terms.size());
          upto = -1;
        } else {
          if (VERBOSE) {
            System.out.println(" got term=" + termsEnum.term().utf8ToString() + " expected=" + terms.get(upto).utf8ToString());
          }
          assertTrue(upto < terms.size());
          assertEquals(terms.get(upto), termsEnum.term());
        }
      } else {

        final BytesRef target;
        final String exists;
        if (random.nextBoolean()) {
          // likely fake term
          if (random.nextBoolean()) {
            target = new BytesRef(_TestUtil.randomSimpleString(random));
          } else {
            target = new BytesRef(_TestUtil.randomRealisticUnicodeString(random));
          }
          exists = "likely not";
        } else {
          // real term
          target = terms.get(random.nextInt(terms.size()));
          exists = "yes";
        }

        upto = Collections.binarySearch(terms, target);

        if (random.nextBoolean()) {
          if (VERBOSE) {
            System.out.println("TEST: iter seekCeil target=" + target.utf8ToString() + " exists=" + exists);
          }
          // seekCeil
          final TermsEnum.SeekStatus status = termsEnum.seekCeil(target, random.nextBoolean());
          if (VERBOSE) {
            System.out.println(" got " + status);
          }

          if (upto < 0) {
            upto = -(upto+1);
            if (upto >= terms.size()) {
              assertEquals(TermsEnum.SeekStatus.END, status);
              upto = -1;
            } else {
              assertEquals(TermsEnum.SeekStatus.NOT_FOUND, status);
              assertEquals(terms.get(upto), termsEnum.term());
            }
          } else {
            assertEquals(TermsEnum.SeekStatus.FOUND, status);
            assertEquals(terms.get(upto), termsEnum.term());
          }
        } else {
          if (VERBOSE) {
            System.out.println("TEST: iter seekExact target=" + target.utf8ToString() + " exists=" + exists);
          }
          // seekExact
          final boolean result = termsEnum.seekExact(target, false);
          if (VERBOSE) {
            System.out.println(" got " + result);
          }
          if (upto < 0) {
            assertFalse(result);
            upto = -1;
          } else {
            assertTrue(result);
            assertEquals(target, termsEnum.term());
          }
        }
      }
    }

    r.close();
    d.close();
  }
}
@@ -714,12 +714,12 @@ public class TestFSTs extends LuceneTestCase {

        if (random.nextBoolean()) {
          if (VERBOSE) {
            System.out.println(" do advanceCeil(" + inputToString(inputMode, pairs.get(upto).input) + ")");
            System.out.println(" do seekCeil(" + inputToString(inputMode, pairs.get(upto).input) + ")");
          }
          isDone = fstEnum.seekCeil(pairs.get(upto).input) == null;
        } else {
          if (VERBOSE) {
            System.out.println(" do advanceFloor(" + inputToString(inputMode, pairs.get(upto).input) + ")");
            System.out.println(" do seekFloor(" + inputToString(inputMode, pairs.get(upto).input) + ")");
          }
          isDone = fstEnum.seekFloor(pairs.get(upto).input) == null;
        }
@@ -27,10 +27,8 @@ import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
@@ -798,6 +796,7 @@ public class LuceneTaxonomyWriter implements TaxonomyWriter {
    // per step)

    while (otherTaxonomiesLeft>0) {
      // TODO: use a pq here
      String first=null;
      for (int i=0; i<taxonomies.length; i++) {
        if (currentOthers[i]==null) continue;
@@ -819,7 +818,6 @@ public class LuceneTaxonomyWriter implements TaxonomyWriter {
      int newordinal = internalAddCategory(cp, cp.length());
      // TODO (Facet): we already had this term in our hands before, in nextTE...
      // // TODO (Facet): no need to make this term?
      Term t = new Term(Consts.FULL, first);
      for (int i=0; i<taxonomies.length; i++) {
        if (first.equals(currentOthers[i])) {
          // remember the remapping of this ordinal. Note how
@@ -828,8 +826,6 @@ public class LuceneTaxonomyWriter implements TaxonomyWriter {
          // like Lucene's merge works, we hope there are few seeks.
          // TODO (Facet): is there a quicker way? E.g., not specifying the
          // next term by name every time?
          SeekStatus result = othertes[i].seekCeil(t.bytes(), false);
          assert result == SeekStatus.FOUND;
          otherdocsEnum[i] = othertes[i].docs(MultiFields.getDeletedDocs(otherreaders[i]), otherdocsEnum[i]);
          otherdocsEnum[i].nextDoc(); // TODO (Facet): check?
          int origordinal = otherdocsEnum[i].docID();
@@ -847,10 +843,6 @@ public class LuceneTaxonomyWriter implements TaxonomyWriter {
        // to be added because it already existed in the main taxonomy.

        // TODO (Facet): Again, is there a quicker way?
        Term t = new Term(Consts.FULL, first);
        // TODO: fix bug in MTE seekExact and use that instead.
        SeekStatus result = mainte.seekCeil(t.bytes(), false);
        assert result == SeekStatus.FOUND; // // TODO (Facet): explicit check / throw exception?
        mainde = mainte.docs(MultiFields.getDeletedDocs(mainreader), mainde);
        mainde.nextDoc(); // TODO (Facet): check?
        int newordinal = mainde.docID();
@@ -859,8 +851,6 @@ public class LuceneTaxonomyWriter implements TaxonomyWriter {
        for (int i=0; i<taxonomies.length; i++) {
          if (first.equals(currentOthers[i])) {
            // TODO (Facet): again, is there a quicker way?
            result = othertes[i].seekCeil(t.bytes(), false);
            assert result == SeekStatus.FOUND; // TODO (Facet): explicit check / throw exception?
            otherdocsEnum[i] = othertes[i].docs(MultiFields.getDeletedDocs(otherreaders[i]), otherdocsEnum[i]);
            otherdocsEnum[i].nextDoc(); // TODO (Facet): check?
            int origordinal = otherdocsEnum[i].docID();
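The seekCeil-plus-assert lookups above work around the MultiTermsEnum seekExact issue referenced in the "TODO: fix bug in MTE seekExact and use that instead" comment. A hypothetical follow-up sketch, not part of this commit, reusing the variable names from the hunk:

// Sketch only: exact-term lookup via seekExact instead of seekCeil + assert FOUND.
if (mainte.seekExact(t.bytes(), false)) {
  mainde = mainte.docs(MultiFields.getDeletedDocs(mainreader), mainde);
  mainde.nextDoc();
  int newordinal = mainde.docID();
} else {
  // The category is expected to exist; the current code asserts SeekStatus.FOUND.
}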