mirror of https://github.com/apache/lucene.git
LUCENE-3260: fix wrong result from MultiTermsEnum.next() after seekExact
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1141593 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b5be90974b
commit
9285e08bce
|
@ -43,6 +43,7 @@ public final class MultiTermsEnum extends TermsEnum {
|
||||||
private final MultiDocsAndPositionsEnum.EnumWithSlice[] subDocsAndPositions;
|
private final MultiDocsAndPositionsEnum.EnumWithSlice[] subDocsAndPositions;
|
||||||
|
|
||||||
private BytesRef lastSeek;
|
private BytesRef lastSeek;
|
||||||
|
private boolean lastSeekExact;
|
||||||
private final BytesRef lastSeekScratch = new BytesRef();
|
private final BytesRef lastSeekScratch = new BytesRef();
|
||||||
|
|
||||||
private int numTop;
|
private int numTop;
|
||||||
|
@ -149,6 +150,7 @@ public final class MultiTermsEnum extends TermsEnum {
|
||||||
}
|
}
|
||||||
|
|
||||||
lastSeek = null;
|
lastSeek = null;
|
||||||
|
lastSeekExact = true;
|
||||||
|
|
||||||
for(int i=0;i<numSubs;i++) {
|
for(int i=0;i<numSubs;i++) {
|
||||||
final boolean status;
|
final boolean status;
|
||||||
|
@ -179,6 +181,7 @@ public final class MultiTermsEnum extends TermsEnum {
|
||||||
if (status) {
|
if (status) {
|
||||||
top[numTop++] = currentSubs[i];
|
top[numTop++] = currentSubs[i];
|
||||||
current = currentSubs[i].current = currentSubs[i].terms.term();
|
current = currentSubs[i].current = currentSubs[i].terms.term();
|
||||||
|
assert term.equals(currentSubs[i].current);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -191,6 +194,7 @@ public final class MultiTermsEnum extends TermsEnum {
|
||||||
public SeekStatus seekCeil(BytesRef term, boolean useCache) throws IOException {
|
public SeekStatus seekCeil(BytesRef term, boolean useCache) throws IOException {
|
||||||
queue.clear();
|
queue.clear();
|
||||||
numTop = 0;
|
numTop = 0;
|
||||||
|
lastSeekExact = false;
|
||||||
|
|
||||||
boolean seekOpt = false;
|
boolean seekOpt = false;
|
||||||
if (lastSeek != null && termComp.compare(lastSeek, term) <= 0) {
|
if (lastSeek != null && termComp.compare(lastSeek, term) <= 0) {
|
||||||
|
@ -293,6 +297,17 @@ public final class MultiTermsEnum extends TermsEnum {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public BytesRef next() throws IOException {
|
public BytesRef next() throws IOException {
|
||||||
|
if (lastSeekExact) {
|
||||||
|
// Must seekCeil at this point, so those subs that
|
||||||
|
// didn't have the term can find the following term.
|
||||||
|
// NOTE: we could save some CPU by only seekCeil the
|
||||||
|
// subs that didn't match the last exact seek... but
|
||||||
|
// most impls short-circuit if you seekCeil to term
|
||||||
|
// they are already on.
|
||||||
|
final SeekStatus status = seekCeil(current);
|
||||||
|
assert status == SeekStatus.FOUND;
|
||||||
|
lastSeekExact = false;
|
||||||
|
}
|
||||||
lastSeek = null;
|
lastSeek = null;
|
||||||
|
|
||||||
// restore queue
|
// restore queue
|
||||||
|
|
|
@ -0,0 +1,143 @@
|
||||||
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.LineFileDocs;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
import org.apache.lucene.util._TestUtil;
|
||||||
|
|
||||||
|
public class TestTermsEnum extends LuceneTestCase {
|
||||||
|
|
||||||
|
public void test() throws Exception {
|
||||||
|
final LineFileDocs docs = new LineFileDocs(random);
|
||||||
|
final Directory d = newDirectory();
|
||||||
|
final RandomIndexWriter w = new RandomIndexWriter(random, d);
|
||||||
|
final int numDocs = atLeast(10);
|
||||||
|
for(int docCount=0;docCount<numDocs;docCount++) {
|
||||||
|
w.addDocument(docs.nextDoc());
|
||||||
|
}
|
||||||
|
final IndexReader r = w.getReader();
|
||||||
|
w.close();
|
||||||
|
|
||||||
|
final List<BytesRef> terms = new ArrayList<BytesRef>();
|
||||||
|
final TermsEnum termsEnum = MultiFields.getTerms(r, "body").iterator();
|
||||||
|
BytesRef term;
|
||||||
|
while((term = termsEnum.next()) != null) {
|
||||||
|
terms.add(new BytesRef(term));
|
||||||
|
}
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println("TEST: " + terms.size() + " terms");
|
||||||
|
}
|
||||||
|
|
||||||
|
int upto = -1;
|
||||||
|
final int iters = atLeast(200);
|
||||||
|
for(int iter=0;iter<iters;iter++) {
|
||||||
|
final boolean isEnd;
|
||||||
|
if (upto != -1 && random.nextBoolean()) {
|
||||||
|
// next
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println("TEST: iter next");
|
||||||
|
}
|
||||||
|
isEnd = termsEnum.next() == null;
|
||||||
|
upto++;
|
||||||
|
if (isEnd) {
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println(" end");
|
||||||
|
}
|
||||||
|
assertEquals(upto, terms.size());
|
||||||
|
upto = -1;
|
||||||
|
} else {
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println(" got term=" + termsEnum.term().utf8ToString() + " expected=" + terms.get(upto).utf8ToString());
|
||||||
|
}
|
||||||
|
assertTrue(upto < terms.size());
|
||||||
|
assertEquals(terms.get(upto), termsEnum.term());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
|
||||||
|
final BytesRef target;
|
||||||
|
final String exists;
|
||||||
|
if (random.nextBoolean()) {
|
||||||
|
// likely fake term
|
||||||
|
if (random.nextBoolean()) {
|
||||||
|
target = new BytesRef(_TestUtil.randomSimpleString(random));
|
||||||
|
} else {
|
||||||
|
target = new BytesRef(_TestUtil.randomRealisticUnicodeString(random));
|
||||||
|
}
|
||||||
|
exists = "likely not";
|
||||||
|
} else {
|
||||||
|
// real term
|
||||||
|
target = terms.get(random.nextInt(terms.size()));
|
||||||
|
exists = "yes";
|
||||||
|
}
|
||||||
|
|
||||||
|
upto = Collections.binarySearch(terms, target);
|
||||||
|
|
||||||
|
if (random.nextBoolean()) {
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println("TEST: iter seekCeil target=" + target.utf8ToString() + " exists=" + exists);
|
||||||
|
}
|
||||||
|
// seekCeil
|
||||||
|
final TermsEnum.SeekStatus status = termsEnum.seekCeil(target, random.nextBoolean());
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println(" got " + status);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (upto < 0) {
|
||||||
|
upto = -(upto+1);
|
||||||
|
if (upto >= terms.size()) {
|
||||||
|
assertEquals(TermsEnum.SeekStatus.END, status);
|
||||||
|
upto = -1;
|
||||||
|
} else {
|
||||||
|
assertEquals(TermsEnum.SeekStatus.NOT_FOUND, status);
|
||||||
|
assertEquals(terms.get(upto), termsEnum.term());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
assertEquals(TermsEnum.SeekStatus.FOUND, status);
|
||||||
|
assertEquals(terms.get(upto), termsEnum.term());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println("TEST: iter seekExact target=" + target.utf8ToString() + " exists=" + exists);
|
||||||
|
}
|
||||||
|
// seekExact
|
||||||
|
final boolean result = termsEnum.seekExact(target, false);
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println(" got " + result);
|
||||||
|
}
|
||||||
|
if (upto < 0) {
|
||||||
|
assertFalse(result);
|
||||||
|
upto = -1;
|
||||||
|
} else {
|
||||||
|
assertTrue(result);
|
||||||
|
assertEquals(target, termsEnum.term());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
r.close();
|
||||||
|
d.close();
|
||||||
|
}
|
||||||
|
}
|
|
@ -714,12 +714,12 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
|
|
||||||
if (random.nextBoolean()) {
|
if (random.nextBoolean()) {
|
||||||
if (VERBOSE) {
|
if (VERBOSE) {
|
||||||
System.out.println(" do advanceCeil(" + inputToString(inputMode, pairs.get(upto).input) + ")");
|
System.out.println(" do seekCeil(" + inputToString(inputMode, pairs.get(upto).input) + ")");
|
||||||
}
|
}
|
||||||
isDone = fstEnum.seekCeil(pairs.get(upto).input) == null;
|
isDone = fstEnum.seekCeil(pairs.get(upto).input) == null;
|
||||||
} else {
|
} else {
|
||||||
if (VERBOSE) {
|
if (VERBOSE) {
|
||||||
System.out.println(" do advanceFloor(" + inputToString(inputMode, pairs.get(upto).input) + ")");
|
System.out.println(" do seekFloor(" + inputToString(inputMode, pairs.get(upto).input) + ")");
|
||||||
}
|
}
|
||||||
isDone = fstEnum.seekFloor(pairs.get(upto).input) == null;
|
isDone = fstEnum.seekFloor(pairs.get(upto).input) == null;
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,10 +27,8 @@ import org.apache.lucene.index.IndexWriterConfig;
|
||||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||||
import org.apache.lucene.index.LogByteSizeMergePolicy;
|
import org.apache.lucene.index.LogByteSizeMergePolicy;
|
||||||
import org.apache.lucene.index.MultiFields;
|
import org.apache.lucene.index.MultiFields;
|
||||||
import org.apache.lucene.index.Term;
|
|
||||||
import org.apache.lucene.index.Terms;
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.index.TermsEnum.SeekStatus;
|
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.LockObtainFailedException;
|
import org.apache.lucene.store.LockObtainFailedException;
|
||||||
|
@ -798,6 +796,7 @@ public class LuceneTaxonomyWriter implements TaxonomyWriter {
|
||||||
// per step)
|
// per step)
|
||||||
|
|
||||||
while (otherTaxonomiesLeft>0) {
|
while (otherTaxonomiesLeft>0) {
|
||||||
|
// TODO: use a pq here
|
||||||
String first=null;
|
String first=null;
|
||||||
for (int i=0; i<taxonomies.length; i++) {
|
for (int i=0; i<taxonomies.length; i++) {
|
||||||
if (currentOthers[i]==null) continue;
|
if (currentOthers[i]==null) continue;
|
||||||
|
@ -819,7 +818,6 @@ public class LuceneTaxonomyWriter implements TaxonomyWriter {
|
||||||
int newordinal = internalAddCategory(cp, cp.length());
|
int newordinal = internalAddCategory(cp, cp.length());
|
||||||
// TODO (Facet): we already had this term in our hands before, in nextTE...
|
// TODO (Facet): we already had this term in our hands before, in nextTE...
|
||||||
// // TODO (Facet): no need to make this term?
|
// // TODO (Facet): no need to make this term?
|
||||||
Term t = new Term(Consts.FULL, first);
|
|
||||||
for (int i=0; i<taxonomies.length; i++) {
|
for (int i=0; i<taxonomies.length; i++) {
|
||||||
if (first.equals(currentOthers[i])) {
|
if (first.equals(currentOthers[i])) {
|
||||||
// remember the remapping of this ordinal. Note how
|
// remember the remapping of this ordinal. Note how
|
||||||
|
@ -828,8 +826,6 @@ public class LuceneTaxonomyWriter implements TaxonomyWriter {
|
||||||
// like Lucene's merge works, we hope there are few seeks.
|
// like Lucene's merge works, we hope there are few seeks.
|
||||||
// TODO (Facet): is there a quicker way? E.g., not specifying the
|
// TODO (Facet): is there a quicker way? E.g., not specifying the
|
||||||
// next term by name every time?
|
// next term by name every time?
|
||||||
SeekStatus result = othertes[i].seekCeil(t.bytes(), false);
|
|
||||||
assert result == SeekStatus.FOUND;
|
|
||||||
otherdocsEnum[i] = othertes[i].docs(MultiFields.getDeletedDocs(otherreaders[i]), otherdocsEnum[i]);
|
otherdocsEnum[i] = othertes[i].docs(MultiFields.getDeletedDocs(otherreaders[i]), otherdocsEnum[i]);
|
||||||
otherdocsEnum[i].nextDoc(); // TODO (Facet): check?
|
otherdocsEnum[i].nextDoc(); // TODO (Facet): check?
|
||||||
int origordinal = otherdocsEnum[i].docID();
|
int origordinal = otherdocsEnum[i].docID();
|
||||||
|
@ -847,10 +843,6 @@ public class LuceneTaxonomyWriter implements TaxonomyWriter {
|
||||||
// to be added because it already existed in the main taxonomy.
|
// to be added because it already existed in the main taxonomy.
|
||||||
|
|
||||||
// TODO (Facet): Again, is there a quicker way?
|
// TODO (Facet): Again, is there a quicker way?
|
||||||
Term t = new Term(Consts.FULL, first);
|
|
||||||
// TODO: fix bug in MTE seekExact and use that instead.
|
|
||||||
SeekStatus result = mainte.seekCeil(t.bytes(), false);
|
|
||||||
assert result == SeekStatus.FOUND; // // TODO (Facet): explicit check / throw exception?
|
|
||||||
mainde = mainte.docs(MultiFields.getDeletedDocs(mainreader), mainde);
|
mainde = mainte.docs(MultiFields.getDeletedDocs(mainreader), mainde);
|
||||||
mainde.nextDoc(); // TODO (Facet): check?
|
mainde.nextDoc(); // TODO (Facet): check?
|
||||||
int newordinal = mainde.docID();
|
int newordinal = mainde.docID();
|
||||||
|
@ -859,8 +851,6 @@ public class LuceneTaxonomyWriter implements TaxonomyWriter {
|
||||||
for (int i=0; i<taxonomies.length; i++) {
|
for (int i=0; i<taxonomies.length; i++) {
|
||||||
if (first.equals(currentOthers[i])) {
|
if (first.equals(currentOthers[i])) {
|
||||||
// TODO (Facet): again, is there a quicker way?
|
// TODO (Facet): again, is there a quicker way?
|
||||||
result = othertes[i].seekCeil(t.bytes(), false);
|
|
||||||
assert result == SeekStatus.FOUND; // TODO (Facet): explicit check / throw exception?
|
|
||||||
otherdocsEnum[i] = othertes[i].docs(MultiFields.getDeletedDocs(otherreaders[i]), otherdocsEnum[i]);
|
otherdocsEnum[i] = othertes[i].docs(MultiFields.getDeletedDocs(otherreaders[i]), otherdocsEnum[i]);
|
||||||
otherdocsEnum[i].nextDoc(); // TODO (Facet): check?
|
otherdocsEnum[i].nextDoc(); // TODO (Facet): check?
|
||||||
int origordinal = otherdocsEnum[i].docID();
|
int origordinal = otherdocsEnum[i].docID();
|
||||||
|
|
Loading…
Reference in New Issue