mirror of https://github.com/apache/lucene.git
LUCENE-6315: simplify the private iterator Lucene uses when resolving deleted terms -> docids
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1672710 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
de61fa6f3c
commit
6f1b9bb79a
|
@ -84,6 +84,10 @@ Other
|
|||
|
||||
* LUCENE-5439: Add 'ant jacoco' build target. (Robert Muir)
|
||||
|
||||
* LUCENE-6315: Simplify the private iterator Lucene uses internally
|
||||
when resolving deleted terms to matched docids. (Robert Muir, Adrien
|
||||
Grand, Mike McCandless)
|
||||
|
||||
======================= Lucene 5.1.0 =======================
|
||||
|
||||
New Features
|
||||
|
|
|
@ -328,12 +328,6 @@ class BufferedUpdatesStream implements Accountable {
|
|||
}
|
||||
|
||||
if (infoStream.isEnabled("BD")) {
|
||||
Directory dir;
|
||||
if (segmentInfos.size() > 0) {
|
||||
dir = segmentInfos.info(0).info.dir;
|
||||
} else {
|
||||
dir = null;
|
||||
}
|
||||
infoStream.message("BD", "prune sis=" + segmentInfos + " minGen=" + minGen + " packetCount=" + updates.size());
|
||||
}
|
||||
final int limit = updates.size();
|
||||
|
@ -489,18 +483,13 @@ class BufferedUpdatesStream implements Accountable {
|
|||
String field = null;
|
||||
SegmentQueue queue = null;
|
||||
|
||||
while (true) {
|
||||
BytesRef term;
|
||||
|
||||
boolean newField;
|
||||
while ((term = iter.next()) != null) {
|
||||
|
||||
newField = iter.next();
|
||||
|
||||
if (newField) {
|
||||
if (iter.field() != field) {
|
||||
// field changed
|
||||
field = iter.field();
|
||||
if (field == null) {
|
||||
// No more terms:
|
||||
break;
|
||||
}
|
||||
|
||||
queue = new SegmentQueue(numReaders);
|
||||
|
||||
|
@ -521,9 +510,8 @@ class BufferedUpdatesStream implements Accountable {
|
|||
assert checkDeleteTerm(null);
|
||||
}
|
||||
|
||||
// Get next term to delete
|
||||
BytesRef term = iter.term();
|
||||
assert checkDeleteTerm(term);
|
||||
|
||||
delTermVisitedCount++;
|
||||
|
||||
long delGen = iter.delGen();
|
||||
|
|
|
@ -17,24 +17,24 @@ package org.apache.lucene.index;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefIterator;
|
||||
|
||||
// TODO: maybe TermsFilter could use this?
|
||||
|
||||
/** Iterates over terms in multiple fields, notifying the caller when a new field is started. */
|
||||
interface FieldTermIterator {
|
||||
/** Advances to the next term, returning true if it's in a new field or there are no more terms. Call {@link #field} to see which
|
||||
* field; if that returns null then the iteration ended. */
|
||||
boolean next();
|
||||
/** Iterates over terms in across multiple fields. The caller must
|
||||
* check {@link #field} after each {@link #next} to see if the field
|
||||
* changed, but {@code ==} can be used since the iterator
|
||||
* implementation ensures it will use the same String instance for
|
||||
* a given field. */
|
||||
|
||||
/** Returns current field, or null if the iteration ended. */
|
||||
String field();
|
||||
|
||||
/** Returns current term. */
|
||||
BytesRef term();
|
||||
abstract class FieldTermIterator implements BytesRefIterator {
|
||||
/** Returns current field. This method should not be called
|
||||
* after iteration is done. Note that you may use == to
|
||||
* detect a change in field. */
|
||||
abstract String field();
|
||||
|
||||
/** Del gen of the current term. */
|
||||
// TODO: this is really per-iterator not per term, but when we use MergedPrefixCodedTermsIterator we need to know which iterator we are on
|
||||
long delGen();
|
||||
abstract long delGen();
|
||||
}
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@ import org.apache.lucene.util.BytesRef;
|
|||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
||||
/** Merges multiple {@link FieldTermIterator}s */
|
||||
class MergedPrefixCodedTermsIterator implements FieldTermIterator {
|
||||
class MergedPrefixCodedTermsIterator extends FieldTermIterator {
|
||||
|
||||
private static class TermMergeQueue extends PriorityQueue<TermIterator> {
|
||||
TermMergeQueue(int size) {
|
||||
|
@ -74,53 +74,51 @@ class MergedPrefixCodedTermsIterator implements FieldTermIterator {
|
|||
String field;
|
||||
|
||||
@Override
|
||||
public boolean next() {
|
||||
public BytesRef next() {
|
||||
if (termQueue.size() == 0) {
|
||||
// Current field is done:
|
||||
// No more terms in current field:
|
||||
if (fieldQueue.size() == 0) {
|
||||
// No more fields:
|
||||
field = null;
|
||||
return true;
|
||||
return null;
|
||||
}
|
||||
|
||||
// Transfer all iterators on the next field into the term queue:
|
||||
TermIterator top = fieldQueue.pop();
|
||||
termQueue.add(top);
|
||||
assert top.field() != null;
|
||||
field = top.field;
|
||||
assert field != null;
|
||||
|
||||
while (fieldQueue.size() != 0 && fieldQueue.top().field.equals(top.field)) {
|
||||
termQueue.add(fieldQueue.pop());
|
||||
TermIterator iter = fieldQueue.pop();
|
||||
assert iter.field.equals(field);
|
||||
// TODO: a little bit evil; we do this so we can == on field down below:
|
||||
iter.field = field;
|
||||
termQueue.add(iter);
|
||||
}
|
||||
|
||||
field = top.field;
|
||||
return true;
|
||||
return termQueue.top().bytes;
|
||||
} else {
|
||||
TermIterator top = termQueue.top();
|
||||
if (top.next()) {
|
||||
// New field
|
||||
if (top.next() == null) {
|
||||
termQueue.pop();
|
||||
if (top.field() != null) {
|
||||
fieldQueue.add(top);
|
||||
}
|
||||
} else if (top.field() != field) {
|
||||
// Field changed
|
||||
termQueue.pop();
|
||||
fieldQueue.add(top);
|
||||
} else {
|
||||
termQueue.updateTop();
|
||||
}
|
||||
|
||||
if (termQueue.size() != 0) {
|
||||
// Still terms left in this field
|
||||
return false;
|
||||
} else {
|
||||
if (termQueue.size() == 0) {
|
||||
// Recurse (just once) to go to next field:
|
||||
return next();
|
||||
} else {
|
||||
// Still terms left in this field
|
||||
return termQueue.top().bytes;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef term() {
|
||||
return termQueue.top().bytes;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String field() {
|
||||
return field;
|
||||
|
|
|
@ -104,7 +104,7 @@ class PrefixCodedTerms implements Accountable {
|
|||
}
|
||||
}
|
||||
|
||||
public static class TermIterator implements FieldTermIterator {
|
||||
public static class TermIterator extends FieldTermIterator {
|
||||
final IndexInput input;
|
||||
final BytesRefBuilder builder = new BytesRefBuilder();
|
||||
final BytesRef bytes = builder.get();
|
||||
|
@ -123,7 +123,7 @@ class PrefixCodedTerms implements Accountable {
|
|||
}
|
||||
|
||||
@Override
|
||||
public boolean next() {
|
||||
public BytesRef next() {
|
||||
if (input.getFilePointer() < end) {
|
||||
try {
|
||||
int code = input.readVInt();
|
||||
|
@ -134,13 +134,13 @@ class PrefixCodedTerms implements Accountable {
|
|||
int prefix = code >>> 1;
|
||||
int suffix = input.readVInt();
|
||||
readTermBytes(prefix, suffix);
|
||||
return newField;
|
||||
return bytes;
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
} else {
|
||||
field = null;
|
||||
return true;
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -151,11 +151,6 @@ class PrefixCodedTerms implements Accountable {
|
|||
builder.setLength(prefix + suffix);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef term() {
|
||||
return bytes;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String field() {
|
||||
return field;
|
||||
|
|
|
@ -79,17 +79,9 @@ public class TestDocumentsWriterDeleteQueue extends LuceneTestCase {
|
|||
HashSet<Term> frozenSet = new HashSet<>();
|
||||
BytesRefBuilder bytesRef = new BytesRefBuilder();
|
||||
TermIterator iter = queue.freezeGlobalBuffer(null).termIterator();
|
||||
String field = null;
|
||||
while (true) {
|
||||
boolean newField = iter.next();
|
||||
if (newField) {
|
||||
field = iter.field;
|
||||
if (field == null) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
while (iter.next() != null) {
|
||||
bytesRef.copyBytes(iter.bytes);
|
||||
frozenSet.add(new Term(field, bytesRef.toBytesRef()));
|
||||
frozenSet.add(new Term(iter.field(), bytesRef.toBytesRef()));
|
||||
}
|
||||
assertEquals(uniqueValues, frozenSet);
|
||||
assertEquals("num deletes must be 0 after freeze", 0, queue
|
||||
|
@ -110,16 +102,12 @@ public class TestDocumentsWriterDeleteQueue extends LuceneTestCase {
|
|||
queue.clear();
|
||||
assertFalse(queue.anyChanges());
|
||||
final int size = 200 + random().nextInt(500) * RANDOM_MULTIPLIER;
|
||||
int termsSinceFreeze = 0;
|
||||
int queriesSinceFreeze = 0;
|
||||
for (int i = 0; i < size; i++) {
|
||||
Term term = new Term("id", "" + i);
|
||||
if (random().nextInt(10) == 0) {
|
||||
queue.addDelete(new TermQuery(term));
|
||||
queriesSinceFreeze++;
|
||||
} else {
|
||||
queue.addDelete(term);
|
||||
termsSinceFreeze++;
|
||||
}
|
||||
assertTrue(queue.anyChanges());
|
||||
if (random().nextInt(10) == 0) {
|
||||
|
@ -218,17 +206,9 @@ public class TestDocumentsWriterDeleteQueue extends LuceneTestCase {
|
|||
BytesRefBuilder builder = new BytesRefBuilder();
|
||||
|
||||
TermIterator iter = queue.freezeGlobalBuffer(null).termIterator();
|
||||
String field = null;
|
||||
while (true) {
|
||||
boolean newField = iter.next();
|
||||
if (newField) {
|
||||
field = iter.field;
|
||||
if (field == null) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
while (iter.next() != null) {
|
||||
builder.copyBytes(iter.bytes);
|
||||
frozenSet.add(new Term(field, builder.toBytesRef()));
|
||||
frozenSet.add(new Term(iter.field(), builder.toBytesRef()));
|
||||
}
|
||||
|
||||
assertEquals("num deletes must be 0 after freeze", 0, queue
|
||||
|
|
|
@ -33,8 +33,7 @@ public class TestPrefixCodedTerms extends LuceneTestCase {
|
|||
PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder();
|
||||
PrefixCodedTerms pb = b.finish();
|
||||
TermIterator iter = pb.iterator();
|
||||
assertTrue(iter.next());
|
||||
assertNull(iter.field);
|
||||
assertNull(iter.next());
|
||||
}
|
||||
|
||||
public void testOne() {
|
||||
|
@ -43,11 +42,10 @@ public class TestPrefixCodedTerms extends LuceneTestCase {
|
|||
b.add(term);
|
||||
PrefixCodedTerms pb = b.finish();
|
||||
TermIterator iter = pb.iterator();
|
||||
assertTrue(iter.next());
|
||||
assertEquals("foo", iter.field);
|
||||
assertNotNull(iter.next());
|
||||
assertEquals("foo", iter.field());
|
||||
assertEquals("bogus", iter.bytes.utf8ToString());
|
||||
assertTrue(iter.next());
|
||||
assertNull(iter.field);
|
||||
assertNull(iter.next());
|
||||
}
|
||||
|
||||
public void testRandom() {
|
||||
|
@ -66,19 +64,10 @@ public class TestPrefixCodedTerms extends LuceneTestCase {
|
|||
|
||||
TermIterator iter = pb.iterator();
|
||||
Iterator<Term> expected = terms.iterator();
|
||||
String field = "";
|
||||
//System.out.println("TEST: now iter");
|
||||
while (true) {
|
||||
boolean newField = iter.next();
|
||||
//System.out.println(" newField=" + newField);
|
||||
if (newField) {
|
||||
field = iter.field;
|
||||
if (field == null) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
while (iter.next() != null) {
|
||||
assertTrue(expected.hasNext());
|
||||
assertEquals(expected.next(), new Term(field, iter.bytes));
|
||||
assertEquals(expected.next(), new Term(iter.field(), iter.bytes));
|
||||
}
|
||||
|
||||
assertFalse(expected.hasNext());
|
||||
|
@ -97,13 +86,14 @@ public class TestPrefixCodedTerms extends LuceneTestCase {
|
|||
PrefixCodedTerms pb2 = b2.finish();
|
||||
|
||||
MergedPrefixCodedTermsIterator merged = new MergedPrefixCodedTermsIterator(Arrays.asList(new PrefixCodedTerms[] {pb1, pb2}));
|
||||
assertTrue(merged.next());
|
||||
BytesRef term = merged.next();
|
||||
assertNotNull(term);
|
||||
assertEquals("foo", merged.field());
|
||||
assertEquals("a", merged.term().utf8ToString());
|
||||
assertFalse(merged.next());
|
||||
assertEquals("b", merged.term().utf8ToString());
|
||||
assertTrue(merged.next());
|
||||
assertNull(merged.field());
|
||||
assertEquals("a", term.utf8ToString());
|
||||
term = merged.next();
|
||||
assertNotNull(term);
|
||||
assertEquals("b", term.utf8ToString());
|
||||
assertNull(merged.next());
|
||||
}
|
||||
|
||||
@SuppressWarnings({"unchecked","rawtypes"})
|
||||
|
@ -137,25 +127,20 @@ public class TestPrefixCodedTerms extends LuceneTestCase {
|
|||
String field = "";
|
||||
|
||||
BytesRef lastTerm = null;
|
||||
|
||||
while (true) {
|
||||
if (actual.next()) {
|
||||
BytesRef term;
|
||||
while ((term = actual.next()) != null) {
|
||||
if (field != actual.field()) {
|
||||
field = actual.field();
|
||||
if (field == null) {
|
||||
break;
|
||||
}
|
||||
lastTerm = null;
|
||||
//System.out.println("\nTEST: new field: " + field);
|
||||
}
|
||||
if (lastTerm != null && lastTerm.equals(actual.term())) {
|
||||
if (lastTerm != null && lastTerm.equals(term)) {
|
||||
continue;
|
||||
}
|
||||
//System.out.println("TEST: iter: field=" + field + " term=" + actual.term());
|
||||
lastTerm = BytesRef.deepCopyOf(actual.term());
|
||||
lastTerm = BytesRef.deepCopyOf(term);
|
||||
assertTrue(expected.hasNext());
|
||||
|
||||
Term expectedTerm = expected.next();
|
||||
assertEquals(expectedTerm, new Term(field, actual.term()));
|
||||
assertEquals(expectedTerm, new Term(field, term));
|
||||
}
|
||||
|
||||
assertFalse(expected.hasNext());
|
||||
|
|
Loading…
Reference in New Issue