LUCENE-5675: finish reverting 'do not send deleted docs to PostingsFormat on flush'

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5675@1596602 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2014-05-21 15:41:55 +00:00
parent 2759adc754
commit c722f2fb9a
5 changed files with 115 additions and 170 deletions

View File

@ -1640,15 +1640,17 @@ public class CheckIndex {
// Again, with the one doc deleted:
checkFields(tfv, onlyDocIsDeleted, 1, fieldInfos, false, true, infoStream, verbose);
if (liveDocs != null && liveDocs.get(j) == false) {
// Only check live docs
continue;
// Only agg stats if the doc is live:
final boolean doStats = liveDocs == null || liveDocs.get(j);
if (doStats) {
status.docCount++;
}
status.docCount++;
for(String field : tfv) {
status.totVectors++;
if (doStats) {
status.totVectors++;
}
// Make sure FieldInfo thinks this field is vector'd:
final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);

View File

@ -347,35 +347,33 @@ class FreqProxFields extends Fields {
@Override
public int nextDoc() throws IOException {
while (true) {
if (reader.eof()) {
if (ended) {
return NO_MORE_DOCS;
} else {
ended = true;
docID = postingsArray.lastDocIDs[termID];
if (readTermFreq) {
freq = postingsArray.termFreqs[termID];
}
}
if (reader.eof()) {
if (ended) {
return NO_MORE_DOCS;
} else {
int code = reader.readVInt();
if (!readTermFreq) {
docID += code;
} else {
docID += code >>> 1;
if ((code & 1) != 0) {
freq = 1;
} else {
freq = reader.readVInt();
}
ended = true;
docID = postingsArray.lastDocIDs[termID];
if (readTermFreq) {
freq = postingsArray.termFreqs[termID];
}
}
} else {
int code = reader.readVInt();
if (!readTermFreq) {
docID += code;
} else {
docID += code >>> 1;
if ((code & 1) != 0) {
freq = 1;
} else {
freq = reader.readVInt();
}
assert docID != postingsArray.lastDocIDs[termID];
}
return docID;
assert docID != postingsArray.lastDocIDs[termID];
}
return docID;
}
@Override
@ -436,37 +434,35 @@ class FreqProxFields extends Fields {
@Override
public int nextDoc() throws IOException {
while (true) {
while (posLeft != 0) {
nextPosition();
}
if (reader.eof()) {
if (ended) {
return NO_MORE_DOCS;
} else {
ended = true;
docID = postingsArray.lastDocIDs[termID];
freq = postingsArray.termFreqs[termID];
}
} else {
int code = reader.readVInt();
docID += code >>> 1;
if ((code & 1) != 0) {
freq = 1;
} else {
freq = reader.readVInt();
}
assert docID != postingsArray.lastDocIDs[termID];
}
posLeft = freq;
pos = 0;
startOffset = 0;
return docID;
while (posLeft != 0) {
nextPosition();
}
if (reader.eof()) {
if (ended) {
return NO_MORE_DOCS;
} else {
ended = true;
docID = postingsArray.lastDocIDs[termID];
freq = postingsArray.termFreqs[termID];
}
} else {
int code = reader.readVInt();
docID += code >>> 1;
if ((code & 1) != 0) {
freq = 1;
} else {
freq = reader.readVInt();
}
assert docID != postingsArray.lastDocIDs[termID];
}
posLeft = freq;
pos = 0;
startOffset = 0;
return docID;
}
@Override

View File

@ -508,7 +508,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
writer.shutdown();
IndexReader reader = DirectoryReader.open(dir);
final Term t = new Term("content", "aa");
assertEquals(2, reader.docFreq(t));
assertEquals(3, reader.docFreq(t));
// Make sure the doc that hit the exception was marked
// as deleted:
@ -648,7 +648,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
IndexReader reader = DirectoryReader.open(dir);
if (i == 0) {
int expected = 5;
assertEquals(expected-1, reader.docFreq(new Term("contents", "here")));
assertEquals(expected, reader.docFreq(new Term("contents", "here")));
assertEquals(expected, reader.maxDoc());
int numDel = 0;
final Bits liveDocs = MultiFields.getLiveDocs(reader);
@ -760,8 +760,8 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
IndexReader reader = DirectoryReader.open(dir);
int expected = (3+(1-i)*2)*NUM_THREAD*NUM_ITER;
assertEquals("i=" + i, expected - NUM_THREAD*NUM_ITER, reader.docFreq(new Term("contents", "here")));
assertEquals("i=" + i, expected, reader.maxDoc());
assertEquals("i=" + i, expected, reader.docFreq(new Term("contents", "here")));
assertEquals(expected, reader.maxDoc());
int numDel = 0;
final Bits liveDocs = MultiFields.getLiveDocs(reader);
assertNotNull(liveDocs);

View File

@ -123,18 +123,14 @@ public class TestMultiFields extends LuceneTestCase {
}
DocsEnum docsEnum = TestUtil.docs(random(), reader, "field", term, liveDocs, null, DocsEnum.FLAG_NONE);
if (docsEnum == null) {
for(int docID : docs.get(term)) {
assert deleted.contains(docID);
assertNotNull(docsEnum);
for(int docID : docs.get(term)) {
if (!deleted.contains(docID)) {
assertEquals(docID, docsEnum.nextDoc());
}
} else {
for(int docID : docs.get(term)) {
if (!deleted.contains(docID)) {
assertEquals(docID, docsEnum.nextDoc());
}
}
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsEnum.nextDoc());
}
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsEnum.nextDoc());
}
reader.close();

View File

@ -17,7 +17,8 @@ package org.apache.lucene.uninverting;
* limitations under the License.
*/
import java.io.IOException;
import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
@ -46,17 +47,14 @@ import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.StoredDocument;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
public class TestFieldCacheVsDocValues extends LuceneTestCase {
public void testByteMissingVsFieldCache() throws Exception {
@ -330,7 +328,7 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
AtomicReader r = context.reader();
SortedDocValues expected = FieldCache.DEFAULT.getTermsIndex(r, "indexed");
SortedDocValues actual = r.getSortedDocValues("dv");
assertEquals(r.maxDoc(), r.getLiveDocs(), expected, actual);
assertEquals(r.maxDoc(), expected, actual);
}
ir.close();
dir.close();
@ -393,7 +391,7 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
AtomicReader r = context.reader();
SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(r, "indexed", null);
SortedSetDocValues actual = r.getSortedSetDocValues("dv");
assertEquals(r.maxDoc(), r.getLiveDocs(), expected, actual);
assertEquals(r.maxDoc(), expected, actual);
}
ir.close();
@ -404,7 +402,7 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
AtomicReader ar = getOnlySegmentReader(ir);
SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(ar, "indexed", null);
SortedSetDocValues actual = ar.getSortedSetDocValues("dv");
assertEquals(ir.maxDoc(), ar.getLiveDocs(), expected, actual);
assertEquals(ir.maxDoc(), expected, actual);
ir.close();
writer.shutdown();
@ -489,149 +487,102 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
}
}
private void assertEquals(int maxDoc, Bits liveDocs, SortedDocValues expected, SortedDocValues actual) throws Exception {
assertEquals(maxDoc, liveDocs, DocValues.singleton(expected), DocValues.singleton(actual));
private void assertEquals(int maxDoc, SortedDocValues expected, SortedDocValues actual) throws Exception {
assertEquals(maxDoc, DocValues.singleton(expected), DocValues.singleton(actual));
}
private void assertEquals(int maxDoc, Bits liveDocs, SortedSetDocValues expected, SortedSetDocValues actual) throws Exception {
private void assertEquals(int maxDoc, SortedSetDocValues expected, SortedSetDocValues actual) throws Exception {
// can be null for the segment if no docs actually had any SortedDocValues
// in this case FC.getDocTermsOrds returns EMPTY
if (actual == null) {
assertEquals(DocValues.EMPTY_SORTED_SET, expected);
return;
}
FixedBitSet liveOrdsExpected = new FixedBitSet((int) expected.getValueCount());
FixedBitSet liveOrdsActual = new FixedBitSet((int) actual.getValueCount());
BytesRef expectedBytes = new BytesRef();
BytesRef actualBytes = new BytesRef();
// compare values for all live docs:
assertEquals(expected.getValueCount(), actual.getValueCount());
// compare ord lists
for (int i = 0; i < maxDoc; i++) {
if (liveDocs != null && liveDocs.get(i) == false) {
// Don't check deleted docs
continue;
}
expected.setDocument(i);
actual.setDocument(i);
long expectedOrd;
while ((expectedOrd = expected.nextOrd()) != NO_MORE_ORDS) {
expected.lookupOrd(expectedOrd, expectedBytes);
long actualOrd = actual.nextOrd();
assertTrue(actualOrd != NO_MORE_ORDS);
actual.lookupOrd(actualOrd, actualBytes);
assertEquals(expectedBytes, actualBytes);
liveOrdsExpected.set((int) expectedOrd);
liveOrdsActual.set((int) actualOrd);
assertEquals(expectedOrd, actual.nextOrd());
}
assertEquals(NO_MORE_ORDS, actual.nextOrd());
}
// Make sure both have same number of non-deleted values:
assertEquals(liveOrdsExpected.cardinality(), liveOrdsActual.cardinality());
// compare ord dictionary
int expectedOrd = 0;
int actualOrd = 0;
while (expectedOrd < expected.getValueCount()) {
expectedOrd = liveOrdsExpected.nextSetBit(expectedOrd);
if (expectedOrd == -1) {
break;
}
actualOrd = liveOrdsActual.nextSetBit(actualOrd);
expected.lookupOrd(expectedOrd, expectedBytes);
actual.lookupOrd(actualOrd, actualBytes);
BytesRef expectedBytes = new BytesRef();
BytesRef actualBytes = new BytesRef();
for (long i = 0; i < expected.getValueCount(); i++) {
expected.lookupTerm(expectedBytes);
actual.lookupTerm(actualBytes);
assertEquals(expectedBytes, actualBytes);
expectedOrd++;
actualOrd++;
}
assertTrue(actualOrd == actual.getValueCount() || liveOrdsActual.nextSetBit(actualOrd) == -1);
// compare termsenum
assertEquals(expected.getValueCount(), expected.termsEnum(), liveOrdsExpected, actual.termsEnum(), liveOrdsActual);
assertEquals(expected.getValueCount(), expected.termsEnum(), actual.termsEnum());
}
/** Does termsEnum.next() but then skips over deleted ords. */
private static BytesRef next(TermsEnum termsEnum, Bits liveOrds) throws IOException {
while (termsEnum.next() != null) {
if (liveOrds.get((int) termsEnum.ord())) {
return termsEnum.term();
}
}
return null;
}
/** Does termsEnum.seekCeil() but then skips over deleted ords. */
private static SeekStatus seekCeil(TermsEnum termsEnum, BytesRef term, Bits liveOrds) throws IOException {
SeekStatus status = termsEnum.seekCeil(term);
if (status == SeekStatus.END) {
return status;
} else {
if (liveOrds.get((int) termsEnum.ord()) == false) {
while (termsEnum.next() != null) {
if (liveOrds.get((int) termsEnum.ord())) {
return SeekStatus.NOT_FOUND;
}
}
return SeekStatus.END;
} else {
return status;
}
}
}
private void assertEquals(long numOrds, TermsEnum expected, Bits liveOrdsExpected, TermsEnum actual, Bits liveOrdsActual) throws Exception {
private void assertEquals(long numOrds, TermsEnum expected, TermsEnum actual) throws Exception {
BytesRef ref;
// sequential next() through all terms
while ((ref = next(expected, liveOrdsExpected)) != null) {
assertEquals(ref, next(actual, liveOrdsActual));
while ((ref = expected.next()) != null) {
assertEquals(ref, actual.next());
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}
assertNull(actual.next());
// sequential seekExact(ord) through all terms
for (long i = 0; i < numOrds; i++) {
expected.seekExact(i);
actual.seekExact(i);
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}
assertNull(next(actual, liveOrdsActual));
// sequential seekExact(BytesRef) through all terms
for (long i = 0; i < numOrds; i++) {
if (liveOrdsExpected.get((int) i) == false) {
continue;
}
expected.seekExact(i);
assertTrue(actual.seekExact(expected.term()));
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}
// sequential seekCeil(BytesRef) through all terms
for (long i = 0; i < numOrds; i++) {
if (liveOrdsExpected.get((int) i) == false) {
continue;
}
expected.seekExact(i);
assertEquals(SeekStatus.FOUND, actual.seekCeil(expected.term()));
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}
// random seekExact(ord)
for (long i = 0; i < numOrds; i++) {
long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
expected.seekExact(randomOrd);
actual.seekExact(randomOrd);
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}
// random seekExact(BytesRef)
for (long i = 0; i < numOrds; i++) {
long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
if (liveOrdsExpected.get((int) randomOrd) == false) {
continue;
}
expected.seekExact(randomOrd);
actual.seekExact(expected.term());
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}
// random seekCeil(BytesRef)
for (long i = 0; i < numOrds; i++) {
if (liveOrdsExpected.get((int) i) == false) {
continue;
}
BytesRef target = new BytesRef(TestUtil.randomUnicodeString(random()));
SeekStatus expectedStatus = seekCeil(expected, target, liveOrdsExpected);
assertEquals(expectedStatus, seekCeil(actual, target, liveOrdsActual));
SeekStatus expectedStatus = expected.seekCeil(target);
assertEquals(expectedStatus, actual.seekCeil(target));
if (expectedStatus != SeekStatus.END) {
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}
}