mirror of https://github.com/apache/lucene.git
LUCENE-5675: finish reverting 'do not send deleted docs to PostingsFormat on flush'
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5675@1596602 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2759adc754
commit
c722f2fb9a
|
@ -1640,15 +1640,17 @@ public class CheckIndex {
|
|||
// Again, with the one doc deleted:
|
||||
checkFields(tfv, onlyDocIsDeleted, 1, fieldInfos, false, true, infoStream, verbose);
|
||||
|
||||
if (liveDocs != null && liveDocs.get(j) == false) {
|
||||
// Only check live docs
|
||||
continue;
|
||||
// Only agg stats if the doc is live:
|
||||
final boolean doStats = liveDocs == null || liveDocs.get(j);
|
||||
|
||||
if (doStats) {
|
||||
status.docCount++;
|
||||
}
|
||||
|
||||
status.docCount++;
|
||||
|
||||
for(String field : tfv) {
|
||||
status.totVectors++;
|
||||
if (doStats) {
|
||||
status.totVectors++;
|
||||
}
|
||||
|
||||
// Make sure FieldInfo thinks this field is vector'd:
|
||||
final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
|
||||
|
|
|
@ -347,35 +347,33 @@ class FreqProxFields extends Fields {
|
|||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
while (true) {
|
||||
if (reader.eof()) {
|
||||
if (ended) {
|
||||
return NO_MORE_DOCS;
|
||||
} else {
|
||||
ended = true;
|
||||
docID = postingsArray.lastDocIDs[termID];
|
||||
if (readTermFreq) {
|
||||
freq = postingsArray.termFreqs[termID];
|
||||
}
|
||||
}
|
||||
if (reader.eof()) {
|
||||
if (ended) {
|
||||
return NO_MORE_DOCS;
|
||||
} else {
|
||||
int code = reader.readVInt();
|
||||
if (!readTermFreq) {
|
||||
docID += code;
|
||||
} else {
|
||||
docID += code >>> 1;
|
||||
if ((code & 1) != 0) {
|
||||
freq = 1;
|
||||
} else {
|
||||
freq = reader.readVInt();
|
||||
}
|
||||
ended = true;
|
||||
docID = postingsArray.lastDocIDs[termID];
|
||||
if (readTermFreq) {
|
||||
freq = postingsArray.termFreqs[termID];
|
||||
}
|
||||
}
|
||||
} else {
|
||||
int code = reader.readVInt();
|
||||
if (!readTermFreq) {
|
||||
docID += code;
|
||||
} else {
|
||||
docID += code >>> 1;
|
||||
if ((code & 1) != 0) {
|
||||
freq = 1;
|
||||
} else {
|
||||
freq = reader.readVInt();
|
||||
}
|
||||
|
||||
assert docID != postingsArray.lastDocIDs[termID];
|
||||
}
|
||||
|
||||
return docID;
|
||||
assert docID != postingsArray.lastDocIDs[termID];
|
||||
}
|
||||
|
||||
return docID;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -436,37 +434,35 @@ class FreqProxFields extends Fields {
|
|||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
while (true) {
|
||||
while (posLeft != 0) {
|
||||
nextPosition();
|
||||
}
|
||||
|
||||
if (reader.eof()) {
|
||||
if (ended) {
|
||||
return NO_MORE_DOCS;
|
||||
} else {
|
||||
ended = true;
|
||||
docID = postingsArray.lastDocIDs[termID];
|
||||
freq = postingsArray.termFreqs[termID];
|
||||
}
|
||||
} else {
|
||||
int code = reader.readVInt();
|
||||
docID += code >>> 1;
|
||||
if ((code & 1) != 0) {
|
||||
freq = 1;
|
||||
} else {
|
||||
freq = reader.readVInt();
|
||||
}
|
||||
|
||||
assert docID != postingsArray.lastDocIDs[termID];
|
||||
}
|
||||
|
||||
posLeft = freq;
|
||||
pos = 0;
|
||||
startOffset = 0;
|
||||
|
||||
return docID;
|
||||
while (posLeft != 0) {
|
||||
nextPosition();
|
||||
}
|
||||
|
||||
if (reader.eof()) {
|
||||
if (ended) {
|
||||
return NO_MORE_DOCS;
|
||||
} else {
|
||||
ended = true;
|
||||
docID = postingsArray.lastDocIDs[termID];
|
||||
freq = postingsArray.termFreqs[termID];
|
||||
}
|
||||
} else {
|
||||
int code = reader.readVInt();
|
||||
docID += code >>> 1;
|
||||
if ((code & 1) != 0) {
|
||||
freq = 1;
|
||||
} else {
|
||||
freq = reader.readVInt();
|
||||
}
|
||||
|
||||
assert docID != postingsArray.lastDocIDs[termID];
|
||||
}
|
||||
|
||||
posLeft = freq;
|
||||
pos = 0;
|
||||
startOffset = 0;
|
||||
|
||||
return docID;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -508,7 +508,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
|
|||
writer.shutdown();
|
||||
IndexReader reader = DirectoryReader.open(dir);
|
||||
final Term t = new Term("content", "aa");
|
||||
assertEquals(2, reader.docFreq(t));
|
||||
assertEquals(3, reader.docFreq(t));
|
||||
|
||||
// Make sure the doc that hit the exception was marked
|
||||
// as deleted:
|
||||
|
@ -648,7 +648,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
|
|||
IndexReader reader = DirectoryReader.open(dir);
|
||||
if (i == 0) {
|
||||
int expected = 5;
|
||||
assertEquals(expected-1, reader.docFreq(new Term("contents", "here")));
|
||||
assertEquals(expected, reader.docFreq(new Term("contents", "here")));
|
||||
assertEquals(expected, reader.maxDoc());
|
||||
int numDel = 0;
|
||||
final Bits liveDocs = MultiFields.getLiveDocs(reader);
|
||||
|
@ -760,8 +760,8 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
|
|||
|
||||
IndexReader reader = DirectoryReader.open(dir);
|
||||
int expected = (3+(1-i)*2)*NUM_THREAD*NUM_ITER;
|
||||
assertEquals("i=" + i, expected - NUM_THREAD*NUM_ITER, reader.docFreq(new Term("contents", "here")));
|
||||
assertEquals("i=" + i, expected, reader.maxDoc());
|
||||
assertEquals("i=" + i, expected, reader.docFreq(new Term("contents", "here")));
|
||||
assertEquals(expected, reader.maxDoc());
|
||||
int numDel = 0;
|
||||
final Bits liveDocs = MultiFields.getLiveDocs(reader);
|
||||
assertNotNull(liveDocs);
|
||||
|
|
|
@ -123,18 +123,14 @@ public class TestMultiFields extends LuceneTestCase {
|
|||
}
|
||||
|
||||
DocsEnum docsEnum = TestUtil.docs(random(), reader, "field", term, liveDocs, null, DocsEnum.FLAG_NONE);
|
||||
if (docsEnum == null) {
|
||||
for(int docID : docs.get(term)) {
|
||||
assert deleted.contains(docID);
|
||||
assertNotNull(docsEnum);
|
||||
|
||||
for(int docID : docs.get(term)) {
|
||||
if (!deleted.contains(docID)) {
|
||||
assertEquals(docID, docsEnum.nextDoc());
|
||||
}
|
||||
} else {
|
||||
for(int docID : docs.get(term)) {
|
||||
if (!deleted.contains(docID)) {
|
||||
assertEquals(docID, docsEnum.nextDoc());
|
||||
}
|
||||
}
|
||||
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsEnum.nextDoc());
|
||||
}
|
||||
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsEnum.nextDoc());
|
||||
}
|
||||
|
||||
reader.close();
|
||||
|
|
|
@ -17,7 +17,8 @@ package org.apache.lucene.uninverting;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
@ -46,17 +47,14 @@ import org.apache.lucene.index.SortedDocValues;
|
|||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.index.StoredDocument;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermsEnum.SeekStatus;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.index.TermsEnum.SeekStatus;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
|
||||
|
||||
public class TestFieldCacheVsDocValues extends LuceneTestCase {
|
||||
|
||||
public void testByteMissingVsFieldCache() throws Exception {
|
||||
|
@ -330,7 +328,7 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
|
|||
AtomicReader r = context.reader();
|
||||
SortedDocValues expected = FieldCache.DEFAULT.getTermsIndex(r, "indexed");
|
||||
SortedDocValues actual = r.getSortedDocValues("dv");
|
||||
assertEquals(r.maxDoc(), r.getLiveDocs(), expected, actual);
|
||||
assertEquals(r.maxDoc(), expected, actual);
|
||||
}
|
||||
ir.close();
|
||||
dir.close();
|
||||
|
@ -393,7 +391,7 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
|
|||
AtomicReader r = context.reader();
|
||||
SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(r, "indexed", null);
|
||||
SortedSetDocValues actual = r.getSortedSetDocValues("dv");
|
||||
assertEquals(r.maxDoc(), r.getLiveDocs(), expected, actual);
|
||||
assertEquals(r.maxDoc(), expected, actual);
|
||||
}
|
||||
ir.close();
|
||||
|
||||
|
@ -404,7 +402,7 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
|
|||
AtomicReader ar = getOnlySegmentReader(ir);
|
||||
SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(ar, "indexed", null);
|
||||
SortedSetDocValues actual = ar.getSortedSetDocValues("dv");
|
||||
assertEquals(ir.maxDoc(), ar.getLiveDocs(), expected, actual);
|
||||
assertEquals(ir.maxDoc(), expected, actual);
|
||||
ir.close();
|
||||
|
||||
writer.shutdown();
|
||||
|
@ -489,149 +487,102 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
private void assertEquals(int maxDoc, Bits liveDocs, SortedDocValues expected, SortedDocValues actual) throws Exception {
|
||||
assertEquals(maxDoc, liveDocs, DocValues.singleton(expected), DocValues.singleton(actual));
|
||||
private void assertEquals(int maxDoc, SortedDocValues expected, SortedDocValues actual) throws Exception {
|
||||
assertEquals(maxDoc, DocValues.singleton(expected), DocValues.singleton(actual));
|
||||
}
|
||||
|
||||
private void assertEquals(int maxDoc, Bits liveDocs, SortedSetDocValues expected, SortedSetDocValues actual) throws Exception {
|
||||
private void assertEquals(int maxDoc, SortedSetDocValues expected, SortedSetDocValues actual) throws Exception {
|
||||
// can be null for the segment if no docs actually had any SortedDocValues
|
||||
// in this case FC.getDocTermsOrds returns EMPTY
|
||||
if (actual == null) {
|
||||
assertEquals(DocValues.EMPTY_SORTED_SET, expected);
|
||||
return;
|
||||
}
|
||||
|
||||
FixedBitSet liveOrdsExpected = new FixedBitSet((int) expected.getValueCount());
|
||||
FixedBitSet liveOrdsActual = new FixedBitSet((int) actual.getValueCount());
|
||||
|
||||
BytesRef expectedBytes = new BytesRef();
|
||||
BytesRef actualBytes = new BytesRef();
|
||||
|
||||
// compare values for all live docs:
|
||||
assertEquals(expected.getValueCount(), actual.getValueCount());
|
||||
// compare ord lists
|
||||
for (int i = 0; i < maxDoc; i++) {
|
||||
if (liveDocs != null && liveDocs.get(i) == false) {
|
||||
// Don't check deleted docs
|
||||
continue;
|
||||
}
|
||||
expected.setDocument(i);
|
||||
actual.setDocument(i);
|
||||
long expectedOrd;
|
||||
while ((expectedOrd = expected.nextOrd()) != NO_MORE_ORDS) {
|
||||
expected.lookupOrd(expectedOrd, expectedBytes);
|
||||
long actualOrd = actual.nextOrd();
|
||||
assertTrue(actualOrd != NO_MORE_ORDS);
|
||||
actual.lookupOrd(actualOrd, actualBytes);
|
||||
assertEquals(expectedBytes, actualBytes);
|
||||
liveOrdsExpected.set((int) expectedOrd);
|
||||
liveOrdsActual.set((int) actualOrd);
|
||||
assertEquals(expectedOrd, actual.nextOrd());
|
||||
}
|
||||
|
||||
assertEquals(NO_MORE_ORDS, actual.nextOrd());
|
||||
}
|
||||
|
||||
// Make sure both have same number of non-deleted values:
|
||||
assertEquals(liveOrdsExpected.cardinality(), liveOrdsActual.cardinality());
|
||||
|
||||
// compare ord dictionary
|
||||
int expectedOrd = 0;
|
||||
int actualOrd = 0;
|
||||
while (expectedOrd < expected.getValueCount()) {
|
||||
expectedOrd = liveOrdsExpected.nextSetBit(expectedOrd);
|
||||
if (expectedOrd == -1) {
|
||||
break;
|
||||
}
|
||||
actualOrd = liveOrdsActual.nextSetBit(actualOrd);
|
||||
expected.lookupOrd(expectedOrd, expectedBytes);
|
||||
actual.lookupOrd(actualOrd, actualBytes);
|
||||
BytesRef expectedBytes = new BytesRef();
|
||||
BytesRef actualBytes = new BytesRef();
|
||||
for (long i = 0; i < expected.getValueCount(); i++) {
|
||||
expected.lookupTerm(expectedBytes);
|
||||
actual.lookupTerm(actualBytes);
|
||||
assertEquals(expectedBytes, actualBytes);
|
||||
expectedOrd++;
|
||||
actualOrd++;
|
||||
}
|
||||
assertTrue(actualOrd == actual.getValueCount() || liveOrdsActual.nextSetBit(actualOrd) == -1);
|
||||
|
||||
// compare termsenum
|
||||
assertEquals(expected.getValueCount(), expected.termsEnum(), liveOrdsExpected, actual.termsEnum(), liveOrdsActual);
|
||||
assertEquals(expected.getValueCount(), expected.termsEnum(), actual.termsEnum());
|
||||
}
|
||||
|
||||
/** Does termsEnum.next() but then skips over deleted ords. */
|
||||
private static BytesRef next(TermsEnum termsEnum, Bits liveOrds) throws IOException {
|
||||
while (termsEnum.next() != null) {
|
||||
if (liveOrds.get((int) termsEnum.ord())) {
|
||||
return termsEnum.term();
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/** Does termsEnum.seekCeil() but then skips over deleted ords. */
|
||||
private static SeekStatus seekCeil(TermsEnum termsEnum, BytesRef term, Bits liveOrds) throws IOException {
|
||||
SeekStatus status = termsEnum.seekCeil(term);
|
||||
if (status == SeekStatus.END) {
|
||||
return status;
|
||||
} else {
|
||||
if (liveOrds.get((int) termsEnum.ord()) == false) {
|
||||
while (termsEnum.next() != null) {
|
||||
if (liveOrds.get((int) termsEnum.ord())) {
|
||||
return SeekStatus.NOT_FOUND;
|
||||
}
|
||||
}
|
||||
return SeekStatus.END;
|
||||
} else {
|
||||
return status;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void assertEquals(long numOrds, TermsEnum expected, Bits liveOrdsExpected, TermsEnum actual, Bits liveOrdsActual) throws Exception {
|
||||
|
||||
private void assertEquals(long numOrds, TermsEnum expected, TermsEnum actual) throws Exception {
|
||||
BytesRef ref;
|
||||
|
||||
// sequential next() through all terms
|
||||
while ((ref = next(expected, liveOrdsExpected)) != null) {
|
||||
assertEquals(ref, next(actual, liveOrdsActual));
|
||||
while ((ref = expected.next()) != null) {
|
||||
assertEquals(ref, actual.next());
|
||||
assertEquals(expected.ord(), actual.ord());
|
||||
assertEquals(expected.term(), actual.term());
|
||||
}
|
||||
assertNull(actual.next());
|
||||
|
||||
// sequential seekExact(ord) through all terms
|
||||
for (long i = 0; i < numOrds; i++) {
|
||||
expected.seekExact(i);
|
||||
actual.seekExact(i);
|
||||
assertEquals(expected.ord(), actual.ord());
|
||||
assertEquals(expected.term(), actual.term());
|
||||
}
|
||||
assertNull(next(actual, liveOrdsActual));
|
||||
|
||||
// sequential seekExact(BytesRef) through all terms
|
||||
for (long i = 0; i < numOrds; i++) {
|
||||
if (liveOrdsExpected.get((int) i) == false) {
|
||||
continue;
|
||||
}
|
||||
expected.seekExact(i);
|
||||
assertTrue(actual.seekExact(expected.term()));
|
||||
assertEquals(expected.ord(), actual.ord());
|
||||
assertEquals(expected.term(), actual.term());
|
||||
}
|
||||
|
||||
// sequential seekCeil(BytesRef) through all terms
|
||||
for (long i = 0; i < numOrds; i++) {
|
||||
if (liveOrdsExpected.get((int) i) == false) {
|
||||
continue;
|
||||
}
|
||||
expected.seekExact(i);
|
||||
assertEquals(SeekStatus.FOUND, actual.seekCeil(expected.term()));
|
||||
assertEquals(expected.ord(), actual.ord());
|
||||
assertEquals(expected.term(), actual.term());
|
||||
}
|
||||
|
||||
// random seekExact(ord)
|
||||
for (long i = 0; i < numOrds; i++) {
|
||||
long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
|
||||
expected.seekExact(randomOrd);
|
||||
actual.seekExact(randomOrd);
|
||||
assertEquals(expected.ord(), actual.ord());
|
||||
assertEquals(expected.term(), actual.term());
|
||||
}
|
||||
|
||||
// random seekExact(BytesRef)
|
||||
for (long i = 0; i < numOrds; i++) {
|
||||
long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
|
||||
if (liveOrdsExpected.get((int) randomOrd) == false) {
|
||||
continue;
|
||||
}
|
||||
expected.seekExact(randomOrd);
|
||||
actual.seekExact(expected.term());
|
||||
assertEquals(expected.ord(), actual.ord());
|
||||
assertEquals(expected.term(), actual.term());
|
||||
}
|
||||
|
||||
// random seekCeil(BytesRef)
|
||||
for (long i = 0; i < numOrds; i++) {
|
||||
if (liveOrdsExpected.get((int) i) == false) {
|
||||
continue;
|
||||
}
|
||||
BytesRef target = new BytesRef(TestUtil.randomUnicodeString(random()));
|
||||
SeekStatus expectedStatus = seekCeil(expected, target, liveOrdsExpected);
|
||||
assertEquals(expectedStatus, seekCeil(actual, target, liveOrdsActual));
|
||||
SeekStatus expectedStatus = expected.seekCeil(target);
|
||||
assertEquals(expectedStatus, actual.seekCeil(target));
|
||||
if (expectedStatus != SeekStatus.END) {
|
||||
assertEquals(expected.ord(), actual.ord());
|
||||
assertEquals(expected.term(), actual.term());
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue