LUCENE-5675: delete docs on flush

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5675@1596091 13f79535-47bb-0310-9956-ffa450edef68
Author: Michael McCandless
Date:   2014-05-19 22:59:48 +00:00
Parent: 4e0b7974b6
Commit: d6968c3924

15 changed files with 420 additions and 185 deletions

IDVersionPostingsWriter.java

@@ -26,6 +26,7 @@ import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;

 public final class IDVersionPostingsWriter extends PushPostingsWriterBase {
@@ -71,11 +72,9 @@ public final class IDVersionPostingsWriter extends PushPostingsWriterBase {
   @Override
   public void startDoc(int docID, int termDocFreq) throws IOException {
     if (lastDocID != -1) {
-      // nocommit need test
       throw new IllegalArgumentException("term appears in more than one document");
     }
     if (termDocFreq != 1) {
-      // nocommit need test
       throw new IllegalArgumentException("term appears more than once in the document");
     }
@@ -86,16 +85,13 @@ public final class IDVersionPostingsWriter extends PushPostingsWriterBase {
   @Override
   public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
     if (lastPosition != -1) {
-      // nocommit need test
       throw new IllegalArgumentException("term appears more than once in document");
     }
     lastPosition = position;
     if (payload == null) {
-      // nocommit need test
       throw new IllegalArgumentException("token doesn't have a payload");
     }
     if (payload.length != 8) {
-      // nocommit need test
       throw new IllegalArgumentException("payload.length != 8 (got " + payload.length + ")");
     }
@@ -108,7 +104,6 @@ public final class IDVersionPostingsWriter extends PushPostingsWriterBase {
   @Override
   public void finishDoc() throws IOException {
     if (lastPosition == -1) {
-      // nocommit need test
       throw new IllegalArgumentException("missing addPosition");
     }
   }
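The checks above define the format's contract: an ID term may appear in exactly one document, exactly once, and its single token must carry an 8-byte payload holding the version. A minimal standalone sketch of packing a version into such a payload (plain Java; the big-endian byte order is an assumption for illustration, not something this hunk specifies):

    public class VersionPayloadSketch {
      public static void main(String[] args) {
        long version = 17L;
        byte[] payload = new byte[8]; // IDVersionPostingsWriter rejects any other length
        for (int i = 0; i < 8; i++) {
          payload[i] = (byte) (version >>> (56 - 8 * i)); // byte order assumed
        }
        System.out.println(java.util.Arrays.toString(payload));
      }
    }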

IDVersionSegmentTermsEnum.java

@@ -53,9 +53,6 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
   boolean termExists;
   final VersionFieldReader fr;

-  // nocommit make this public "for casting" and add a getVersion method?
-  // nocommit unused?
   private int targetBeforeCurrentLength;
   private final ByteArrayDataInput scratchReader = new ByteArrayDataInput();
@@ -228,6 +225,11 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
     }
   }

+  /** Only valid if we are positioned. */
+  public long getVersion() {
+    return ((IDVersionTermState) currentFrame.state).idVersion;
+  }
+
   /** Returns false if the term does not exist, or it exists but its version is too old (< minIDVersion). */
   public boolean seekExact(final BytesRef target, long minIDVersion) throws IOException {
@@ -357,11 +359,6 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
       }
       currentFrame = lastFrame;
       currentFrame.rewind();
-      // nocommit put this back to BT also?
-      //term.length = targetUpto;
-      // nocommit put this back???
-      //termExists = false;
     } else {
       // Target is exactly the same as current term
       assert term.length == target.length;
@@ -559,7 +556,6 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
     if (currentFrame.maxIDVersion < minIDVersion) {
       // The max version for all terms in this block is lower than the minVersion
-      // nocommit need same logic here as above?
       termExists = false;
       term.length = targetUpto;
       return false;
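A sketch of how a caller might combine the new getVersion() with seekExact(target, minIDVersion); the cast mirrors the PerThreadPKLookup subclass in the tests below, and obtaining terms from a segment written with the ID/version postings format is assumed:

    import java.io.IOException;
    import org.apache.lucene.index.Terms;
    import org.apache.lucene.util.BytesRef;

    static long lookupVersion(Terms terms, BytesRef id, long minIDVersion) throws IOException {
      // Cast is only safe if the segment was written by the ID/version postings format:
      IDVersionSegmentTermsEnum te = (IDVersionSegmentTermsEnum) terms.iterator(null);
      if (te.seekExact(id, minIDVersion)) {
        return te.getVersion(); // only valid while positioned on the term
      }
      return -1; // id absent, or its version is older than minIDVersion
    }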

StringAndPayloadField.java (new file)

@@ -0,0 +1,104 @@
package org.apache.lucene.codecs.idversion;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.util.BytesRef;

// nocommit can we take a BytesRef token instead?

/** Produces a single String token from the provided value, with the provided payload. */
class StringAndPayloadField extends Field {

  public static final FieldType TYPE = new FieldType();

  static {
    TYPE.setIndexed(true);
    TYPE.setOmitNorms(true);
    TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    TYPE.setTokenized(true);
    TYPE.freeze();
  }

  private final BytesRef payload;

  public StringAndPayloadField(String name, String value, BytesRef payload) {
    super(name, value, TYPE);
    this.payload = payload;
  }

  @Override
  public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) throws IOException {
    SingleTokenWithPayloadTokenStream ts;
    if (reuse instanceof SingleTokenWithPayloadTokenStream) {
      ts = (SingleTokenWithPayloadTokenStream) reuse;
    } else {
      ts = new SingleTokenWithPayloadTokenStream();
    }
    ts.setValue((String) fieldsData, payload);
    return ts;
  }

  private static final class SingleTokenWithPayloadTokenStream extends TokenStream {
    private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
    private final PayloadAttribute payloadAttribute = addAttribute(PayloadAttribute.class);
    private boolean used = false;
    private String value = null;
    private BytesRef payload;

    /** Sets the string value. */
    void setValue(String value, BytesRef payload) {
      this.value = value;
      this.payload = payload;
    }

    @Override
    public boolean incrementToken() {
      if (used) {
        return false;
      }
      clearAttributes();
      termAttribute.append(value);
      payloadAttribute.setPayload(payload);
      used = true;
      return true;
    }

    @Override
    public void reset() {
      used = false;
    }

    @Override
    public void close() {
      value = null;
      payload = null;
    }
  }
}
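The tests below rely on a makeIDField helper that is not part of this diff. A hypothetical reconstruction, assuming it pairs the ID string with the version packed into the 8-byte payload this class carries (the helper name, field name, and byte order are all assumptions):

    static Field makeIDField(String id, long version) {
      byte[] bytes = new byte[8];
      for (int i = 0; i < 8; i++) {
        bytes[i] = (byte) (version >>> (56 - 8 * i)); // byte order assumed
      }
      return new StringAndPayloadField("id", id, new BytesRef(bytes));
    }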

TestIDVersionPostingsFormat.java

@@ -36,15 +36,18 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.index.BasePostingsFormatTestCase;
+import org.apache.lucene.index.ConcurrentMergeScheduler;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.MergeScheduler;
 import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.index.PerThreadPKLookup;
 import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
@@ -262,6 +265,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
           System.out.println("  lookup exact version (should be found)");
         }
         assertTrue("term should have been found (version too old)", lookup.lookup(idValueBytes, expectedVersion.longValue()) != -1);
+        assertEquals(expectedVersion.longValue(), lookup.getVersion());
       } else {
         if (VERBOSE) {
           System.out.println("  lookup version+1 (should not be found)");
@@ -281,6 +285,8 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
       super(r, field);
     }

+    long lastVersion;
+
     /** Returns docID if found, else -1. */
     public int lookup(BytesRef id, long version) throws IOException {
       for(int seg=0;seg<numSegs;seg++) {
@@ -291,6 +297,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
           docsEnums[seg] = termsEnums[seg].docs(liveDocs[seg], docsEnums[seg], 0);
           int docID = docsEnums[seg].nextDoc();
           if (docID != DocsEnum.NO_MORE_DOCS) {
+            lastVersion = ((IDVersionSegmentTermsEnum) termsEnums[seg]).getVersion();
             return docBases[seg] + docID;
           }
           assert hasDeletions;
@@ -299,83 +306,10 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
       return -1;
     }
+
+    /** Only valid if lookup returned a valid docID. */
+    public long getVersion() {
+      return lastVersion;
+    }
   }

-  /** Produces a single token from the provided value, with the provided payload. */
-  private static class StringAndPayloadField extends Field {
-
-    public static final FieldType TYPE = new FieldType();
-
-    static {
-      TYPE.setIndexed(true);
-      TYPE.setOmitNorms(true);
-      TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
-      TYPE.setTokenized(true);
-      TYPE.freeze();
-    }
-
-    private final BytesRef payload;
-
-    public StringAndPayloadField(String name, String value, BytesRef payload) {
-      super(name, value, TYPE);
-      this.payload = payload;
-    }
-
-    @Override
-    public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) throws IOException {
-      SingleTokenWithPayloadTokenStream ts;
-      if (reuse instanceof SingleTokenWithPayloadTokenStream) {
-        ts = (SingleTokenWithPayloadTokenStream) reuse;
-      } else {
-        ts = new SingleTokenWithPayloadTokenStream();
-      }
-      ts.setValue((String) fieldsData, payload);
-      return ts;
-    }
-  }
-
-  private static final class SingleTokenWithPayloadTokenStream extends TokenStream {
-    private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
-    private final PayloadAttribute payloadAttribute = addAttribute(PayloadAttribute.class);
-    private boolean used = false;
-    private String value = null;
-    private BytesRef payload;
-
-    /** Creates a new TokenStream that returns a String+payload as single token.
-     * <p>Warning: Does not initialize the value, you must call
-     * {@link #setValue(String)} afterwards!
-     */
-    SingleTokenWithPayloadTokenStream() {
-    }
-
-    /** Sets the string value. */
-    void setValue(String value, BytesRef payload) {
-      this.value = value;
-      this.payload = payload;
-    }
-
-    @Override
-    public boolean incrementToken() {
-      if (used) {
-        return false;
-      }
-      clearAttributes();
-      termAttribute.append(value);
-      payloadAttribute.setPayload(payload);
-      used = true;
-      return true;
-    }
-
-    @Override
-    public void reset() {
-      used = false;
-    }
-
-    @Override
-    public void close() {
-      value = null;
-      payload = null;
-    }
-  }
@@ -394,8 +328,6 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
     */
   }

-  /*
-  // Invalid
   public void testMoreThanOneDocPerIDOneSegment() throws Exception {
     Directory dir = newDirectory();
     IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
@@ -412,14 +344,138 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
       fail("didn't hit expected exception");
     } catch (IllegalArgumentException iae) {
       // expected
-      iae.printStackTrace();
     }
     w.close();
     dir.close();
   }

-  // Invalid
   public void testMoreThanOneDocPerIDTwoSegments() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
+    MergeScheduler ms = iwc.getMergeScheduler();
+    if (ms instanceof ConcurrentMergeScheduler) {
+      iwc.setMergeScheduler(new ConcurrentMergeScheduler() {
+          @Override
+          protected void handleMergeException(Throwable exc) {
+            assertTrue(exc instanceof IllegalArgumentException);
+          }
+        });
+    }
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+    Document doc = new Document();
+    doc.add(makeIDField("id", 17));
+    w.addDocument(doc);
+    w.commit();
+    doc = new Document();
+    doc.add(makeIDField("id", 17));
+    try {
+      w.addDocument(doc);
+      w.commit();
+      w.forceMerge(1);
+      fail("didn't hit exception");
+    } catch (IllegalArgumentException iae) {
+      // expected: SMS will hit this
+    } catch (IOException ioe) {
+      // expected
+      assertTrue(ioe.getCause() instanceof IllegalArgumentException);
+    }
+    w.w.close();
+    dir.close();
+  }
+
+  public void testMoreThanOneDocPerIDWithUpdates() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+    Document doc = new Document();
+    doc.add(makeIDField("id", 17));
+    w.addDocument(doc);
+    doc = new Document();
+    doc.add(makeIDField("id", 17));
+    // Replaces the doc we just indexed:
+    w.updateDocument(new Term("id", "id"), doc);
+    w.commit();
+    w.close();
+    dir.close();
+  }
+
+  public void testMoreThanOneDocPerIDWithDeletes() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+    Document doc = new Document();
+    doc.add(makeIDField("id", 17));
+    w.addDocument(doc);
+    w.deleteDocuments(new Term("id", "id"));
+    doc = new Document();
+    doc.add(makeIDField("id", 17));
+    w.addDocument(doc);
+    w.commit();
+    w.close();
+    dir.close();
+  }
+
+  public void testMissingPayload() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+    Document doc = new Document();
+    doc.add(newTextField("id", "id", Field.Store.NO));
+    try {
+      w.addDocument(doc);
+      w.commit();
+      fail("didn't hit expected exception");
+    } catch (IllegalArgumentException iae) {
+      // expected
+    }
+    w.close();
+    dir.close();
+  }
+
+  public void testMissingPositions() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+    Document doc = new Document();
+    doc.add(newStringField("id", "id", Field.Store.NO));
+    try {
+      w.addDocument(doc);
+      w.commit();
+      fail("didn't hit expected exception");
+    } catch (IllegalArgumentException iae) {
+      // expected
+    }
+    w.close();
+    dir.close();
+  }
+
+  public void testInvalidPayload() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+    Document doc = new Document();
+    doc.add(new StringAndPayloadField("id", "id", new BytesRef("foo")));
+    try {
+      w.addDocument(doc);
+      w.commit();
+      fail("didn't hit expected exception");
+    } catch (IllegalArgumentException iae) {
+      // expected
+    }
+    w.close();
+    dir.close();
+  }
+
+  public void testMoreThanOneDocPerIDWithDeletesAcrossSegments() throws IOException {
     Directory dir = newDirectory();
     IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
     iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
@@ -430,15 +486,29 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
     w.commit();
     doc = new Document();
     doc.add(makeIDField("id", 17));
-    w.addDocument(doc);
-    w.commit();
+    // Replaces the doc we just indexed:
+    w.updateDocument(new Term("id", "id"), doc);
     w.forceMerge(1);
     w.close();
     dir.close();
   }

-  public void testMoreThanOneDocPerIDWithDeletes() {
+  public void testMoreThanOnceInSingleDoc() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+    Document doc = new Document();
+    doc.add(makeIDField("id", 17));
+    doc.add(makeIDField("id", 17));
+    try {
+      w.addDocument(doc);
+      w.commit();
+      fail("didn't hit expected exception");
+    } catch (IllegalArgumentException iae) {
+      // expected
+    }
+    w.close();
+    dir.close();
   }
-  */
 }

BlockTreeTermsWriter.java

@@ -613,7 +613,6 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
     void writeBlocks(IntsRef prevTerm, int prefixLength, int count) throws IOException {
       System.out.println("writeBlocks count=" + count);
-      // nocommit nuke the prefixLength == 0 case, but testVaryingTermsPerSegment fails!!
       if (count <= maxItemsInBlock) {
         // Easy case: not floor block.  Eg, prefix is "foo",
         // and we found 30 terms/sub-blocks starting w/ that

CheckIndex.java

@@ -1642,6 +1642,12 @@ public class CheckIndex {
         // Only agg stats if the doc is live:
         final boolean doStats = liveDocs == null || liveDocs.get(j);
+
+        if (doStats == false) {
+          // nocommit is it OK to stop verifying deleted docs?
+          continue;
+        }
+
         if (doStats) {
           status.docCount++;
         }

FreqProxFields.java

@@ -37,6 +37,8 @@ import org.apache.lucene.util.BytesRef;
 class FreqProxFields extends Fields {
   final Map<String,FreqProxTermsWriterPerField> fields = new LinkedHashMap<>();

+  private Bits liveDocs;
+
   public FreqProxFields(List<FreqProxTermsWriterPerField> fieldList) {
     // NOTE: fields are already sorted by field name
     for(FreqProxTermsWriterPerField field : fieldList) {
@@ -44,6 +46,10 @@ class FreqProxFields extends Fields {
     }
   }

+  public void setLiveDocs(Bits liveDocs) {
+    this.liveDocs = liveDocs;
+  }
+
   public Iterator<String> iterator() {
     return fields.keySet().iterator();
   }
@@ -51,7 +57,7 @@ class FreqProxFields extends Fields {
   @Override
   public Terms terms(String field) throws IOException {
     FreqProxTermsWriterPerField perField = fields.get(field);
-    return perField == null ? null : new FreqProxTerms(perField);
+    return perField == null ? null : new FreqProxTerms(perField, liveDocs);
   }

   @Override
@@ -62,9 +68,11 @@ class FreqProxFields extends Fields {
   private static class FreqProxTerms extends Terms {
     final FreqProxTermsWriterPerField terms;
+    final Bits liveDocs;

-    public FreqProxTerms(FreqProxTermsWriterPerField terms) {
+    public FreqProxTerms(FreqProxTermsWriterPerField terms, Bits liveDocs) {
       this.terms = terms;
+      this.liveDocs = liveDocs;
     }

     @Override
@@ -72,8 +80,9 @@ class FreqProxFields extends Fields {
       FreqProxTermsEnum termsEnum;
       if (reuse instanceof FreqProxTermsEnum && ((FreqProxTermsEnum) reuse).terms == this.terms) {
         termsEnum = (FreqProxTermsEnum) reuse;
+        assert termsEnum.liveDocs == this.liveDocs;
       } else {
-        termsEnum = new FreqProxTermsEnum(terms);
+        termsEnum = new FreqProxTermsEnum(terms, liveDocs);
       }
       termsEnum.reset();
       return termsEnum;
@@ -136,11 +145,13 @@ class FreqProxFields extends Fields {
     final FreqProxPostingsArray postingsArray;
     final BytesRef scratch = new BytesRef();
     final int numTerms;
+    final Bits liveDocs;
     int ord;

-    public FreqProxTermsEnum(FreqProxTermsWriterPerField terms) {
+    public FreqProxTermsEnum(FreqProxTermsWriterPerField terms, Bits liveDocs) {
       this.terms = terms;
       this.numTerms = terms.bytesHash.size();
+      this.liveDocs = liveDocs;
       sortedTermIDs = terms.sortedTermIDs;
       assert sortedTermIDs != null;
       postingsArray = (FreqProxPostingsArray) terms.postingsArray;
@@ -228,8 +239,8 @@ class FreqProxFields extends Fields {
     }

     @Override
-    public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) {
-      if (liveDocs != null) {
+    public DocsEnum docs(Bits liveDocsIn, DocsEnum reuse, int flags) {
+      if (liveDocsIn != null) {
         throw new IllegalArgumentException("liveDocs must be null");
       }
@@ -244,18 +255,20 @@ class FreqProxFields extends Fields {
       if (reuse instanceof FreqProxDocsEnum) {
         docsEnum = (FreqProxDocsEnum) reuse;
         if (docsEnum.postingsArray != postingsArray) {
-          docsEnum = new FreqProxDocsEnum(terms, postingsArray);
+          docsEnum = new FreqProxDocsEnum(terms, postingsArray, liveDocs);
+        } else {
+          assert docsEnum.liveDocs == liveDocs;
         }
       } else {
-        docsEnum = new FreqProxDocsEnum(terms, postingsArray);
+        docsEnum = new FreqProxDocsEnum(terms, postingsArray, liveDocs);
       }
       docsEnum.reset(sortedTermIDs[ord]);
       return docsEnum;
     }

     @Override
-    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) {
-      if (liveDocs != null) {
+    public DocsAndPositionsEnum docsAndPositions(Bits liveDocsIn, DocsAndPositionsEnum reuse, int flags) {
+      if (liveDocsIn != null) {
         throw new IllegalArgumentException("liveDocs must be null");
       }
       FreqProxDocsAndPositionsEnum posEnum;
@@ -275,10 +288,12 @@ class FreqProxFields extends Fields {
       if (reuse instanceof FreqProxDocsAndPositionsEnum) {
         posEnum = (FreqProxDocsAndPositionsEnum) reuse;
         if (posEnum.postingsArray != postingsArray) {
-          posEnum = new FreqProxDocsAndPositionsEnum(terms, postingsArray);
+          posEnum = new FreqProxDocsAndPositionsEnum(terms, postingsArray, liveDocs);
+        } else {
+          assert posEnum.liveDocs == liveDocs;
         }
       } else {
-        posEnum = new FreqProxDocsAndPositionsEnum(terms, postingsArray);
+        posEnum = new FreqProxDocsAndPositionsEnum(terms, postingsArray, liveDocs);
       }
       posEnum.reset(sortedTermIDs[ord]);
       return posEnum;
@@ -311,15 +326,17 @@ class FreqProxFields extends Fields {
     final FreqProxPostingsArray postingsArray;
     final ByteSliceReader reader = new ByteSliceReader();
     final boolean readTermFreq;
+    final Bits liveDocs;
     int docID;
     int freq;
     boolean ended;
     int termID;

-    public FreqProxDocsEnum(FreqProxTermsWriterPerField terms, FreqProxPostingsArray postingsArray) {
+    public FreqProxDocsEnum(FreqProxTermsWriterPerField terms, FreqProxPostingsArray postingsArray, Bits liveDocs) {
       this.terms = terms;
       this.postingsArray = postingsArray;
       this.readTermFreq = terms.hasFreq;
+      this.liveDocs = liveDocs;
     }

     public void reset(int termID) {
@@ -347,33 +364,39 @@ class FreqProxFields extends Fields {
     @Override
     public int nextDoc() throws IOException {
-      if (reader.eof()) {
-        if (ended) {
-          return NO_MORE_DOCS;
-        } else {
-          ended = true;
-          docID = postingsArray.lastDocIDs[termID];
-          if (readTermFreq) {
-            freq = postingsArray.termFreqs[termID];
-          }
-        }
-      } else {
-        int code = reader.readVInt();
-        if (!readTermFreq) {
-          docID += code;
-        } else {
-          docID += code >>> 1;
-          if ((code & 1) != 0) {
-            freq = 1;
-          } else {
-            freq = reader.readVInt();
-          }
-        }
-        assert docID != postingsArray.lastDocIDs[termID];
-      }
-      return docID;
+      while (true) {
+        if (reader.eof()) {
+          if (ended) {
+            return NO_MORE_DOCS;
+          } else {
+            ended = true;
+            docID = postingsArray.lastDocIDs[termID];
+            if (readTermFreq) {
+              freq = postingsArray.termFreqs[termID];
+            }
+          }
+        } else {
+          int code = reader.readVInt();
+          if (!readTermFreq) {
+            docID += code;
+          } else {
+            docID += code >>> 1;
+            if ((code & 1) != 0) {
+              freq = 1;
+            } else {
+              freq = reader.readVInt();
+            }
+          }
+          assert docID != postingsArray.lastDocIDs[termID];
+        }
+        if (liveDocs != null && liveDocs.get(docID) == false) {
+          continue;
+        }
+        return docID;
+      }
     }
@@ -394,6 +417,7 @@ class FreqProxFields extends Fields {
     final ByteSliceReader reader = new ByteSliceReader();
     final ByteSliceReader posReader = new ByteSliceReader();
     final boolean readOffsets;
+    final Bits liveDocs;
     int docID;
     int freq;
     int pos;
@@ -405,10 +429,11 @@ class FreqProxFields extends Fields {
     boolean hasPayload;
     BytesRef payload = new BytesRef();

-    public FreqProxDocsAndPositionsEnum(FreqProxTermsWriterPerField terms, FreqProxPostingsArray postingsArray) {
+    public FreqProxDocsAndPositionsEnum(FreqProxTermsWriterPerField terms, FreqProxPostingsArray postingsArray, Bits liveDocs) {
       this.terms = terms;
       this.postingsArray = postingsArray;
       this.readOffsets = terms.hasOffsets;
+      this.liveDocs = liveDocs;
       assert terms.hasProx;
       assert terms.hasFreq;
     }
@@ -434,34 +459,40 @@ class FreqProxFields extends Fields {
     @Override
     public int nextDoc() throws IOException {
-      while (posLeft != 0) {
-        nextPosition();
-      }
-      if (reader.eof()) {
-        if (ended) {
-          return NO_MORE_DOCS;
-        } else {
-          ended = true;
-          docID = postingsArray.lastDocIDs[termID];
-          freq = postingsArray.termFreqs[termID];
-        }
-      } else {
-        int code = reader.readVInt();
-        docID += code >>> 1;
-        if ((code & 1) != 0) {
-          freq = 1;
-        } else {
-          freq = reader.readVInt();
-        }
-        assert docID != postingsArray.lastDocIDs[termID];
-      }
-      posLeft = freq;
-      pos = 0;
-      startOffset = 0;
-      return docID;
+      while (true) {
+        while (posLeft != 0) {
+          nextPosition();
+        }
+        if (reader.eof()) {
+          if (ended) {
+            return NO_MORE_DOCS;
+          } else {
+            ended = true;
+            docID = postingsArray.lastDocIDs[termID];
+            freq = postingsArray.termFreqs[termID];
+          }
+        } else {
+          int code = reader.readVInt();
+          docID += code >>> 1;
+          if ((code & 1) != 0) {
+            freq = 1;
+          } else {
+            freq = reader.readVInt();
+          }
+          assert docID != postingsArray.lastDocIDs[termID];
+        }
+        posLeft = freq;
+        pos = 0;
+        startOffset = 0;
+        if (liveDocs != null && liveDocs.get(docID) == false) {
+          continue;
+        }
+        return docID;
+      }
     }

     @Override
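Both rewritten nextDoc() methods share one pattern: decode the next raw posting, then silently skip it when its liveDocs bit is cleared, so a document deleted before flush never reaches the postings writer. A self-contained sketch of that pattern in plain Java (a boolean[] stands in for Lucene's Bits, and the postings array is invented for illustration):

    public class SkipDeletedSketch {
      static final int NO_MORE_DOCS = Integer.MAX_VALUE;

      public static void main(String[] args) {
        int[] postings = {0, 1, 2, 3, 4};                   // raw docIDs for one term
        boolean[] live = {true, false, true, false, true};  // stand-in for Bits liveDocs
        int upto = 0;
        while (true) {
          int docID = upto < postings.length ? postings[upto++] : NO_MORE_DOCS;
          if (docID == NO_MORE_DOCS) {
            break;
          }
          if (live[docID] == false) {
            continue; // deleted on flush: skipped, exactly like the loops above
          }
          System.out.println("live doc " + docID); // prints 0, 2, 4
        }
      }
    }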

FreqProxTermsWriter.java

@@ -34,6 +34,8 @@ final class FreqProxTermsWriter extends TermsHash {
   }

   private void applyDeletes(SegmentWriteState state, Fields fields) throws IOException {
+    System.out.println("applyDeletes segUpdates=" + state.segUpdates);
+
     // Process any pending Term deletes for this newly
     // flushed segment:
     if (state.segUpdates != null && state.segUpdates.terms.size() > 0) {
@@ -98,10 +100,16 @@ final class FreqProxTermsWriter extends TermsHash {
     // Sort by field name
     CollectionUtil.introSort(allFields);

-    Fields fields = new FreqProxFields(allFields);
+    FreqProxFields fields = new FreqProxFields(allFields);

     applyDeletes(state, fields);

+    if (state.liveDocs != null) {
+      fields.setLiveDocs(state.liveDocs);
+    }
+
+    System.out.println("now: " + state.liveDocs + " pf=" + state.segmentInfo.getCodec().postingsFormat());
+
     FieldsConsumer consumer = state.segmentInfo.getCodec().postingsFormat().fieldsConsumer(state);
     boolean success = false;
     try {

SegmentWriteState.java

@@ -105,5 +105,6 @@ public class SegmentWriteState {
     this.segmentSuffix = segmentSuffix;
     segUpdates = state.segUpdates;
     delCountOnFlush = state.delCountOnFlush;
+    liveDocs = state.liveDocs;
   }
 }
} }

TestIndexWriterExceptions.java

@@ -508,7 +508,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
     writer.shutdown();
     IndexReader reader = DirectoryReader.open(dir);
     final Term t = new Term("content", "aa");
-    assertEquals(3, reader.docFreq(t));
+    assertEquals(2, reader.docFreq(t));

     // Make sure the doc that hit the exception was marked
     // as deleted:
@@ -648,7 +648,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
     IndexReader reader = DirectoryReader.open(dir);
     if (i == 0) {
       int expected = 5;
-      assertEquals(expected, reader.docFreq(new Term("contents", "here")));
+      assertEquals(expected-1, reader.docFreq(new Term("contents", "here")));
       assertEquals(expected, reader.maxDoc());
       int numDel = 0;
       final Bits liveDocs = MultiFields.getLiveDocs(reader);
@@ -760,8 +760,8 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
     IndexReader reader = DirectoryReader.open(dir);
     int expected = (3+(1-i)*2)*NUM_THREAD*NUM_ITER;
-    assertEquals("i=" + i, expected, reader.docFreq(new Term("contents", "here")));
-    assertEquals(expected, reader.maxDoc());
+    assertEquals("i=" + i, expected - NUM_THREAD*NUM_ITER, reader.docFreq(new Term("contents", "here")));
+    assertEquals("i=" + i, expected, reader.maxDoc());
     int numDel = 0;
     final Bits liveDocs = MultiFields.getLiveDocs(reader);
     assertNotNull(liveDocs);

TestMultiFields.java

@@ -123,14 +123,18 @@ public class TestMultiFields extends LuceneTestCase {
       }
       DocsEnum docsEnum = TestUtil.docs(random(), reader, "field", term, liveDocs, null, DocsEnum.FLAG_NONE);
-      assertNotNull(docsEnum);
-
-      for(int docID : docs.get(term)) {
-        if (!deleted.contains(docID)) {
-          assertEquals(docID, docsEnum.nextDoc());
+      if (docsEnum == null) {
+        for(int docID : docs.get(term)) {
+          assert deleted.contains(docID);
         }
+      } else {
+        for(int docID : docs.get(term)) {
+          if (!deleted.contains(docID)) {
+            assertEquals(docID, docsEnum.nextDoc());
+          }
+        }
+        assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsEnum.nextDoc());
       }
-      assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsEnum.nextDoc());
     }

     reader.close();

TestTermsEnum.java

@@ -891,9 +891,8 @@ public class TestTermsEnum extends LuceneTestCase {
     Directory d = newDirectory();
     RandomIndexWriter w = new RandomIndexWriter(random(), d);
     Set<String> terms = new HashSet<String>();
-    // nocommit
-    String prefix = TestUtil.randomSimpleString(random(), 1, 20);
-    //String prefix = TestUtil.randomRealisticUnicodeString(random(), 1, 20);
+    //String prefix = TestUtil.randomSimpleString(random(), 1, 20);
+    String prefix = TestUtil.randomRealisticUnicodeString(random(), 1, 20);
     int numTerms = atLeast(1000);
     if (VERBOSE) {
       System.out.println("TEST: " + numTerms + " terms; prefix=" + prefix);

ToParentBlockJoinQuery.java

@@ -381,7 +381,7 @@ public class ToParentBlockJoinQuery extends Query {
     @Override
     public int advance(int parentTarget) throws IOException {

-      //System.out.println("Q.advance parentTarget=" + parentTarget);
+      // System.out.println("Q.advance parentTarget=" + parentTarget);
       if (parentTarget == NO_MORE_DOCS) {
         return parentDoc = NO_MORE_DOCS;
       }
@@ -398,13 +398,13 @@ public class ToParentBlockJoinQuery extends Query {
       prevParentDoc = parentBits.prevSetBit(parentTarget-1);

-      //System.out.println("  rolled back to prevParentDoc=" + prevParentDoc + " vs parentDoc=" + parentDoc);
+      // System.out.println("  rolled back to prevParentDoc=" + prevParentDoc + " vs parentDoc=" + parentDoc);
       assert prevParentDoc >= parentDoc;

       if (prevParentDoc > nextChildDoc) {
         nextChildDoc = childScorer.advance(prevParentDoc);
         // System.out.println("  childScorer advanced to child docID=" + nextChildDoc);
-      //} else {
-        //System.out.println("  skip childScorer advance");
+      } else {
+        // System.out.println("  skip childScorer advance");
       }

       // Parent & child docs are supposed to be orthogonal:
@@ -413,15 +413,21 @@ public class ToParentBlockJoinQuery extends Query {
       }

       final int nd = nextDoc();
-      //System.out.println("  return nextParentDoc=" + nd);
+      // System.out.println("  return nextParentDoc=" + nd);
       return nd;
     }

     public Explanation explain(int docBase) throws IOException {
-      int start = docBase + prevParentDoc + 1; // +1 b/c prevParentDoc is previous parent doc
-      int end = docBase + parentDoc - 1; // -1 b/c parentDoc is parent doc
+      int start = prevParentDoc + 1; // +1 b/c prevParentDoc is previous parent doc
+      if (acceptDocs != null) {
+        // Skip deleted docs:
+        while (acceptDocs.get(start) == false) {
+          start++;
+        }
+      }
+      int end = parentDoc - 1; // -1 b/c parentDoc is parent doc
       return new ComplexExplanation(
-        true, score(), String.format(Locale.ROOT, "Score based on child doc range from %d to %d", start, end)
+        true, score(), String.format(Locale.ROOT, "Score based on child doc range from %d to %d", docBase+start, docBase+end)
       );
     }
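The explain() fix changes two things: docBase is now added only when formatting the message, and start walks forward past deleted docs. A worked example with invented values (docBase=100, prevParentDoc=5, parentDoc=9, child doc 6 deleted):

    public class ExplainRangeSketch {
      public static void main(String[] args) {
        int docBase = 100, prevParentDoc = 5, parentDoc = 9;
        // stand-in for acceptDocs; only doc 6 is deleted
        boolean[] live = {true, true, true, true, true, true, false, true, true, true};
        int start = prevParentDoc + 1;   // segment-local start = 6
        while (live[start] == false) {   // the acceptDocs loop from the patch
          start++;                       // lands on 7
        }
        int end = parentDoc - 1;         // = 8
        System.out.println("Score based on child doc range from "
            + (docBase + start) + " to " + (docBase + end)); // "from 107 to 108"
      }
    }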

TestBlockJoin.java

@@ -623,8 +623,14 @@ public class TestBlockJoin extends LuceneTestCase {
       System.out.println("TEST: reader=" + r);
       System.out.println("TEST: joinReader=" + joinR);

+      Bits liveDocs = MultiFields.getLiveDocs(joinR);
       for(int docIDX=0;docIDX<joinR.maxDoc();docIDX++) {
-        System.out.println("  docID=" + docIDX + " doc=" + joinR.document(docIDX));
+        System.out.println("  docID=" + docIDX + " doc=" + joinR.document(docIDX) + " deleted?=" + (liveDocs != null && liveDocs.get(docIDX) == false));
+      }
+      DocsEnum parents = MultiFields.getTermDocsEnum(joinR, null, "isParent", new BytesRef("x"));
+      System.out.println("parent docIDs:");
+      while (parents.nextDoc() != parents.NO_MORE_DOCS) {
+        System.out.println("  " + parents.docID());
       }
     }
@@ -823,6 +829,7 @@ public class TestBlockJoin extends LuceneTestCase {
       Explanation explanation = joinS.explain(childJoinQuery, hit.doc);
       StoredDocument document = joinS.doc(hit.doc - 1);
       int childId = Integer.parseInt(document.get("childID"));
+      //System.out.println("  hit docID=" + hit.doc + " childId=" + childId + " parentId=" + document.get("parentID"));
       assertTrue(explanation.isMatch());
       assertEquals(hit.score, explanation.getValue(), 0.0f);
       assertEquals(String.format(Locale.ROOT, "Score based on child doc range from %d to %d", hit.doc - 1 - childId, hit.doc - 1), explanation.getDescription());

TestFieldCacheVsDocValues.java

@@ -315,11 +315,14 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
     }

     // delete some docs
+    // nocommit hmmm what to do
+    /*
     int numDeletions = random().nextInt(numDocs/10);
     for (int i = 0; i < numDeletions; i++) {
       int id = random().nextInt(numDocs);
       writer.deleteDocuments(new Term("id", Integer.toString(id)));
     }
+    */

     writer.shutdown();

     // compare
@@ -379,11 +382,14 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
     }

     // delete some docs
+    // nocommit hmmm what to do
+    /*
     int numDeletions = random().nextInt(numDocs/10);
     for (int i = 0; i < numDeletions; i++) {
       int id = random().nextInt(numDocs);
       writer.deleteDocuments(new Term("id", Integer.toString(id)));
     }
+    */

     // compare per-segment
     DirectoryReader ir = writer.getReader();
@@ -443,11 +449,14 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
     }

     // delete some docs
+    // nocommit hmmm what to do
+    /*
     int numDeletions = random().nextInt(numDocs/10);
     for (int i = 0; i < numDeletions; i++) {
       int id = random().nextInt(numDocs);
       writer.deleteDocuments(new Term("id", Integer.toString(id)));
     }
+    */

     // merge some segments and ensure that at least one of them has more than
     // 256 values