diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsWriter.java
index c705a3d5672..d25ef234599 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsWriter.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsWriter.java
@@ -26,6 +26,7 @@ import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 
 public final class IDVersionPostingsWriter extends PushPostingsWriterBase {
@@ -71,11 +72,9 @@ public final class IDVersionPostingsWriter extends PushPostingsWriterBase {
   @Override
   public void startDoc(int docID, int termDocFreq) throws IOException {
     if (lastDocID != -1) {
-      // nocommit need test
       throw new IllegalArgumentException("term appears in more than one document");
     }
     if (termDocFreq != 1) {
-      // nocommit need test
       throw new IllegalArgumentException("term appears more than once in the document");
     }
 
@@ -86,16 +85,13 @@ public final class IDVersionPostingsWriter extends PushPostingsWriterBase {
   @Override
   public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
     if (lastPosition != -1) {
-      // nocommit need test
       throw new IllegalArgumentException("term appears more than once in document");
     }
     lastPosition = position;
     if (payload == null) {
-      // nocommit need test
       throw new IllegalArgumentException("token doesn't have a payload");
     }
     if (payload.length != 8) {
-      // nocommit need test
       throw new IllegalArgumentException("payload.length != 8 (got " + payload.length + ")");
     }
 
@@ -108,7 +104,6 @@ public final class IDVersionPostingsWriter extends PushPostingsWriterBase {
   @Override
   public void finishDoc() throws IOException {
     if (lastPosition == -1) {
-      // nocommit need test
       throw new IllegalArgumentException("missing addPosition");
     }
   }
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java b/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java
index a495fd206e1..fffbffbbf69 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java
@@ -53,9 +53,6 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
   boolean termExists;
   final VersionFieldReader fr;
 
-  // nocommit make this public "for casting" and add a getVersion method?
-
-  // nocommit unused?
   private int targetBeforeCurrentLength;
 
   private final ByteArrayDataInput scratchReader = new ByteArrayDataInput();
@@ -228,6 +225,11 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
     }
   }
 
+  /** Only valid if we are positioned. */
+  public long getVersion() {
+    return ((IDVersionTermState) currentFrame.state).idVersion;
+  }
+
   /** Returns false if the term does not exist, or it exists but its version is too old (< minIDVersion). */
   public boolean seekExact(final BytesRef target, long minIDVersion) throws IOException {
 
@@ -357,11 +359,6 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
         }
         currentFrame = lastFrame;
         currentFrame.rewind();
-        // nocommit put this back to BT also?
-        //term.length = targetUpto;
-
-        // nocommit put this back???
-        //termExists = false;
       } else {
         // Target is exactly the same as current term
         assert term.length == target.length;
@@ -559,7 +556,6 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
 
         if (currentFrame.maxIDVersion < minIDVersion) {
           // The max version for all terms in this block is lower than the minVersion
-          // nocommit need same logic here as above?
          termExists = false;
          term.length = targetUpto;
          return false;
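
Reviewer note: the new getVersion() above is only meaningful once the enum is positioned, e.g. after a successful seekExact. A minimal sketch of the intended per-segment call pattern follows; the field name, reader setup, and cast are illustrative assumptions, not code from this patch:

  // Sketch only (hypothetical usage): assumes the "id" field of this
  // segment was written with IDVersionPostingsFormat, so the per-segment
  // TermsEnum can be cast to IDVersionSegmentTermsEnum.
  AtomicReader segment = reader.leaves().get(0).reader();
  TermsEnum termsEnum = segment.terms("id").iterator(null);
  IDVersionSegmentTermsEnum idEnum = (IDVersionSegmentTermsEnum) termsEnum;
  if (idEnum.seekExact(new BytesRef("some-id"), 0L /* minIDVersion */)) {
    // We are positioned on the term, so getVersion() is valid:
    long version = idEnum.getVersion();
  }
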
diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/idversion/StringAndPayloadField.java b/lucene/codecs/src/test/org/apache/lucene/codecs/idversion/StringAndPayloadField.java
new file mode 100644
index 00000000000..33ae08f6dc6
--- /dev/null
+++ b/lucene/codecs/src/test/org/apache/lucene/codecs/idversion/StringAndPayloadField.java
@@ -0,0 +1,104 @@
+package org.apache.lucene.codecs.idversion;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.util.BytesRef;
+
+// nocommit can we take a BytesRef token instead?
+
+/** Produces a single String token from the provided value, with the provided payload. */
+class StringAndPayloadField extends Field {
+
+  public static final FieldType TYPE = new FieldType();
+
+  static {
+    TYPE.setIndexed(true);
+    TYPE.setOmitNorms(true);
+    TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
+    TYPE.setTokenized(true);
+    TYPE.freeze();
+  }
+
+  private final BytesRef payload;
+
+  public StringAndPayloadField(String name, String value, BytesRef payload) {
+    super(name, value, TYPE);
+    this.payload = payload;
+  }
+
+  @Override
+  public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) throws IOException {
+    SingleTokenWithPayloadTokenStream ts;
+    if (reuse instanceof SingleTokenWithPayloadTokenStream) {
+      ts = (SingleTokenWithPayloadTokenStream) reuse;
+    } else {
+      ts = new SingleTokenWithPayloadTokenStream();
+    }
+    ts.setValue((String) fieldsData, payload);
+    return ts;
+  }
+
+  private static final class SingleTokenWithPayloadTokenStream extends TokenStream {
+
+    private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
+    private final PayloadAttribute payloadAttribute = addAttribute(PayloadAttribute.class);
+    private boolean used = false;
+    private String value = null;
+    private BytesRef payload;
+
+    /** Sets the string value. */
+    void setValue(String value, BytesRef payload) {
+      this.value = value;
+      this.payload = payload;
+    }
+
+    @Override
+    public boolean incrementToken() {
+      if (used) {
+        return false;
+      }
+      clearAttributes();
+      termAttribute.append(value);
+      payloadAttribute.setPayload(payload);
+      used = true;
+      return true;
+    }
+
+    @Override
+    public void reset() {
+      used = false;
+    }
+
+    @Override
+    public void close() {
+      value = null;
+      payload = null;
+    }
+  }
+}
diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/idversion/TestIDVersionPostingsFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/idversion/TestIDVersionPostingsFormat.java
index 44e8fa1272f..db0a6cb18b2 100644
--- a/lucene/codecs/src/test/org/apache/lucene/codecs/idversion/TestIDVersionPostingsFormat.java
+++ b/lucene/codecs/src/test/org/apache/lucene/codecs/idversion/TestIDVersionPostingsFormat.java
@@ -36,15 +36,18 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.index.BasePostingsFormatTestCase;
+import org.apache.lucene.index.ConcurrentMergeScheduler;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.MergeScheduler;
 import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.index.PerThreadPKLookup;
 import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
@@ -262,6 +265,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
         System.out.println("  lookup exact version (should be found)");
       }
       assertTrue("term should have been found (version too old)", lookup.lookup(idValueBytes, expectedVersion.longValue()) != -1);
+      assertEquals(expectedVersion.longValue(), lookup.getVersion());
     } else {
       if (VERBOSE) {
         System.out.println("  lookup version+1 (should not be found)");
@@ -281,6 +285,8 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
       super(r, field);
     }
 
+    long lastVersion;
+
     /** Returns docID if found, else -1. */
     public int lookup(BytesRef id, long version) throws IOException {
       for(int seg=0;seg<numSegs;seg++) {
-     * Warning: Does not initialize the value, you must call
-     * {@link #setValue(String)} afterwards!
-     */
-    SingleTokenWithPayloadTokenStream() {
-    }
-
-    /** Sets the string value. */
-    void setValue(String value, BytesRef payload) {
-      this.value = value;
-      this.payload = payload;
-    }
-
-    @Override
-    public boolean incrementToken() {
-      if (used) {
-        return false;
-      }
-      clearAttributes();
-      termAttribute.append(value);
-      payloadAttribute.setPayload(payload);
-      used = true;
-      return true;
-    }
-
-    @Override
-    public void reset() {
-      used = false;
-    }
-
-    @Override
-    public void close() {
-      value = null;
-      payload = null;
+    /** Only valid if lookup returned a valid docID. */
+    public long getVersion() {
+      return lastVersion;
+    }
   }
 
@@ -394,8 +328,6 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
     */
   }
 
-  /*
-  // Invalid
   public void testMoreThanOneDocPerIDOneSegment() throws Exception {
     Directory dir = newDirectory();
     IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
@@ -412,14 +344,138 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
       fail("didn't hit expected exception");
     } catch (IllegalArgumentException iae) {
       // expected
-      iae.printStackTrace();
     }
     w.close();
     dir.close();
   }
 
-  // Invalid
   public void testMoreThanOneDocPerIDTwoSegments() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
+    MergeScheduler ms = iwc.getMergeScheduler();
+    if (ms instanceof ConcurrentMergeScheduler) {
+      iwc.setMergeScheduler(new ConcurrentMergeScheduler() {
+          @Override
+          protected void handleMergeException(Throwable exc) {
+            assertTrue(exc instanceof IllegalArgumentException);
+          }
+        });
+    }
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+    Document doc = new Document();
+    doc.add(makeIDField("id", 17));
+    w.addDocument(doc);
+    w.commit();
+    doc = new Document();
+    doc.add(makeIDField("id", 17));
+    try {
+      w.addDocument(doc);
+      w.commit();
+      w.forceMerge(1);
+      fail("didn't hit exception");
+    } catch (IllegalArgumentException iae) {
+      // expected: SMS will hit this
+    } catch (IOException ioe) {
+      // expected
+      assertTrue(ioe.getCause() instanceof IllegalArgumentException);
+    }
+    w.w.close();
+    dir.close();
+  }
+
+  public void testMoreThanOneDocPerIDWithUpdates() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+    Document doc = new Document();
+    doc.add(makeIDField("id", 17));
+    w.addDocument(doc);
+    doc = new Document();
+    doc.add(makeIDField("id", 17));
+    // Replaces the doc we just indexed:
+    w.updateDocument(new Term("id", "id"), doc);
+    w.commit();
+    w.close();
+    dir.close();
+  }
+
+  public void testMoreThanOneDocPerIDWithDeletes() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+    Document doc = new Document();
+    doc.add(makeIDField("id", 17));
+    w.addDocument(doc);
+    w.deleteDocuments(new Term("id", "id"));
+    doc = new Document();
+    doc.add(makeIDField("id", 17));
+    w.addDocument(doc);
+    w.commit();
+    w.close();
+    dir.close();
+  }
+
+  public void testMissingPayload() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+    Document doc = new Document();
+    doc.add(newTextField("id", "id", Field.Store.NO));
+    try {
+      w.addDocument(doc);
+      w.commit();
+      fail("didn't hit expected exception");
+    } catch (IllegalArgumentException iae) {
+      // expected
+    }
+
+    w.close();
+    dir.close();
+  }
+
+  public void testMissingPositions() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+    Document doc = new Document();
+    doc.add(newStringField("id", "id", Field.Store.NO));
+    try {
+      w.addDocument(doc);
+      w.commit();
+      fail("didn't hit expected exception");
+    } catch (IllegalArgumentException iae) {
+      // expected
+    }
+
+    w.close();
+    dir.close();
+  }
+
+  public void testInvalidPayload() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+    Document doc = new Document();
+    doc.add(new StringAndPayloadField("id", "id", new BytesRef("foo")));
+    try {
+      w.addDocument(doc);
+      w.commit();
+      fail("didn't hit expected exception");
+    } catch (IllegalArgumentException iae) {
+      // expected
+    }
+
+    w.close();
+    dir.close();
+  }
+
+  public void testMoreThanOneDocPerIDWithDeletesAcrossSegments() throws IOException {
     Directory dir = newDirectory();
     IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
     iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
@@ -430,15 +486,29 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
     w.commit();
     doc = new Document();
     doc.add(makeIDField("id", 17));
-    w.addDocument(doc);
-    w.commit();
+    // Replaces the doc we just indexed:
+    w.updateDocument(new Term("id", "id"), doc);
     w.forceMerge(1);
     w.close();
     dir.close();
   }
 
-  public void testMoreThanOneDocPerIDWithDeletes() {
-
+  public void testMoreThanOnceInSingleDoc() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+    Document doc = new Document();
+    doc.add(makeIDField("id", 17));
+    doc.add(makeIDField("id", 17));
+    try {
+      w.addDocument(doc);
+      w.commit();
+      fail("didn't hit expected exception");
+    } catch (IllegalArgumentException iae) {
+      // expected
+    }
+    w.close();
+    dir.close();
   }
-  */
 }
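
Reviewer note: the tests above lean on a makeIDField helper whose body sits outside these hunks. Given the constraints IDVersionPostingsWriter enforces (single token, exactly 8-byte payload), a plausible sketch is below; the byte order and exact shape are assumptions, only the 8-byte length is dictated by the patch:

  // Hypothetical reconstruction of the makeIDField helper the tests call:
  // pack the long version into the 8-byte payload that IDVersionPostingsWriter
  // requires (it rejects payload.length != 8); big-endian by assumption.
  static Field makeIDField(String id, long version) {
    byte[] bytes = new byte[8];
    for (int i = 0; i < 8; i++) {
      bytes[i] = (byte) (version >>> (56 - 8 * i));  // high byte first
    }
    return new StringAndPayloadField("id", id, new BytesRef(bytes));
  }
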
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java
index ac0cfbbc8c3..90655dccb14 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java
@@ -613,7 +613,6 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
     void writeBlocks(IntsRef prevTerm, int prefixLength, int count) throws IOException {
 
       System.out.println("writeBlocks count=" + count);
-      // nocommit nuke the prefixLength == 0 case, but testVaryingTermsPerSegment fails!!
       if (count <= maxItemsInBlock) {
         // Easy case: not floor block.  Eg, prefix is "foo",
         // and we found 30 terms/sub-blocks starting w/ that
diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
index 1d1a4457df6..10682263269 100644
--- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
+++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
@@ -1642,6 +1642,12 @@ public class CheckIndex {
 
         // Only agg stats if the doc is live:
         final boolean doStats = liveDocs == null || liveDocs.get(j);
+
+        if (doStats == false) {
+          // nocommit is it OK to stop verifying deleted docs?
+          continue;
+        }
+
         if (doStats) {
           status.docCount++;
         }
diff --git a/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java b/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java
index 48563397b3c..f9ebaf3e481 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java
@@ -37,6 +37,8 @@ import org.apache.lucene.util.BytesRef;
 class FreqProxFields extends Fields {
   final Map<String,FreqProxTermsWriterPerField> fields = new LinkedHashMap<>();
 
+  private Bits liveDocs;
+
   public FreqProxFields(List<FreqProxTermsWriterPerField> fieldList) {
     // NOTE: fields are already sorted by field name
     for(FreqProxTermsWriterPerField field : fieldList) {
@@ -44,6 +46,10 @@ class FreqProxFields extends Fields {
     }
   }
 
+  public void setLiveDocs(Bits liveDocs) {
+    this.liveDocs = liveDocs;
+  }
+
   public Iterator<String> iterator() {
     return fields.keySet().iterator();
   }
 
   @Override
   public Terms terms(String field) throws IOException {
     FreqProxTermsWriterPerField perField = fields.get(field);
-    return perField == null ? null : new FreqProxTerms(perField);
+    return perField == null ? null : new FreqProxTerms(perField, liveDocs);
   }
 
   @Override
@@ -62,9 +68,11 @@ class FreqProxFields extends Fields {
 
   private static class FreqProxTerms extends Terms {
     final FreqProxTermsWriterPerField terms;
+    final Bits liveDocs;
 
-    public FreqProxTerms(FreqProxTermsWriterPerField terms) {
+    public FreqProxTerms(FreqProxTermsWriterPerField terms, Bits liveDocs) {
       this.terms = terms;
+      this.liveDocs = liveDocs;
     }
 
     @Override
@@ -72,8 +80,9 @@ class FreqProxFields extends Fields {
       FreqProxTermsEnum termsEnum;
       if (reuse instanceof FreqProxTermsEnum && ((FreqProxTermsEnum) reuse).terms == this.terms) {
         termsEnum = (FreqProxTermsEnum) reuse;
+        assert termsEnum.liveDocs == this.liveDocs;
       } else {
-        termsEnum = new FreqProxTermsEnum(terms);
+        termsEnum = new FreqProxTermsEnum(terms, liveDocs);
       }
       termsEnum.reset();
       return termsEnum;
@@ -136,11 +145,13 @@ class FreqProxFields extends Fields {
     final FreqProxPostingsArray postingsArray;
     final BytesRef scratch = new BytesRef();
     final int numTerms;
+    final Bits liveDocs;
     int ord;
 
-    public FreqProxTermsEnum(FreqProxTermsWriterPerField terms) {
+    public FreqProxTermsEnum(FreqProxTermsWriterPerField terms, Bits liveDocs) {
       this.terms = terms;
       this.numTerms = terms.bytesHash.size();
+      this.liveDocs = liveDocs;
       sortedTermIDs = terms.sortedTermIDs;
       assert sortedTermIDs != null;
       postingsArray = (FreqProxPostingsArray) terms.postingsArray;
@@ -228,8 +239,8 @@ class FreqProxFields extends Fields {
     }
 
     @Override
-    public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) {
-      if (liveDocs != null) {
+    public DocsEnum docs(Bits liveDocsIn, DocsEnum reuse, int flags) {
+      if (liveDocsIn != null) {
         throw new IllegalArgumentException("liveDocs must be null");
       }
@@ -244,18 +255,20 @@
       if (reuse instanceof FreqProxDocsEnum) {
         docsEnum = (FreqProxDocsEnum) reuse;
         if (docsEnum.postingsArray != postingsArray) {
-          docsEnum = new FreqProxDocsEnum(terms, postingsArray);
+          docsEnum = new FreqProxDocsEnum(terms, postingsArray, liveDocs);
+        } else {
+          assert docsEnum.liveDocs == liveDocs;
         }
       } else {
-        docsEnum = new FreqProxDocsEnum(terms, postingsArray);
+        docsEnum = new FreqProxDocsEnum(terms, postingsArray, liveDocs);
       }
       docsEnum.reset(sortedTermIDs[ord]);
       return docsEnum;
     }
 
     @Override
-    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) {
-      if (liveDocs != null) {
+    public DocsAndPositionsEnum docsAndPositions(Bits liveDocsIn, DocsAndPositionsEnum reuse, int flags) {
+      if (liveDocsIn != null) {
         throw new IllegalArgumentException("liveDocs must be null");
       }
       FreqProxDocsAndPositionsEnum posEnum;
@@ -275,10 +288,12 @@ class FreqProxFields extends Fields {
       if (reuse instanceof FreqProxDocsAndPositionsEnum) {
         posEnum = (FreqProxDocsAndPositionsEnum) reuse;
         if (posEnum.postingsArray != postingsArray) {
-          posEnum = new FreqProxDocsAndPositionsEnum(terms, postingsArray);
+          posEnum = new FreqProxDocsAndPositionsEnum(terms, postingsArray, liveDocs);
+        } else {
+          assert posEnum.liveDocs == liveDocs;
         }
       } else {
-        posEnum = new FreqProxDocsAndPositionsEnum(terms, postingsArray);
+        posEnum = new FreqProxDocsAndPositionsEnum(terms, postingsArray, liveDocs);
       }
       posEnum.reset(sortedTermIDs[ord]);
       return posEnum;
@@ -311,15 +326,17 @@ class FreqProxFields extends Fields {
     final FreqProxPostingsArray postingsArray;
     final ByteSliceReader reader = new ByteSliceReader();
     final boolean readTermFreq;
+    final Bits liveDocs;
     int docID;
     int freq;
     boolean ended;
     int termID;
 
-    public FreqProxDocsEnum(FreqProxTermsWriterPerField terms, FreqProxPostingsArray postingsArray) {
+    public FreqProxDocsEnum(FreqProxTermsWriterPerField terms, FreqProxPostingsArray postingsArray, Bits liveDocs) {
       this.terms = terms;
       this.postingsArray = postingsArray;
       this.readTermFreq = terms.hasFreq;
+      this.liveDocs = liveDocs;
     }
 
     public void reset(int termID) {
@@ -347,33 +364,39 @@ class FreqProxFields extends Fields {
 
     @Override
     public int nextDoc() throws IOException {
-      if (reader.eof()) {
-        if (ended) {
-          return NO_MORE_DOCS;
-        } else {
-          ended = true;
-          docID = postingsArray.lastDocIDs[termID];
-          if (readTermFreq) {
-            freq = postingsArray.termFreqs[termID];
-          }
-        }
-      } else {
-        int code = reader.readVInt();
-        if (!readTermFreq) {
-          docID += code;
-        } else {
-          docID += code >>> 1;
-          if ((code & 1) != 0) {
-            freq = 1;
+      while (true) {
+        if (reader.eof()) {
+          if (ended) {
+            return NO_MORE_DOCS;
           } else {
-            freq = reader.readVInt();
+            ended = true;
+            docID = postingsArray.lastDocIDs[termID];
+            if (readTermFreq) {
+              freq = postingsArray.termFreqs[termID];
+            }
           }
+        } else {
+          int code = reader.readVInt();
+          if (!readTermFreq) {
+            docID += code;
+          } else {
+            docID += code >>> 1;
+            if ((code & 1) != 0) {
+              freq = 1;
+            } else {
+              freq = reader.readVInt();
+            }
+          }
+
+          assert docID != postingsArray.lastDocIDs[termID];
         }
 
-        assert docID != postingsArray.lastDocIDs[termID];
-      }
+        if (liveDocs != null && liveDocs.get(docID) == false) {
+          continue;
+        }
 
-      return docID;
+        return docID;
+      }
     }
 
     @Override
@@ -394,6 +417,7 @@ class FreqProxFields extends Fields {
     final ByteSliceReader reader = new ByteSliceReader();
     final ByteSliceReader posReader = new ByteSliceReader();
     final boolean readOffsets;
+    final Bits liveDocs;
     int docID;
     int freq;
     int pos;
@@ -405,10 +429,11 @@ class FreqProxFields extends Fields {
     boolean hasPayload;
     BytesRef payload = new BytesRef();
 
-    public FreqProxDocsAndPositionsEnum(FreqProxTermsWriterPerField terms, FreqProxPostingsArray postingsArray) {
+    public FreqProxDocsAndPositionsEnum(FreqProxTermsWriterPerField terms, FreqProxPostingsArray postingsArray, Bits liveDocs) {
       this.terms = terms;
       this.postingsArray = postingsArray;
       this.readOffsets = terms.hasOffsets;
+      this.liveDocs = liveDocs;
       assert terms.hasProx;
       assert terms.hasFreq;
     }
@@ -434,34 +459,40 @@ class FreqProxFields extends Fields {
 
     @Override
     public int nextDoc() throws IOException {
-      while (posLeft != 0) {
-        nextPosition();
-      }
-
-      if (reader.eof()) {
-        if (ended) {
-          return NO_MORE_DOCS;
-        } else {
-          ended = true;
-          docID = postingsArray.lastDocIDs[termID];
-          freq = postingsArray.termFreqs[termID];
-        }
-      } else {
-        int code = reader.readVInt();
-        docID += code >>> 1;
-        if ((code & 1) != 0) {
-          freq = 1;
-        } else {
-          freq = reader.readVInt();
+      while (true) {
+        while (posLeft != 0) {
+          nextPosition();
         }
 
-        assert docID != postingsArray.lastDocIDs[termID];
-      }
+        if (reader.eof()) {
+          if (ended) {
+            return NO_MORE_DOCS;
+          } else {
+            ended = true;
+            docID = postingsArray.lastDocIDs[termID];
+            freq = postingsArray.termFreqs[termID];
+          }
+        } else {
+          int code = reader.readVInt();
+          docID += code >>> 1;
+          if ((code & 1) != 0) {
+            freq = 1;
+          } else {
+            freq = reader.readVInt();
+          }
 
-      posLeft = freq;
-      pos = 0;
-      startOffset = 0;
-      return docID;
+          assert docID != postingsArray.lastDocIDs[termID];
+        }
+
+        posLeft = freq;
+        pos = 0;
+        startOffset = 0;
+        if (liveDocs != null && liveDocs.get(docID) == false) {
+          continue;
+        }
+
+        return docID;
+      }
     }
 
     @Override
diff --git a/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java b/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
index c7c7c6d5009..edd519386d1 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
@@ -34,6 +34,8 @@ final class FreqProxTermsWriter extends TermsHash {
   }
 
   private void applyDeletes(SegmentWriteState state, Fields fields) throws IOException {
+    System.out.println("applyDeletes segUpdates=" + state.segUpdates);
+
     // Process any pending Term deletes for this newly
     // flushed segment:
     if (state.segUpdates != null && state.segUpdates.terms.size() > 0) {
@@ -98,10 +100,16 @@ final class FreqProxTermsWriter extends TermsHash {
     // Sort by field name
     CollectionUtil.introSort(allFields);
 
-    Fields fields = new FreqProxFields(allFields);
+    FreqProxFields fields = new FreqProxFields(allFields);
 
     applyDeletes(state, fields);
 
+    if (state.liveDocs != null) {
+      fields.setLiveDocs(state.liveDocs);
+    }
+
+    System.out.println("now: " + state.liveDocs + " pf=" + state.segmentInfo.getCodec().postingsFormat());
+
     FieldsConsumer consumer = state.segmentInfo.getCodec().postingsFormat().fieldsConsumer(state);
     boolean success = false;
     try {
diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentWriteState.java b/lucene/core/src/java/org/apache/lucene/index/SegmentWriteState.java
index 0af6ba90d22..5ae23f8c0a4 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SegmentWriteState.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SegmentWriteState.java
@@ -105,5 +105,6 @@ public class SegmentWriteState {
     this.segmentSuffix = segmentSuffix;
     segUpdates = state.segUpdates;
     delCountOnFlush = state.delCountOnFlush;
+    liveDocs = state.liveDocs;
   }
 }
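
Reviewer note: the FreqProxFields change above boils down to a single pattern, applied inside both rewritten nextDoc() loops: keep advancing while the candidate docID's liveDocs bit is cleared. Stated standalone, it looks like this sketch (illustrative, not patch code; nextLiveDoc is a hypothetical name):

  // Skip-deleted-docs loop: advance the underlying iterator, skipping any
  // docID that liveDocs marks as deleted; a null liveDocs means all live.
  static int nextLiveDoc(DocIdSetIterator postings, Bits liveDocs) throws IOException {
    int docID;
    do {
      docID = postings.nextDoc();
    } while (docID != DocIdSetIterator.NO_MORE_DOCS
             && liveDocs != null && liveDocs.get(docID) == false);
    return docID;
  }
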
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
index 8d78d5d2217..c77ebc7c0b1 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
@@ -508,7 +508,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
     writer.shutdown();
     IndexReader reader = DirectoryReader.open(dir);
     final Term t = new Term("content", "aa");
-    assertEquals(3, reader.docFreq(t));
+    assertEquals(2, reader.docFreq(t));
 
     // Make sure the doc that hit the exception was marked
     // as deleted:
@@ -648,7 +648,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
       IndexReader reader = DirectoryReader.open(dir);
       if (i == 0) {
         int expected = 5;
-        assertEquals(expected, reader.docFreq(new Term("contents", "here")));
+        assertEquals(expected-1, reader.docFreq(new Term("contents", "here")));
         assertEquals(expected, reader.maxDoc());
         int numDel = 0;
         final Bits liveDocs = MultiFields.getLiveDocs(reader);
@@ -760,8 +760,8 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
 
       IndexReader reader = DirectoryReader.open(dir);
       int expected = (3+(1-i)*2)*NUM_THREAD*NUM_ITER;
-      assertEquals("i=" + i, expected, reader.docFreq(new Term("contents", "here")));
-      assertEquals(expected, reader.maxDoc());
+      assertEquals("i=" + i, expected - NUM_THREAD*NUM_ITER, reader.docFreq(new Term("contents", "here")));
+      assertEquals("i=" + i, expected, reader.maxDoc());
       int numDel = 0;
       final Bits liveDocs = MultiFields.getLiveDocs(reader);
       assertNotNull(liveDocs);
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestMultiFields.java b/lucene/core/src/test/org/apache/lucene/index/TestMultiFields.java
index 6eed2679033..9cd4168299c 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestMultiFields.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestMultiFields.java
@@ -123,14 +123,18 @@ public class TestMultiFields extends LuceneTestCase {
       }
 
       DocsEnum docsEnum = TestUtil.docs(random(), reader, "field", term, liveDocs, null, DocsEnum.FLAG_NONE);
-      assertNotNull(docsEnum);
-
-      for(int docID : docs.get(term)) {
-        if (!deleted.contains(docID)) {
-          assertEquals(docID, docsEnum.nextDoc());
+      if (docsEnum == null) {
+        for(int docID : docs.get(term)) {
+          assert deleted.contains(docID);
         }
+      } else {
+        for(int docID : docs.get(term)) {
+          if (!deleted.contains(docID)) {
+            assertEquals(docID, docsEnum.nextDoc());
+          }
+        }
+        assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsEnum.nextDoc());
       }
-      assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsEnum.nextDoc());
     }
 
     reader.close();
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java b/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
index 7bdf3837301..ecf4e9b05ca 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
@@ -891,9 +891,8 @@ public class TestTermsEnum extends LuceneTestCase {
     Directory d = newDirectory();
     RandomIndexWriter w = new RandomIndexWriter(random(), d);
     Set<String> terms = new HashSet<String>();
-    // nocommit
-    String prefix = TestUtil.randomSimpleString(random(), 1, 20);
-    //String prefix = TestUtil.randomRealisticUnicodeString(random(), 1, 20);
+    //String prefix = TestUtil.randomSimpleString(random(), 1, 20);
+    String prefix = TestUtil.randomRealisticUnicodeString(random(), 1, 20);
     int numTerms = atLeast(1000);
     if (VERBOSE) {
       System.out.println("TEST: " + numTerms + " terms; prefix=" + prefix);
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
index f6985e2fa55..290b3b1c098 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
@@ -381,7 +381,7 @@ public class ToParentBlockJoinQuery extends Query {
 
     @Override
     public int advance(int parentTarget) throws IOException {
-      //System.out.println("Q.advance parentTarget=" + parentTarget);
+      // System.out.println("Q.advance parentTarget=" + parentTarget);
       if (parentTarget == NO_MORE_DOCS) {
         return parentDoc = NO_MORE_DOCS;
       }
@@ -398,13 +398,13 @@ public class ToParentBlockJoinQuery extends Query {
 
       prevParentDoc = parentBits.prevSetBit(parentTarget-1);
 
-      //System.out.println("  rolled back to prevParentDoc=" + prevParentDoc + " vs parentDoc=" + parentDoc);
+      // System.out.println("  rolled back to prevParentDoc=" + prevParentDoc + " vs parentDoc=" + parentDoc);
       assert prevParentDoc >= parentDoc;
 
       if (prevParentDoc > nextChildDoc) {
         nextChildDoc = childScorer.advance(prevParentDoc);
         // System.out.println("  childScorer advanced to child docID=" + nextChildDoc);
-      //} else {
-        //System.out.println("  skip childScorer advance");
+      } else {
+        // System.out.println("  skip childScorer advance");
       }
 
       // Parent & child docs are supposed to be orthogonal:
@@ -413,15 +413,21 @@ public class ToParentBlockJoinQuery extends Query {
       }
 
       final int nd = nextDoc();
-      //System.out.println("  return nextParentDoc=" + nd);
+      // System.out.println("  return nextParentDoc=" + nd);
       return nd;
     }
 
     public Explanation explain(int docBase) throws IOException {
-      int start = docBase + prevParentDoc + 1; // +1 b/c prevParentDoc is previous parent doc
-      int end = docBase + parentDoc - 1; // -1 b/c parentDoc is parent doc
+      int start = prevParentDoc + 1; // +1 b/c prevParentDoc is previous parent doc
+      if (acceptDocs != null) {
+        // Skip deleted docs:
+        while (acceptDocs.get(start) == false) {
+          start++;
+        }
+      }
+      int end = parentDoc - 1; // -1 b/c parentDoc is parent doc
       return new ComplexExplanation(
-        true, score(), String.format(Locale.ROOT, "Score based on child doc range from %d to %d", start, end)
+        true, score(), String.format(Locale.ROOT, "Score based on child doc range from %d to %d", docBase+start, docBase+end)
       );
     }
 
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
index cae594e0c43..48c33fe4234 100644
--- a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
+++ b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
@@ -623,8 +623,14 @@ public class TestBlockJoin extends LuceneTestCase {
       System.out.println("TEST: reader=" + r);
       System.out.println("TEST: joinReader=" + joinR);
 
+      Bits liveDocs = MultiFields.getLiveDocs(joinR);
       for(int docIDX=0;docIDX