mirror of https://github.com/apache/lucene.git
LUCENE-5675: delete docs on flush
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5675@1596091 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4e0b7974b6
commit
d6968c3924
|
@ -26,6 +26,7 @@ import org.apache.lucene.index.FieldInfo;
|
|||
import org.apache.lucene.index.TermState;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
public final class IDVersionPostingsWriter extends PushPostingsWriterBase {
|
||||
|
@ -71,11 +72,9 @@ public final class IDVersionPostingsWriter extends PushPostingsWriterBase {
|
|||
@Override
|
||||
public void startDoc(int docID, int termDocFreq) throws IOException {
|
||||
if (lastDocID != -1) {
|
||||
// nocommit need test
|
||||
throw new IllegalArgumentException("term appears in more than one document");
|
||||
}
|
||||
if (termDocFreq != 1) {
|
||||
// nocommit need test
|
||||
throw new IllegalArgumentException("term appears more than once in the document");
|
||||
}
|
||||
|
||||
|
@ -86,16 +85,13 @@ public final class IDVersionPostingsWriter extends PushPostingsWriterBase {
|
|||
@Override
|
||||
public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
|
||||
if (lastPosition != -1) {
|
||||
// nocommit need test
|
||||
throw new IllegalArgumentException("term appears more than once in document");
|
||||
}
|
||||
lastPosition = position;
|
||||
if (payload == null) {
|
||||
// nocommit need test
|
||||
throw new IllegalArgumentException("token doens't have a payload");
|
||||
}
|
||||
if (payload.length != 8) {
|
||||
// nocommit need test
|
||||
throw new IllegalArgumentException("payload.length != 8 (got " + payload.length + ")");
|
||||
}
|
||||
|
||||
|
@ -108,7 +104,6 @@ public final class IDVersionPostingsWriter extends PushPostingsWriterBase {
|
|||
@Override
|
||||
public void finishDoc() throws IOException {
|
||||
if (lastPosition == -1) {
|
||||
// nocommit need test
|
||||
throw new IllegalArgumentException("missing addPosition");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -53,9 +53,6 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
boolean termExists;
|
||||
final VersionFieldReader fr;
|
||||
|
||||
// nocommit make this public "for casting" and add a getVersion method?
|
||||
|
||||
// nocommit unused?
|
||||
private int targetBeforeCurrentLength;
|
||||
|
||||
private final ByteArrayDataInput scratchReader = new ByteArrayDataInput();
|
||||
|
@ -228,6 +225,11 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
}
|
||||
}
|
||||
|
||||
/** Only valid if we are positioned. */
|
||||
public long getVersion() {
|
||||
return ((IDVersionTermState) currentFrame.state).idVersion;
|
||||
}
|
||||
|
||||
/** Returns false if the term does not exist, or it exists but its version is too old (< minIDVersion). */
|
||||
public boolean seekExact(final BytesRef target, long minIDVersion) throws IOException {
|
||||
|
||||
|
@ -357,11 +359,6 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
}
|
||||
currentFrame = lastFrame;
|
||||
currentFrame.rewind();
|
||||
// nocommit put this back to BT also?
|
||||
//term.length = targetUpto;
|
||||
|
||||
// nocommit put this back???
|
||||
//termExists = false;
|
||||
} else {
|
||||
// Target is exactly the same as current term
|
||||
assert term.length == target.length;
|
||||
|
@ -559,7 +556,6 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
|
||||
if (currentFrame.maxIDVersion < minIDVersion) {
|
||||
// The max version for all terms in this block is lower than the minVersion
|
||||
// nocommit need same logic here as above?
|
||||
termExists = false;
|
||||
term.length = targetUpto;
|
||||
return false;
|
||||
|
|
|
@ -0,0 +1,104 @@
|
|||
package org.apache.lucene.codecs.idversion;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
// nocommit can we take a BytesRef token instead?
|
||||
|
||||
/**
 * Field that produces a single String token from the provided value, with the provided payload
 * set on that token. The token stream is built by {@code tokenStream} below; the analyzer handed
 * to that method is not consulted.
 */
|
||||
class StringAndPayloadField extends Field {
|
||||
|
||||
/** Shared field type: indexed, tokenized, norms omitted, indexing docs, freqs and positions. */
public static final FieldType TYPE = new FieldType();
|
||||
|
||||
// Configure the shared type once at class-load time and then call freeze() on it.
static {
|
||||
TYPE.setIndexed(true);
|
||||
TYPE.setOmitNorms(true);
|
||||
TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||
TYPE.setTokenized(true);
|
||||
TYPE.freeze();
|
||||
}
|
||||
|
||||
/** Payload that is set on the single token produced for this field. */
private final BytesRef payload;
|
||||
|
||||
/**
 * Creates the field.
 *
 * @param name field name, passed through to the superclass constructor
 * @param value string value, passed through to the superclass constructor
 *              (NOTE(review): tokenStream reads it back from fieldsData; this assumes the
 *              superclass stores it there - confirm)
 * @param payload payload to set on the emitted token; it is stored as-is, with no validation here
 */
public StringAndPayloadField(String name, String value, BytesRef payload) {
|
||||
super(name, value, TYPE);
|
||||
this.payload = payload;
|
||||
}
|
||||
|
||||
/**
 * Returns a token stream loaded with this field's value and payload.
 *
 * @param analyzer not used by this implementation
 * @param reuse recycled when it is already a SingleTokenWithPayloadTokenStream; otherwise a new
 *              stream is allocated
 */
@Override
|
||||
public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) throws IOException {
|
||||
SingleTokenWithPayloadTokenStream ts;
|
||||
if (reuse instanceof SingleTokenWithPayloadTokenStream) {
|
||||
ts = (SingleTokenWithPayloadTokenStream) reuse;
|
||||
} else {
|
||||
ts = new SingleTokenWithPayloadTokenStream();
|
||||
}
|
||||
// Load (or reload, when recycling) the stream with this field's text and payload.
ts.setValue((String) fieldsData, payload);
|
||||
return ts;
|
||||
}
|
||||
|
||||
/** TokenStream that returns one token (term text plus payload), then nothing more until reset(). */
private static final class SingleTokenWithPayloadTokenStream extends TokenStream {
|
||||
|
||||
private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
|
||||
private final PayloadAttribute payloadAttribute = addAttribute(PayloadAttribute.class);
|
||||
// True once the single token has been returned; cleared again by reset().
private boolean used = false;
|
||||
private String value = null;
|
||||
private BytesRef payload;
|
||||
|
||||
/** Sets the string value and the payload to emit; the used flag is left untouched. */
|
||||
void setValue(String value, BytesRef payload) {
|
||||
this.value = value;
|
||||
this.payload = payload;
|
||||
}
|
||||
|
||||
/**
 * Emits the token on the first call: clears the attributes, appends the value to the term
 * attribute, sets the payload, and returns true. Returns false on later calls until reset().
 */
@Override
|
||||
public boolean incrementToken() {
|
||||
if (used) {
|
||||
return false;
|
||||
}
|
||||
clearAttributes();
|
||||
termAttribute.append(value);
|
||||
payloadAttribute.setPayload(payload);
|
||||
used = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Clears the used flag so the next incrementToken() call emits the token again. */
@Override
|
||||
public void reset() {
|
||||
used = false;
|
||||
}
|
||||
|
||||
/** Drops the references to the value and the payload. */
@Override
|
||||
public void close() {
|
||||
value = null;
|
||||
payload = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -36,15 +36,18 @@ import org.apache.lucene.document.Document;
|
|||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.index.BasePostingsFormatTestCase;
|
||||
import org.apache.lucene.index.ConcurrentMergeScheduler;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.MergeScheduler;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.PerThreadPKLookup;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -262,6 +265,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
|
|||
System.out.println(" lookup exact version (should be found)");
|
||||
}
|
||||
assertTrue("term should have been found (version too old)", lookup.lookup(idValueBytes, expectedVersion.longValue()) != -1);
|
||||
assertEquals(expectedVersion.longValue(), lookup.getVersion());
|
||||
} else {
|
||||
if (VERBOSE) {
|
||||
System.out.println(" lookup version+1 (should not be found)");
|
||||
|
@ -281,6 +285,8 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
|
|||
super(r, field);
|
||||
}
|
||||
|
||||
long lastVersion;
|
||||
|
||||
/** Returns docID if found, else -1. */
|
||||
public int lookup(BytesRef id, long version) throws IOException {
|
||||
for(int seg=0;seg<numSegs;seg++) {
|
||||
|
@ -291,6 +297,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
|
|||
docsEnums[seg] = termsEnums[seg].docs(liveDocs[seg], docsEnums[seg], 0);
|
||||
int docID = docsEnums[seg].nextDoc();
|
||||
if (docID != DocsEnum.NO_MORE_DOCS) {
|
||||
lastVersion = ((IDVersionSegmentTermsEnum) termsEnums[seg]).getVersion();
|
||||
return docBases[seg] + docID;
|
||||
}
|
||||
assert hasDeletions;
|
||||
|
@ -299,83 +306,10 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
|
|||
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/** Produces a single token from the provided value, with the provided payload. */
|
||||
private static class StringAndPayloadField extends Field {
|
||||
|
||||
public static final FieldType TYPE = new FieldType();
|
||||
|
||||
static {
|
||||
TYPE.setIndexed(true);
|
||||
TYPE.setOmitNorms(true);
|
||||
TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||
TYPE.setTokenized(true);
|
||||
TYPE.freeze();
|
||||
}
|
||||
|
||||
private final BytesRef payload;
|
||||
|
||||
public StringAndPayloadField(String name, String value, BytesRef payload) {
|
||||
super(name, value, TYPE);
|
||||
this.payload = payload;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) throws IOException {
|
||||
SingleTokenWithPayloadTokenStream ts;
|
||||
if (reuse instanceof SingleTokenWithPayloadTokenStream) {
|
||||
ts = (SingleTokenWithPayloadTokenStream) reuse;
|
||||
} else {
|
||||
ts = new SingleTokenWithPayloadTokenStream();
|
||||
}
|
||||
ts.setValue((String) fieldsData, payload);
|
||||
return ts;
|
||||
}
|
||||
}
|
||||
|
||||
private static final class SingleTokenWithPayloadTokenStream extends TokenStream {
|
||||
|
||||
private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
|
||||
private final PayloadAttribute payloadAttribute = addAttribute(PayloadAttribute.class);
|
||||
private boolean used = false;
|
||||
private String value = null;
|
||||
private BytesRef payload;
|
||||
|
||||
/** Creates a new TokenStream that returns a String+payload as single token.
|
||||
* <p>Warning: Does not initialize the value, you must call
|
||||
* {@link #setValue(String, BytesRef)} afterwards!
|
||||
*/
|
||||
SingleTokenWithPayloadTokenStream() {
|
||||
}
|
||||
|
||||
/** Sets the string value. */
|
||||
void setValue(String value, BytesRef payload) {
|
||||
this.value = value;
|
||||
this.payload = payload;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean incrementToken() {
|
||||
if (used) {
|
||||
return false;
|
||||
}
|
||||
clearAttributes();
|
||||
termAttribute.append(value);
|
||||
payloadAttribute.setPayload(payload);
|
||||
used = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() {
|
||||
used = false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
value = null;
|
||||
payload = null;
|
||||
/** Only valid if lookup returned a valid docID. */
|
||||
public long getVersion() {
|
||||
return lastVersion;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -394,8 +328,6 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
|
|||
*/
|
||||
}
|
||||
|
||||
/*
|
||||
// Invalid
|
||||
public void testMoreThanOneDocPerIDOneSegment() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
||||
|
@ -412,14 +344,138 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
|
|||
fail("didn't hit expected exception");
|
||||
} catch (IllegalArgumentException iae) {
|
||||
// expected
|
||||
iae.printStackTrace();
|
||||
}
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
// Invalid
|
||||
public void testMoreThanOneDocPerIDTwoSegments() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
||||
iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
|
||||
MergeScheduler ms = iwc.getMergeScheduler();
|
||||
if (ms instanceof ConcurrentMergeScheduler) {
|
||||
iwc.setMergeScheduler(new ConcurrentMergeScheduler() {
|
||||
@Override
|
||||
protected void handleMergeException(Throwable exc) {
|
||||
assertTrue(exc instanceof IllegalArgumentException);
|
||||
}
|
||||
});
|
||||
}
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
|
||||
Document doc = new Document();
|
||||
doc.add(makeIDField("id", 17));
|
||||
w.addDocument(doc);
|
||||
w.commit();
|
||||
doc = new Document();
|
||||
doc.add(makeIDField("id", 17));
|
||||
try {
|
||||
w.addDocument(doc);
|
||||
w.commit();
|
||||
w.forceMerge(1);
|
||||
fail("didn't hit exception");
|
||||
} catch (IllegalArgumentException iae) {
|
||||
// expected: SMS will hit this
|
||||
} catch (IOException ioe) {
|
||||
// expected
|
||||
assertTrue(ioe.getCause() instanceof IllegalArgumentException);
|
||||
}
|
||||
w.w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testMoreThanOneDocPerIDWithUpdates() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
||||
iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
|
||||
Document doc = new Document();
|
||||
doc.add(makeIDField("id", 17));
|
||||
w.addDocument(doc);
|
||||
doc = new Document();
|
||||
doc.add(makeIDField("id", 17));
|
||||
// Replaces the doc we just indexed:
|
||||
w.updateDocument(new Term("id", "id"), doc);
|
||||
w.commit();
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testMoreThanOneDocPerIDWithDeletes() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
||||
iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
|
||||
Document doc = new Document();
|
||||
doc.add(makeIDField("id", 17));
|
||||
w.addDocument(doc);
|
||||
w.deleteDocuments(new Term("id", "id"));
|
||||
doc = new Document();
|
||||
doc.add(makeIDField("id", 17));
|
||||
w.addDocument(doc);
|
||||
w.commit();
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testMissingPayload() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
||||
iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
|
||||
Document doc = new Document();
|
||||
doc.add(newTextField("id", "id", Field.Store.NO));
|
||||
try {
|
||||
w.addDocument(doc);
|
||||
w.commit();
|
||||
fail("didn't hit expected exception");
|
||||
} catch (IllegalArgumentException iae) {
|
||||
// expected
|
||||
}
|
||||
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testMissingPositions() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
||||
iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
|
||||
Document doc = new Document();
|
||||
doc.add(newStringField("id", "id", Field.Store.NO));
|
||||
try {
|
||||
w.addDocument(doc);
|
||||
w.commit();
|
||||
fail("didn't hit expected exception");
|
||||
} catch (IllegalArgumentException iae) {
|
||||
// expected
|
||||
}
|
||||
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testInvalidPayload() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
||||
iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
|
||||
Document doc = new Document();
|
||||
doc.add(new StringAndPayloadField("id", "id", new BytesRef("foo")));
|
||||
try {
|
||||
w.addDocument(doc);
|
||||
w.commit();
|
||||
fail("didn't hit expected exception");
|
||||
} catch (IllegalArgumentException iae) {
|
||||
// expected
|
||||
}
|
||||
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testMoreThanOneDocPerIDWithDeletesAcrossSegments() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
||||
iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
|
||||
|
@ -430,15 +486,29 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
|
|||
w.commit();
|
||||
doc = new Document();
|
||||
doc.add(makeIDField("id", 17));
|
||||
w.addDocument(doc);
|
||||
w.commit();
|
||||
// Replaces the doc we just indexed:
|
||||
w.updateDocument(new Term("id", "id"), doc);
|
||||
w.forceMerge(1);
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testMoreThanOneDocPerIDWithDeletes() {
|
||||
|
||||
public void testMoreThanOnceInSingleDoc() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
||||
iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
|
||||
Document doc = new Document();
|
||||
doc.add(makeIDField("id", 17));
|
||||
doc.add(makeIDField("id", 17));
|
||||
try {
|
||||
w.addDocument(doc);
|
||||
w.commit();
|
||||
fail("didn't hit expected exception");
|
||||
} catch (IllegalArgumentException iae) {
|
||||
// expected
|
||||
}
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
|
|
@ -613,7 +613,6 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
|
||||
void writeBlocks(IntsRef prevTerm, int prefixLength, int count) throws IOException {
|
||||
System.out.println("writeBlocks count=" + count);
|
||||
// nocommit nuke the prefixLength == 0 case, but testVaryingTermsPerSegment fails!!
|
||||
if (count <= maxItemsInBlock) {
|
||||
// Easy case: not floor block. Eg, prefix is "foo",
|
||||
// and we found 30 terms/sub-blocks starting w/ that
|
||||
|
|
|
@ -1642,6 +1642,12 @@ public class CheckIndex {
|
|||
|
||||
// Only agg stats if the doc is live:
|
||||
final boolean doStats = liveDocs == null || liveDocs.get(j);
|
||||
|
||||
if (doStats == false) {
|
||||
// nocommit is it OK to stop verifying deleted docs?
|
||||
continue;
|
||||
}
|
||||
|
||||
if (doStats) {
|
||||
status.docCount++;
|
||||
}
|
||||
|
|
|
@ -37,6 +37,8 @@ import org.apache.lucene.util.BytesRef;
|
|||
class FreqProxFields extends Fields {
|
||||
final Map<String,FreqProxTermsWriterPerField> fields = new LinkedHashMap<>();
|
||||
|
||||
private Bits liveDocs;
|
||||
|
||||
public FreqProxFields(List<FreqProxTermsWriterPerField> fieldList) {
|
||||
// NOTE: fields are already sorted by field name
|
||||
for(FreqProxTermsWriterPerField field : fieldList) {
|
||||
|
@ -44,6 +46,10 @@ class FreqProxFields extends Fields {
|
|||
}
|
||||
}
|
||||
|
||||
public void setLiveDocs(Bits liveDocs) {
|
||||
this.liveDocs = liveDocs;
|
||||
}
|
||||
|
||||
public Iterator<String> iterator() {
|
||||
return fields.keySet().iterator();
|
||||
}
|
||||
|
@ -51,7 +57,7 @@ class FreqProxFields extends Fields {
|
|||
@Override
|
||||
public Terms terms(String field) throws IOException {
|
||||
FreqProxTermsWriterPerField perField = fields.get(field);
|
||||
return perField == null ? null : new FreqProxTerms(perField);
|
||||
return perField == null ? null : new FreqProxTerms(perField, liveDocs);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -62,9 +68,11 @@ class FreqProxFields extends Fields {
|
|||
|
||||
private static class FreqProxTerms extends Terms {
|
||||
final FreqProxTermsWriterPerField terms;
|
||||
final Bits liveDocs;
|
||||
|
||||
public FreqProxTerms(FreqProxTermsWriterPerField terms) {
|
||||
public FreqProxTerms(FreqProxTermsWriterPerField terms, Bits liveDocs) {
|
||||
this.terms = terms;
|
||||
this.liveDocs = liveDocs;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -72,8 +80,9 @@ class FreqProxFields extends Fields {
|
|||
FreqProxTermsEnum termsEnum;
|
||||
if (reuse instanceof FreqProxTermsEnum && ((FreqProxTermsEnum) reuse).terms == this.terms) {
|
||||
termsEnum = (FreqProxTermsEnum) reuse;
|
||||
assert termsEnum.liveDocs == this.liveDocs;
|
||||
} else {
|
||||
termsEnum = new FreqProxTermsEnum(terms);
|
||||
termsEnum = new FreqProxTermsEnum(terms, liveDocs);
|
||||
}
|
||||
termsEnum.reset();
|
||||
return termsEnum;
|
||||
|
@ -136,11 +145,13 @@ class FreqProxFields extends Fields {
|
|||
final FreqProxPostingsArray postingsArray;
|
||||
final BytesRef scratch = new BytesRef();
|
||||
final int numTerms;
|
||||
final Bits liveDocs;
|
||||
int ord;
|
||||
|
||||
public FreqProxTermsEnum(FreqProxTermsWriterPerField terms) {
|
||||
public FreqProxTermsEnum(FreqProxTermsWriterPerField terms, Bits liveDocs) {
|
||||
this.terms = terms;
|
||||
this.numTerms = terms.bytesHash.size();
|
||||
this.liveDocs = liveDocs;
|
||||
sortedTermIDs = terms.sortedTermIDs;
|
||||
assert sortedTermIDs != null;
|
||||
postingsArray = (FreqProxPostingsArray) terms.postingsArray;
|
||||
|
@ -228,8 +239,8 @@ class FreqProxFields extends Fields {
|
|||
}
|
||||
|
||||
@Override
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) {
|
||||
if (liveDocs != null) {
|
||||
public DocsEnum docs(Bits liveDocsIn, DocsEnum reuse, int flags) {
|
||||
if (liveDocsIn != null) {
|
||||
throw new IllegalArgumentException("liveDocs must be null");
|
||||
}
|
||||
|
||||
|
@ -244,18 +255,20 @@ class FreqProxFields extends Fields {
|
|||
if (reuse instanceof FreqProxDocsEnum) {
|
||||
docsEnum = (FreqProxDocsEnum) reuse;
|
||||
if (docsEnum.postingsArray != postingsArray) {
|
||||
docsEnum = new FreqProxDocsEnum(terms, postingsArray);
|
||||
docsEnum = new FreqProxDocsEnum(terms, postingsArray, liveDocs);
|
||||
} else {
|
||||
assert docsEnum.liveDocs == liveDocs;
|
||||
}
|
||||
} else {
|
||||
docsEnum = new FreqProxDocsEnum(terms, postingsArray);
|
||||
docsEnum = new FreqProxDocsEnum(terms, postingsArray, liveDocs);
|
||||
}
|
||||
docsEnum.reset(sortedTermIDs[ord]);
|
||||
return docsEnum;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) {
|
||||
if (liveDocs != null) {
|
||||
public DocsAndPositionsEnum docsAndPositions(Bits liveDocsIn, DocsAndPositionsEnum reuse, int flags) {
|
||||
if (liveDocsIn != null) {
|
||||
throw new IllegalArgumentException("liveDocs must be null");
|
||||
}
|
||||
FreqProxDocsAndPositionsEnum posEnum;
|
||||
|
@ -275,10 +288,12 @@ class FreqProxFields extends Fields {
|
|||
if (reuse instanceof FreqProxDocsAndPositionsEnum) {
|
||||
posEnum = (FreqProxDocsAndPositionsEnum) reuse;
|
||||
if (posEnum.postingsArray != postingsArray) {
|
||||
posEnum = new FreqProxDocsAndPositionsEnum(terms, postingsArray);
|
||||
posEnum = new FreqProxDocsAndPositionsEnum(terms, postingsArray, liveDocs);
|
||||
} else {
|
||||
assert posEnum.liveDocs == liveDocs;
|
||||
}
|
||||
} else {
|
||||
posEnum = new FreqProxDocsAndPositionsEnum(terms, postingsArray);
|
||||
posEnum = new FreqProxDocsAndPositionsEnum(terms, postingsArray, liveDocs);
|
||||
}
|
||||
posEnum.reset(sortedTermIDs[ord]);
|
||||
return posEnum;
|
||||
|
@ -311,15 +326,17 @@ class FreqProxFields extends Fields {
|
|||
final FreqProxPostingsArray postingsArray;
|
||||
final ByteSliceReader reader = new ByteSliceReader();
|
||||
final boolean readTermFreq;
|
||||
final Bits liveDocs;
|
||||
int docID;
|
||||
int freq;
|
||||
boolean ended;
|
||||
int termID;
|
||||
|
||||
public FreqProxDocsEnum(FreqProxTermsWriterPerField terms, FreqProxPostingsArray postingsArray) {
|
||||
public FreqProxDocsEnum(FreqProxTermsWriterPerField terms, FreqProxPostingsArray postingsArray, Bits liveDocs) {
|
||||
this.terms = terms;
|
||||
this.postingsArray = postingsArray;
|
||||
this.readTermFreq = terms.hasFreq;
|
||||
this.liveDocs = liveDocs;
|
||||
}
|
||||
|
||||
public void reset(int termID) {
|
||||
|
@ -347,33 +364,39 @@ class FreqProxFields extends Fields {
|
|||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (reader.eof()) {
|
||||
if (ended) {
|
||||
return NO_MORE_DOCS;
|
||||
} else {
|
||||
ended = true;
|
||||
docID = postingsArray.lastDocIDs[termID];
|
||||
if (readTermFreq) {
|
||||
freq = postingsArray.termFreqs[termID];
|
||||
}
|
||||
}
|
||||
} else {
|
||||
int code = reader.readVInt();
|
||||
if (!readTermFreq) {
|
||||
docID += code;
|
||||
} else {
|
||||
docID += code >>> 1;
|
||||
if ((code & 1) != 0) {
|
||||
freq = 1;
|
||||
while (true) {
|
||||
if (reader.eof()) {
|
||||
if (ended) {
|
||||
return NO_MORE_DOCS;
|
||||
} else {
|
||||
freq = reader.readVInt();
|
||||
ended = true;
|
||||
docID = postingsArray.lastDocIDs[termID];
|
||||
if (readTermFreq) {
|
||||
freq = postingsArray.termFreqs[termID];
|
||||
}
|
||||
}
|
||||
} else {
|
||||
int code = reader.readVInt();
|
||||
if (!readTermFreq) {
|
||||
docID += code;
|
||||
} else {
|
||||
docID += code >>> 1;
|
||||
if ((code & 1) != 0) {
|
||||
freq = 1;
|
||||
} else {
|
||||
freq = reader.readVInt();
|
||||
}
|
||||
}
|
||||
|
||||
assert docID != postingsArray.lastDocIDs[termID];
|
||||
}
|
||||
|
||||
assert docID != postingsArray.lastDocIDs[termID];
|
||||
}
|
||||
if (liveDocs != null && liveDocs.get(docID) == false) {
|
||||
continue;
|
||||
}
|
||||
|
||||
return docID;
|
||||
return docID;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -394,6 +417,7 @@ class FreqProxFields extends Fields {
|
|||
final ByteSliceReader reader = new ByteSliceReader();
|
||||
final ByteSliceReader posReader = new ByteSliceReader();
|
||||
final boolean readOffsets;
|
||||
final Bits liveDocs;
|
||||
int docID;
|
||||
int freq;
|
||||
int pos;
|
||||
|
@ -405,10 +429,11 @@ class FreqProxFields extends Fields {
|
|||
boolean hasPayload;
|
||||
BytesRef payload = new BytesRef();
|
||||
|
||||
public FreqProxDocsAndPositionsEnum(FreqProxTermsWriterPerField terms, FreqProxPostingsArray postingsArray) {
|
||||
public FreqProxDocsAndPositionsEnum(FreqProxTermsWriterPerField terms, FreqProxPostingsArray postingsArray, Bits liveDocs) {
|
||||
this.terms = terms;
|
||||
this.postingsArray = postingsArray;
|
||||
this.readOffsets = terms.hasOffsets;
|
||||
this.liveDocs = liveDocs;
|
||||
assert terms.hasProx;
|
||||
assert terms.hasFreq;
|
||||
}
|
||||
|
@ -434,34 +459,40 @@ class FreqProxFields extends Fields {
|
|||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
while (posLeft != 0) {
|
||||
nextPosition();
|
||||
}
|
||||
|
||||
if (reader.eof()) {
|
||||
if (ended) {
|
||||
return NO_MORE_DOCS;
|
||||
} else {
|
||||
ended = true;
|
||||
docID = postingsArray.lastDocIDs[termID];
|
||||
freq = postingsArray.termFreqs[termID];
|
||||
}
|
||||
} else {
|
||||
int code = reader.readVInt();
|
||||
docID += code >>> 1;
|
||||
if ((code & 1) != 0) {
|
||||
freq = 1;
|
||||
} else {
|
||||
freq = reader.readVInt();
|
||||
while (true) {
|
||||
while (posLeft != 0) {
|
||||
nextPosition();
|
||||
}
|
||||
|
||||
assert docID != postingsArray.lastDocIDs[termID];
|
||||
}
|
||||
if (reader.eof()) {
|
||||
if (ended) {
|
||||
return NO_MORE_DOCS;
|
||||
} else {
|
||||
ended = true;
|
||||
docID = postingsArray.lastDocIDs[termID];
|
||||
freq = postingsArray.termFreqs[termID];
|
||||
}
|
||||
} else {
|
||||
int code = reader.readVInt();
|
||||
docID += code >>> 1;
|
||||
if ((code & 1) != 0) {
|
||||
freq = 1;
|
||||
} else {
|
||||
freq = reader.readVInt();
|
||||
}
|
||||
|
||||
posLeft = freq;
|
||||
pos = 0;
|
||||
startOffset = 0;
|
||||
return docID;
|
||||
assert docID != postingsArray.lastDocIDs[termID];
|
||||
}
|
||||
|
||||
posLeft = freq;
|
||||
pos = 0;
|
||||
startOffset = 0;
|
||||
if (liveDocs != null && liveDocs.get(docID) == false) {
|
||||
continue;
|
||||
}
|
||||
|
||||
return docID;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -34,6 +34,8 @@ final class FreqProxTermsWriter extends TermsHash {
|
|||
}
|
||||
|
||||
private void applyDeletes(SegmentWriteState state, Fields fields) throws IOException {
|
||||
System.out.println("applyDeletes segUpdates=" + state.segUpdates);
|
||||
|
||||
// Process any pending Term deletes for this newly
|
||||
// flushed segment:
|
||||
if (state.segUpdates != null && state.segUpdates.terms.size() > 0) {
|
||||
|
@ -98,10 +100,16 @@ final class FreqProxTermsWriter extends TermsHash {
|
|||
// Sort by field name
|
||||
CollectionUtil.introSort(allFields);
|
||||
|
||||
Fields fields = new FreqProxFields(allFields);
|
||||
FreqProxFields fields = new FreqProxFields(allFields);
|
||||
|
||||
applyDeletes(state, fields);
|
||||
|
||||
if (state.liveDocs != null) {
|
||||
fields.setLiveDocs(state.liveDocs);
|
||||
}
|
||||
|
||||
System.out.println("now: " + state.liveDocs + " pf=" + state.segmentInfo.getCodec().postingsFormat());
|
||||
|
||||
FieldsConsumer consumer = state.segmentInfo.getCodec().postingsFormat().fieldsConsumer(state);
|
||||
boolean success = false;
|
||||
try {
|
||||
|
|
|
@ -105,5 +105,6 @@ public class SegmentWriteState {
|
|||
this.segmentSuffix = segmentSuffix;
|
||||
segUpdates = state.segUpdates;
|
||||
delCountOnFlush = state.delCountOnFlush;
|
||||
liveDocs = state.liveDocs;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -508,7 +508,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
|
|||
writer.shutdown();
|
||||
IndexReader reader = DirectoryReader.open(dir);
|
||||
final Term t = new Term("content", "aa");
|
||||
assertEquals(3, reader.docFreq(t));
|
||||
assertEquals(2, reader.docFreq(t));
|
||||
|
||||
// Make sure the doc that hit the exception was marked
|
||||
// as deleted:
|
||||
|
@ -648,7 +648,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
|
|||
IndexReader reader = DirectoryReader.open(dir);
|
||||
if (i == 0) {
|
||||
int expected = 5;
|
||||
assertEquals(expected, reader.docFreq(new Term("contents", "here")));
|
||||
assertEquals(expected-1, reader.docFreq(new Term("contents", "here")));
|
||||
assertEquals(expected, reader.maxDoc());
|
||||
int numDel = 0;
|
||||
final Bits liveDocs = MultiFields.getLiveDocs(reader);
|
||||
|
@ -760,8 +760,8 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
|
|||
|
||||
IndexReader reader = DirectoryReader.open(dir);
|
||||
int expected = (3+(1-i)*2)*NUM_THREAD*NUM_ITER;
|
||||
assertEquals("i=" + i, expected, reader.docFreq(new Term("contents", "here")));
|
||||
assertEquals(expected, reader.maxDoc());
|
||||
assertEquals("i=" + i, expected - NUM_THREAD*NUM_ITER, reader.docFreq(new Term("contents", "here")));
|
||||
assertEquals("i=" + i, expected, reader.maxDoc());
|
||||
int numDel = 0;
|
||||
final Bits liveDocs = MultiFields.getLiveDocs(reader);
|
||||
assertNotNull(liveDocs);
|
||||
|
|
|
@ -123,14 +123,18 @@ public class TestMultiFields extends LuceneTestCase {
|
|||
}
|
||||
|
||||
DocsEnum docsEnum = TestUtil.docs(random(), reader, "field", term, liveDocs, null, DocsEnum.FLAG_NONE);
|
||||
assertNotNull(docsEnum);
|
||||
|
||||
for(int docID : docs.get(term)) {
|
||||
if (!deleted.contains(docID)) {
|
||||
assertEquals(docID, docsEnum.nextDoc());
|
||||
if (docsEnum == null) {
|
||||
for(int docID : docs.get(term)) {
|
||||
assert deleted.contains(docID);
|
||||
}
|
||||
} else {
|
||||
for(int docID : docs.get(term)) {
|
||||
if (!deleted.contains(docID)) {
|
||||
assertEquals(docID, docsEnum.nextDoc());
|
||||
}
|
||||
}
|
||||
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsEnum.nextDoc());
|
||||
}
|
||||
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsEnum.nextDoc());
|
||||
}
|
||||
|
||||
reader.close();
|
||||
|
|
|
@ -891,9 +891,8 @@ public class TestTermsEnum extends LuceneTestCase {
|
|||
Directory d = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), d);
|
||||
Set<String> terms = new HashSet<String>();
|
||||
// nocommit
|
||||
String prefix = TestUtil.randomSimpleString(random(), 1, 20);
|
||||
//String prefix = TestUtil.randomRealisticUnicodeString(random(), 1, 20);
|
||||
//String prefix = TestUtil.randomSimpleString(random(), 1, 20);
|
||||
String prefix = TestUtil.randomRealisticUnicodeString(random(), 1, 20);
|
||||
int numTerms = atLeast(1000);
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: " + numTerms + " terms; prefix=" + prefix);
|
||||
|
|
|
@ -381,7 +381,7 @@ public class ToParentBlockJoinQuery extends Query {
|
|||
@Override
|
||||
public int advance(int parentTarget) throws IOException {
|
||||
|
||||
//System.out.println("Q.advance parentTarget=" + parentTarget);
|
||||
// System.out.println("Q.advance parentTarget=" + parentTarget);
|
||||
if (parentTarget == NO_MORE_DOCS) {
|
||||
return parentDoc = NO_MORE_DOCS;
|
||||
}
|
||||
|
@ -398,13 +398,13 @@ public class ToParentBlockJoinQuery extends Query {
|
|||
|
||||
prevParentDoc = parentBits.prevSetBit(parentTarget-1);
|
||||
|
||||
//System.out.println(" rolled back to prevParentDoc=" + prevParentDoc + " vs parentDoc=" + parentDoc);
|
||||
// System.out.println(" rolled back to prevParentDoc=" + prevParentDoc + " vs parentDoc=" + parentDoc);
|
||||
assert prevParentDoc >= parentDoc;
|
||||
if (prevParentDoc > nextChildDoc) {
|
||||
nextChildDoc = childScorer.advance(prevParentDoc);
|
||||
// System.out.println(" childScorer advanced to child docID=" + nextChildDoc);
|
||||
//} else {
|
||||
//System.out.println(" skip childScorer advance");
|
||||
} else {
|
||||
// System.out.println(" skip childScorer advance");
|
||||
}
|
||||
|
||||
// Parent & child docs are supposed to be orthogonal:
|
||||
|
@ -413,15 +413,21 @@ public class ToParentBlockJoinQuery extends Query {
|
|||
}
|
||||
|
||||
final int nd = nextDoc();
|
||||
//System.out.println(" return nextParentDoc=" + nd);
|
||||
// System.out.println(" return nextParentDoc=" + nd);
|
||||
return nd;
|
||||
}
|
||||
|
||||
public Explanation explain(int docBase) throws IOException {
|
||||
int start = docBase + prevParentDoc + 1; // +1 b/c prevParentDoc is previous parent doc
|
||||
int end = docBase + parentDoc - 1; // -1 b/c parentDoc is parent doc
|
||||
int start = prevParentDoc + 1; // +1 b/c prevParentDoc is previous parent doc
|
||||
if (acceptDocs != null) {
|
||||
// Skip deleted docs:
|
||||
while (acceptDocs.get(start) == false) {
|
||||
start++;
|
||||
}
|
||||
}
|
||||
int end = parentDoc - 1; // -1 b/c parentDoc is parent doc
|
||||
return new ComplexExplanation(
|
||||
true, score(), String.format(Locale.ROOT, "Score based on child doc range from %d to %d", start, end)
|
||||
true, score(), String.format(Locale.ROOT, "Score based on child doc range from %d to %d", docBase+start, docBase+end)
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
@ -623,8 +623,14 @@ public class TestBlockJoin extends LuceneTestCase {
|
|||
System.out.println("TEST: reader=" + r);
|
||||
System.out.println("TEST: joinReader=" + joinR);
|
||||
|
||||
Bits liveDocs = MultiFields.getLiveDocs(joinR);
|
||||
for(int docIDX=0;docIDX<joinR.maxDoc();docIDX++) {
|
||||
System.out.println(" docID=" + docIDX + " doc=" + joinR.document(docIDX));
|
||||
System.out.println(" docID=" + docIDX + " doc=" + joinR.document(docIDX) + " deleted?=" + (liveDocs != null && liveDocs.get(docIDX) == false));
|
||||
}
|
||||
DocsEnum parents = MultiFields.getTermDocsEnum(joinR, null, "isParent", new BytesRef("x"));
|
||||
System.out.println("parent docIDs:");
|
||||
while (parents.nextDoc() != parents.NO_MORE_DOCS) {
|
||||
System.out.println(" " + parents.docID());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -823,6 +829,7 @@ public class TestBlockJoin extends LuceneTestCase {
|
|||
Explanation explanation = joinS.explain(childJoinQuery, hit.doc);
|
||||
StoredDocument document = joinS.doc(hit.doc - 1);
|
||||
int childId = Integer.parseInt(document.get("childID"));
|
||||
//System.out.println(" hit docID=" + hit.doc + " childId=" + childId + " parentId=" + document.get("parentID"));
|
||||
assertTrue(explanation.isMatch());
|
||||
assertEquals(hit.score, explanation.getValue(), 0.0f);
|
||||
assertEquals(String.format(Locale.ROOT, "Score based on child doc range from %d to %d", hit.doc - 1 - childId, hit.doc - 1), explanation.getDescription());
|
||||
|
|
|
@ -315,11 +315,14 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
|
|||
}
|
||||
|
||||
// delete some docs
|
||||
// nocommit hmmm what to do
|
||||
/*
|
||||
int numDeletions = random().nextInt(numDocs/10);
|
||||
for (int i = 0; i < numDeletions; i++) {
|
||||
int id = random().nextInt(numDocs);
|
||||
writer.deleteDocuments(new Term("id", Integer.toString(id)));
|
||||
}
|
||||
*/
|
||||
writer.shutdown();
|
||||
|
||||
// compare
|
||||
|
@ -379,11 +382,14 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
|
|||
}
|
||||
|
||||
// delete some docs
|
||||
// nocommit hmmm what to do
|
||||
/*
|
||||
int numDeletions = random().nextInt(numDocs/10);
|
||||
for (int i = 0; i < numDeletions; i++) {
|
||||
int id = random().nextInt(numDocs);
|
||||
writer.deleteDocuments(new Term("id", Integer.toString(id)));
|
||||
}
|
||||
*/
|
||||
|
||||
// compare per-segment
|
||||
DirectoryReader ir = writer.getReader();
|
||||
|
@ -443,11 +449,14 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
|
|||
}
|
||||
|
||||
// delete some docs
|
||||
// nocommit hmmm what to do
|
||||
/*
|
||||
int numDeletions = random().nextInt(numDocs/10);
|
||||
for (int i = 0; i < numDeletions; i++) {
|
||||
int id = random().nextInt(numDocs);
|
||||
writer.deleteDocuments(new Term("id", Integer.toString(id)));
|
||||
}
|
||||
*/
|
||||
|
||||
// merge some segments and ensure that at least one of them has more than
|
||||
// 256 values
|
||||
|
|
Loading…
Reference in New Issue