mirror of https://github.com/apache/lucene.git
Speed up SegmentDocsEnum by making it more friendly for JIT optimizations
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1215060 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent 18febd69e4
commit a6cbca19a0
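The core of the change: the old all-purpose SegmentDocsEnum is split into an abstract SegmentDocsEnumBase plus two final specializations, AllDocsSegmentDocsEnum (no deleted docs) and LiveDocsSegmentDocsEnum (deleted docs present). A minimal, self-contained sketch of that pattern follows; the names (Scanner, Live) are hypothetical and not Lucene code:

abstract class Scanner {
  interface Live { boolean get(int docId); } // stand-in for org.apache.lucene.util.Bits

  protected final int[] docs; // buffered doc ids
  protected int pos = -1;

  Scanner(int[] docs) { this.docs = docs; }

  abstract int nextDoc(); // the hot method, specialized per subclass

  static Scanner create(int[] docs, Live live) {
    // decide the liveDocs question once, at construction time
    return live == null ? new AllDocsScanner(docs) : new LiveDocsScanner(docs, live);
  }

  static final class AllDocsScanner extends Scanner {
    AllDocsScanner(int[] docs) { super(docs); }
    @Override int nextDoc() { // no per-document liveness branch at all
      return ++pos < docs.length ? docs[pos] : Integer.MAX_VALUE;
    }
  }

  static final class LiveDocsScanner extends Scanner {
    private final Live live;
    LiveDocsScanner(int[] docs, Live live) { super(docs); this.live = live; }
    @Override int nextDoc() { // liveness check, but never a null check
      while (++pos < docs.length) {
        if (live.get(docs[pos])) return docs[pos];
      }
      return Integer.MAX_VALUE;
    }
  }
}

Deciding liveDocs == null once up front keeps each nextDoc() small, monomorphic at its call sites, and free of a per-document branch, which is what makes the loops friendlier to the JIT.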
@@ -209,22 +209,35 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
   @Override
   public DocsEnum docs(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
-    SegmentDocsEnum docsEnum;
     if (needsFreqs && fieldInfo.indexOptions == IndexOptions.DOCS_ONLY) {
       return null;
-    } else if (reuse == null || !(reuse instanceof SegmentDocsEnum)) {
-      docsEnum = new SegmentDocsEnum(freqIn);
-    } else {
-      docsEnum = (SegmentDocsEnum) reuse;
-      if (docsEnum.startFreqIn != freqIn) {
-        // If you are using ParallelReader, and pass in a
-        // reused DocsEnum, it could have come from another
-        // reader also using standard codec
-        docsEnum = new SegmentDocsEnum(freqIn);
-      }
+    } else if (canReuse(reuse, liveDocs)) {
+      // if (DEBUG) System.out.println("SPR.docs ts=" + termState);
+      return ((SegmentDocsEnumBase) reuse).reset(fieldInfo, (StandardTermState)termState);
     }
-    // if (DEBUG) System.out.println("SPR.docs ts=" + termState);
-    return docsEnum.reset(fieldInfo, (StandardTermState) termState, liveDocs);
+    return newDocsEnum(liveDocs, fieldInfo, (StandardTermState)termState);
+  }
+
+  private boolean canReuse(DocsEnum reuse, Bits liveDocs) {
+    if (reuse != null && (reuse instanceof SegmentDocsEnumBase)) {
+      SegmentDocsEnumBase docsEnum = (SegmentDocsEnumBase) reuse;
+      // If you are using ParallelReader, and pass in a
+      // reused DocsEnum, it could have come from another
+      // reader also using standard codec
+      if (docsEnum.startFreqIn == freqIn) {
+        // we only reuse if the incoming enum has the same liveDocs as the given liveDocs
+        return liveDocs == docsEnum.liveDocs;
+      }
+    }
+    return false;
+  }
+
+  private DocsEnum newDocsEnum(Bits liveDocs, FieldInfo fieldInfo, StandardTermState termState) throws IOException {
+    if (liveDocs == null) {
+      return new AllDocsSegmentDocsEnum(freqIn).reset(fieldInfo, termState);
+    } else {
+      return new LiveDocsSegmentDocsEnum(freqIn, liveDocs).reset(fieldInfo, termState);
+    }
   }
 
   @Override
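With canReuse in place, callers keep passing their previous enum back in; it is reused only when it came from the same reader (same startFreqIn) and carries the identical liveDocs instance, otherwise newDocsEnum builds a fresh specialized one. A hedged sketch of the calling pattern, mirroring the TestReuseDocsEnum code added at the bottom of this commit (consumeAll is a hypothetical helper, not part of the patch):

static void consumeAll(TermsEnum termsEnum, Bits liveDocs) throws IOException {
  DocsEnum docs = null;
  BytesRef term;
  while ((term = termsEnum.next()) != null) {
    // hand the previous enum back in; the codec decides whether reuse is safe.
    // may return null for DOCS_ONLY fields when freqs are requested (see above)
    docs = termsEnum.docs(liveDocs, docs, true);
    if (docs == null) continue;
    int doc;
    while ((doc = docs.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
      // process doc; docs.freq() is valid because freqs were requested
    }
  }
}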
@@ -266,44 +279,47 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
   }
 
   static final int BUFFERSIZE = 64;
 
   // Decodes only docs
-  private class SegmentDocsEnum extends DocsEnum {
-    final int[] docs = new int[BUFFERSIZE];
-    final int[] freqs = new int[BUFFERSIZE];
-
-    int start = -1;
-    int count = 0;
+  private abstract class SegmentDocsEnumBase extends DocsEnum {
+    protected final int[] docs = new int[BUFFERSIZE];
+    protected final int[] freqs = new int[BUFFERSIZE];
 
-    final IndexInput freqIn;
-    final IndexInput startFreqIn;
+    final IndexInput freqIn; // reuse
+    final IndexInput startFreqIn; // reuse
+    Lucene40SkipListReader skipper; // reuse - lazy loaded
 
-    boolean indexOmitsTF; // does current field omit term freq?
-    boolean storePayloads; // does current field store payloads?
+    protected boolean indexOmitsTF; // does current field omit term freq?
+    protected boolean storePayloads; // does current field store payloads?
 
-    int limit; // number of docs in this posting
-    int ord; // how many docs we've read
-    int doc = -1; // doc we last read
-    int accum; // accumulator for doc deltas
-    int freq; // freq we last read
+    protected int limit; // number of docs in this posting
+    protected int ord; // how many docs we've read
+    protected int doc; // doc we last read
+    protected int accum; // accumulator for doc deltas
+    protected int freq; // freq we last read
+    protected int maxBufferedDocId;
 
-    Bits liveDocs;
+    protected int start;
+    protected int count;
 
-    long freqOffset;
-    int skipOffset;
+    protected long freqOffset;
+    protected int skipOffset;
 
-    boolean skipped;
-    Lucene40SkipListReader skipper;
+    protected boolean skipped;
+    protected final Bits liveDocs;
 
-    public SegmentDocsEnum(IndexInput freqIn) throws IOException {
-      startFreqIn = freqIn;
-      this.freqIn = (IndexInput) freqIn.clone();
+    SegmentDocsEnumBase(IndexInput startFreqIn, Bits liveDocs) throws IOException {
+      this.startFreqIn = startFreqIn;
+      this.freqIn = (IndexInput) startFreqIn.clone();
+      this.liveDocs = liveDocs;
     }
 
-    public SegmentDocsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits liveDocs) throws IOException {
+    DocsEnum reset(FieldInfo fieldInfo, StandardTermState termState) throws IOException {
       indexOmitsTF = fieldInfo.indexOptions == IndexOptions.DOCS_ONLY;
       storePayloads = fieldInfo.storePayloads;
-      this.liveDocs = liveDocs;
       freqOffset = termState.freqOffset;
       skipOffset = termState.skipOffset;
@@ -317,129 +333,122 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
       doc = -1;
       accum = 0;
       // if (DEBUG) System.out.println(" sde limit=" + limit + " freqFP=" + freqOffset);
 
       skipped = false;
 
       start = -1;
       count = 0;
+      maxBufferedDocId = -1;
       return this;
     }
 
     @Override
-    public int freq() {
+    public final int freq() {
       assert !indexOmitsTF;
       return freq;
     }
 
     @Override
-    public int docID() {
+    public final int docID() {
       return doc;
     }
 
     @Override
-    public int nextDoc() throws IOException {
-      while (++start < count) {
-        int d = docs[start];
-        if (liveDocs == null || liveDocs.get(d)) {
-          freq = freqs[start];
-          return doc = d;
-        }
-      }
-      return doc = refill();
-    }
-
-    @Override
-    public int advance(int target) throws IOException {
+    public final int advance(int target) throws IOException {
       // last doc in our buffer is >= target, binary search + next()
-      if (++start < count && docs[count-1] >= target) {
-        binarySearch(target);
-        return nextDoc();
+      if (++start < count && maxBufferedDocId >= target) {
+        if ((count-start) > 32) { // 32 seemed to be a sweetspot here so use binsearch if the pending results are a lot
+          start = binarySearch(count - 1, start, target, docs);
+          return nextDoc();
+        } else {
+          return linearScan(target);
+        }
       }
 
       start = count; // buffer is consumed
 
-      return doc = skipTo(target);
+      return doc = skipTo(target, liveDocs);
     }
 
-    private void binarySearch(int target) {
-      int hi = count - 1;
-      while (start <= hi) {
-        int mid = (hi + start) >>> 1;
+    private final int binarySearch(int hi, int low, int target, int[] docs) {
+      while (low <= hi) {
+        int mid = (hi + low) >>> 1;
         int doc = docs[mid];
         if (doc < target) {
-          start = mid + 1;
+          low = mid + 1;
         } else if (doc > target) {
           hi = mid - 1;
         } else {
-          start = mid;
+          low = mid;
           break;
         }
       }
-      start--;
+      return low-1;
     }
 
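The reworked binarySearch returns the index of the last buffered doc that is still below target, so the nextDoc() call that follows lands exactly on the first doc >= target. A standalone rendition of the same loop with made-up data:

int[] docs = {3, 7, 9, 12, 40, 41, 57}; // buffered, ascending doc ids
int target = 13;
int low = 0, hi = docs.length - 1;
while (low <= hi) {
  int mid = (hi + low) >>> 1;
  if (docs[mid] < target) low = mid + 1;
  else if (docs[mid] > target) hi = mid - 1;
  else { low = mid; break; }
}
int start = low - 1; // == 3 here; docs[start + 1] == 40, the first doc >= 13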
+    final int readFreq(final IndexInput freqIn, final int code)
+      throws IOException {
+      if ((code & 1) != 0) { // if low bit is set
+        return 1; // freq is one
+      } else {
+        return freqIn.readVInt(); // else read freq
+      }
+    }
 
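readFreq decodes the freq half of the standard codec's posting format: each doc entry is a vInt holding the doc delta shifted left by one, with the low bit flagging an implicit freq of 1. A writer-side sketch of the layout this reader expects (writeDocAndFreq is a hypothetical helper, not code from this patch):

static void writeDocAndFreq(DataOutput out, int delta, int freq) throws IOException {
  if (freq == 1) {
    out.writeVInt((delta << 1) | 1); // low bit set: freq is implicitly one
  } else {
    out.writeVInt(delta << 1);       // low bit clear...
    out.writeVInt(freq);             // ...so freq follows as its own vInt
  }
}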
+    protected abstract int linearScan(int scanTo) throws IOException;
+
+    protected abstract int scanTo(int target) throws IOException;
 
-    private int refill() throws IOException {
-      int doc = scanTo(0);
-
-      int bufferSize = Math.min(docs.length, limit - ord);
+    protected final int refill() throws IOException {
+      final int doc = nextUnreadDoc();
+      count = 0;
       start = -1;
-      count = bufferSize;
-      ord += bufferSize;
-
-      if (indexOmitsTF)
-        fillDocs(bufferSize);
-      else
-        fillDocsAndFreqs(bufferSize);
-
+      if (doc == NO_MORE_DOCS) {
+        return NO_MORE_DOCS;
+      }
+      final int numDocs = Math.min(docs.length, limit - ord);
+      ord += numDocs;
+      if (indexOmitsTF) {
+        count = fillDocs(numDocs);
+      } else {
+        count = fillDocsAndFreqs(numDocs);
+      }
+      maxBufferedDocId = count > 0 ? docs[count-1] : NO_MORE_DOCS;
       return doc;
     }
 
-    private int scanTo(int target) throws IOException {
-      while (ord++ < limit) {
-        int code = freqIn.readVInt();
-        if (indexOmitsTF) {
-          accum += code;
-        } else {
-          accum += code >>> 1; // shift off low bit
-          if ((code & 1) != 0) { // if low bit is set
-            freq = 1; // freq is one
-          } else {
-            freq = freqIn.readVInt(); // else read freq
-          }
-        }
-
-        if (accum >= target && (liveDocs == null || liveDocs.get(accum))) {
-          return accum;
-        }
-      }
-      return NO_MORE_DOCS;
-    }
+    protected abstract int nextUnreadDoc() throws IOException;
 
-    private void fillDocs(int size) throws IOException {
-      int docs[] = this.docs;
+    private final int fillDocs(int size) throws IOException {
+      final IndexInput freqIn = this.freqIn;
+      final int docs[] = this.docs;
+      int docAc = accum;
       for (int i = 0; i < size; i++) {
-        accum += freqIn.readVInt();
-        docs[i] = accum;
+        docAc += freqIn.readVInt();
+        docs[i] = docAc;
       }
+      accum = docAc;
+      return size;
     }
 
-    private void fillDocsAndFreqs(int size) throws IOException {
-      int docs[] = this.docs;
-      int freqs[] = this.freqs;
+    private final int fillDocsAndFreqs(int size) throws IOException {
+      final IndexInput freqIn = this.freqIn;
+      final int docs[] = this.docs;
+      final int freqs[] = this.freqs;
+      int docAc = accum;
       for (int i = 0; i < size; i++) {
-        int code = freqIn.readVInt();
-        accum += code >>> 1; // shift off low bit
-        docs[i] = accum;
-        if ((code & 1) != 0) { // if low bit is set
-          freqs[i] = 1; // freq is one
-        } else {
-          freqs[i] = freqIn.readVInt(); // else read freq
-        }
+        final int code = freqIn.readVInt();
+        docAc += code >>> 1; // shift off low bit
+        freqs[i] = readFreq(freqIn, code);
+        docs[i] = docAc;
      }
+      accum = docAc;
+      return size;
     }
 
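Note how both fill methods now copy the hot fields (freqIn, the docs array, the accum counter) into locals before the loop and write the accumulator back once afterwards; with instance fields the JIT has to be conservative about reloading them across calls like readVInt(). A standalone before/after illustration of the pattern (Decoder and its members are made-up names, not Lucene code):

class Decoder {
  private int accum;
  private final int[] buf = new int[64];

  // before: every iteration loads and stores the fields directly
  int fillFields(java.io.DataInput in, int size) throws java.io.IOException {
    for (int i = 0; i < size; i++) {
      accum += in.readInt(); // field load + store on each iteration
      buf[i] = accum;
    }
    return size;
  }

  // after: hoist fields into locals so the loop works on registers,
  // then write the accumulator back exactly once
  int fillLocals(java.io.DataInput in, int size) throws java.io.IOException {
    final int[] buf = this.buf;
    int acc = this.accum;
    for (int i = 0; i < size; i++) {
      acc += in.readInt();
      buf[i] = acc;
    }
    this.accum = acc;
    return size;
  }
}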
-    private int skipTo(int target) throws IOException {
+    private final int skipTo(int target, Bits liveDocs) throws IOException {
       if ((target - skipInterval) >= accum && limit >= skipMinimum) {
 
         // There are enough docs in the posting to have
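The guard above consults the skip list only when the target lies at least one skip interval ahead of the current position and the posting list is long enough to carry skip data at all. With assumed values (skipInterval and skipMinimum are fields of the enclosing reader; 16 is a guess for illustration, not taken from this patch):

int skipInterval = 16, skipMinimum = 16; // assumed values for illustration only
int accum = 100, limit = 5000;           // current doc and posting length
boolean farJump  = (120 - skipInterval) >= accum && limit >= skipMinimum; // true: skipping pays off
boolean nearJump = (110 - skipInterval) >= accum && limit >= skipMinimum; // false: linear scan is cheaper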
@@ -476,13 +485,187 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
       return scanTo(target);
     }
   }
 
+  private final class AllDocsSegmentDocsEnum extends SegmentDocsEnumBase {
+
+    AllDocsSegmentDocsEnum(IndexInput startFreqIn) throws IOException {
+      super(startFreqIn, null);
+      assert liveDocs == null;
+    }
+
+    @Override
+    public final int nextDoc() throws IOException {
+      if (++start < count) {
+        freq = freqs[start];
+        return doc = docs[start];
+      }
+      return doc = refill();
+    }
+
+    @Override
+    protected final int linearScan(int scanTo) throws IOException {
+      final int[] docs = this.docs;
+      final int upTo = count;
+      for (int i = start; i < upTo; i++) {
+        final int d = docs[i];
+        if (scanTo <= d) {
+          start = i;
+          freq = freqs[i];
+          return doc = docs[i];
+        }
+      }
+      return refill();
+    }
+
+    @Override
+    protected int scanTo(int target) throws IOException {
+      int docAcc = accum;
+      int frq = 1;
+      final IndexInput freqIn = this.freqIn;
+      final boolean omitTF = indexOmitsTF;
+      final int loopLimit = limit;
+      for (int i = ord; i < loopLimit; i++) {
+        int code = freqIn.readVInt();
+        if (omitTF) {
+          docAcc += code;
+        } else {
+          docAcc += code >>> 1; // shift off low bit
+          frq = readFreq(freqIn, code);
+        }
+        if (docAcc >= target) {
+          freq = frq;
+          ord = i + 1;
+          return accum = docAcc;
+        }
+      }
+      ord = limit;
+      freq = frq;
+      accum = docAcc;
+      return NO_MORE_DOCS;
+    }
+
+    @Override
+    protected final int nextUnreadDoc() throws IOException {
+      if (ord++ < limit) {
+        int code = freqIn.readVInt();
+        if (indexOmitsTF) {
+          accum += code;
+        } else {
+          accum += code >>> 1; // shift off low bit
+          freq = readFreq(freqIn, code);
+        }
+        return accum;
+      } else {
+        return NO_MORE_DOCS;
+      }
+    }
+  }
+
+  private final class LiveDocsSegmentDocsEnum extends SegmentDocsEnumBase {
+
+    LiveDocsSegmentDocsEnum(IndexInput startFreqIn, Bits liveDocs) throws IOException {
+      super(startFreqIn, liveDocs);
+      assert liveDocs != null;
+    }
+
+    @Override
+    public final int nextDoc() throws IOException {
+      final Bits liveDocs = this.liveDocs;
+      for (int i = start+1; i < count; i++) {
+        int d = docs[i];
+        if (liveDocs.get(d)) {
+          start = i;
+          freq = freqs[i];
+          return doc = d;
+        }
+      }
+      start = count;
+      return doc = refill();
+    }
+
+    @Override
+    protected final int linearScan(int scanTo) throws IOException {
+      final int[] docs = this.docs;
+      final int upTo = count;
+      final Bits liveDocs = this.liveDocs;
+      for (int i = start; i < upTo; i++) {
+        int d = docs[i];
+        if (scanTo <= d && liveDocs.get(d)) {
+          start = i;
+          freq = freqs[i];
+          return doc = docs[i];
+        }
+      }
+      return refill();
+    }
+
+    @Override
+    protected int scanTo(int target) throws IOException {
+      int docAcc = accum;
+      int frq = 1;
+      final IndexInput freqIn = this.freqIn;
+      final boolean omitTF = indexOmitsTF;
+      final int loopLimit = limit;
+      final Bits liveDocs = this.liveDocs;
+      for (int i = ord; i < loopLimit; i++) {
+        int code = freqIn.readVInt();
+        if (omitTF) {
+          docAcc += code;
+        } else {
+          docAcc += code >>> 1; // shift off low bit
+          frq = readFreq(freqIn, code);
+        }
+        if (docAcc >= target && liveDocs.get(docAcc)) {
+          freq = frq;
+          ord = i + 1;
+          return accum = docAcc;
+        }
+      }
+      ord = limit;
+      freq = frq;
+      accum = docAcc;
+      return NO_MORE_DOCS;
+    }
+
+    @Override
+    protected final int nextUnreadDoc() throws IOException {
+      int docAcc = accum;
+      int frq = 1;
+      final IndexInput freqIn = this.freqIn;
+      final boolean omitTF = indexOmitsTF;
+      final int loopLimit = limit;
+      final Bits liveDocs = this.liveDocs;
+      for (int i = ord; i < loopLimit; i++) {
+        int code = freqIn.readVInt();
+        if (omitTF) {
+          docAcc += code;
+        } else {
+          docAcc += code >>> 1; // shift off low bit
+          frq = readFreq(freqIn, code);
+        }
+        if (liveDocs.get(docAcc)) {
+          freq = frq;
+          ord = i + 1;
+          return accum = docAcc;
+        }
+      }
+      ord = limit;
+      freq = frq;
+      accum = docAcc;
+      return NO_MORE_DOCS;
+    }
+  }
+
+  // TODO specialize DocsAndPosEnum too
+
   // Decodes docs & positions. payloads are not present.
-  private class SegmentDocsAndPositionsEnum extends DocsAndPositionsEnum {
+  private final class SegmentDocsAndPositionsEnum extends DocsAndPositionsEnum {
     final IndexInput startFreqIn;
     private final IndexInput freqIn;
     private final IndexInput proxIn;
 
     int limit; // number of docs in this posting
     int ord; // how many docs we've read
     int doc = -1; // doc we last read
@@ -752,11 +935,11 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
       // Decode next doc/freq pair
       final int code = freqIn.readVInt();
 
-      accum += code >>> 1; // shift off low bit
-      if ((code & 1) != 0) { // if low bit is set
-        freq = 1; // freq is one
+      accum += code >>> 1; // shift off low bit
+      if ((code & 1) != 0) { // if low bit is set
+        freq = 1; // freq is one
       } else {
-        freq = freqIn.readVInt(); // else read freq
+        freq = freqIn.readVInt(); // else read freq
       }
       posPendingCount += freq;
 
TestReuseDocsEnum.java (new file)
@@ -0,0 +1,173 @@
package org.apache.lucene.index.codecs.lucene40;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.IdentityHashMap;
import java.util.Random;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Bits.MatchNoBits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;

public class TestReuseDocsEnum extends LuceneTestCase {

  public void testReuseDocsEnumNoReuse() throws IOException {
    Directory dir = newDirectory();
    Codec cp = _TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat());
    RandomIndexWriter writer = new RandomIndexWriter(random, dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setCodec(cp));
    int numdocs = atLeast(20);
    createRandomIndex(numdocs, writer, random);
    writer.commit();

    IndexReader open = IndexReader.open(dir);
    IndexReader[] sequentialSubReaders = open.getSequentialSubReaders();
    for (IndexReader indexReader : sequentialSubReaders) {
      Terms terms = indexReader.terms("body");
      TermsEnum iterator = terms.iterator(null);
      IdentityHashMap<DocsEnum, Boolean> enums = new IdentityHashMap<DocsEnum, Boolean>();
      MatchNoBits bits = new Bits.MatchNoBits(open.maxDoc());
      while ((iterator.next()) != null) {
        DocsEnum docs = iterator.docs(random.nextBoolean() ? bits : new Bits.MatchNoBits(open.maxDoc()), null, random.nextBoolean());
        enums.put(docs, true);
      }

      assertEquals(terms.getUniqueTermCount(), enums.size());
    }
    IOUtils.close(writer, open, dir);
  }

  // enums are reused only if the Bits passed in are null or the very same instance
  public void testReuseDocsEnumSameBitsOrNull() throws IOException {
    Directory dir = newDirectory();
    Codec cp = _TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat());
    RandomIndexWriter writer = new RandomIndexWriter(random, dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setCodec(cp));
    int numdocs = atLeast(20);
    createRandomIndex(numdocs, writer, random);
    writer.commit();

    IndexReader open = IndexReader.open(dir);
    IndexReader[] sequentialSubReaders = open.getSequentialSubReaders();
    for (IndexReader indexReader : sequentialSubReaders) {
      Terms terms = indexReader.terms("body");
      TermsEnum iterator = terms.iterator(null);
      IdentityHashMap<DocsEnum, Boolean> enums = new IdentityHashMap<DocsEnum, Boolean>();
      MatchNoBits bits = new Bits.MatchNoBits(open.maxDoc());
      DocsEnum docs = null;
      while ((iterator.next()) != null) {
        docs = iterator.docs(bits, docs, random.nextBoolean());
        enums.put(docs, true);
      }

      assertEquals(1, enums.size());
      enums.clear();
      iterator = terms.iterator(null);
      docs = null;
      while ((iterator.next()) != null) {
        docs = iterator.docs(new Bits.MatchNoBits(open.maxDoc()), docs, random.nextBoolean());
        enums.put(docs, true);
      }
      assertEquals(terms.getUniqueTermCount(), enums.size());

      enums.clear();
      iterator = terms.iterator(null);
      docs = null;
      while ((iterator.next()) != null) {
        docs = iterator.docs(null, docs, random.nextBoolean());
        enums.put(docs, true);
      }
      assertEquals(1, enums.size());
    }
    IOUtils.close(writer, open, dir);
  }

  // make sure we never reuse from another reader even if it is the same field & codec etc
  public void testReuseDocsEnumDifferentReader() throws IOException {
    Directory dir = newDirectory();
    Codec cp = _TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat());
    RandomIndexWriter writer = new RandomIndexWriter(random, dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setCodec(cp));
    int numdocs = atLeast(20);
    createRandomIndex(numdocs, writer, random);
    writer.commit();

    IndexReader firstReader = IndexReader.open(dir);
    IndexReader secondReader = IndexReader.open(dir);
    IndexReader[] sequentialSubReaders = firstReader.getSequentialSubReaders();
    IndexReader[] sequentialSubReaders2 = secondReader.getSequentialSubReaders();

    for (IndexReader indexReader : sequentialSubReaders) {
      Terms terms = indexReader.terms("body");
      TermsEnum iterator = terms.iterator(null);
      IdentityHashMap<DocsEnum, Boolean> enums = new IdentityHashMap<DocsEnum, Boolean>();
      MatchNoBits bits = new Bits.MatchNoBits(firstReader.maxDoc());
      iterator = terms.iterator(null);
      DocsEnum docs = null;
      BytesRef term = null;
      while ((term = iterator.next()) != null) {
        docs = iterator.docs(null, randomDocsEnum("body", term, sequentialSubReaders2, bits), random.nextBoolean());
        enums.put(docs, true);
      }
      assertEquals(terms.getUniqueTermCount(), enums.size());

      iterator = terms.iterator(null);
      enums.clear();
      docs = null;
      while ((term = iterator.next()) != null) {
        docs = iterator.docs(bits, randomDocsEnum("body", term, sequentialSubReaders2, bits), random.nextBoolean());
        enums.put(docs, true);
      }
      assertEquals(terms.getUniqueTermCount(), enums.size());
    }
    IOUtils.close(writer, firstReader, secondReader, dir);
  }

  public DocsEnum randomDocsEnum(String field, BytesRef term, IndexReader[] readers, Bits bits) throws IOException {
    if (random.nextInt(10) == 0) {
      return null;
    }
    IndexReader indexReader = readers[random.nextInt(readers.length)];
    return indexReader.termDocsEnum(bits, field, term, random.nextBoolean());
  }

  /**
   * Populates a writer with random documents; must be fully reproducible
   * given the seed.
   */
  public static void createRandomIndex(int numdocs, RandomIndexWriter writer,
      Random random) throws IOException {
    LineFileDocs lineFileDocs = new LineFileDocs(random);

    for (int i = 0; i < numdocs; i++) {
      writer.addDocument(lineFileDocs.nextDoc());
    }
  }
}
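A side note on the test technique: the assertions count distinct enum instances with IdentityHashMap, which keys on reference identity (==) rather than equals(), so a reused enum collapses into a single entry while every freshly created one adds an entry. A plain-Java illustration, unrelated to Lucene:

import java.util.IdentityHashMap;

public class IdentityCountDemo {
  public static void main(String[] args) {
    IdentityHashMap<String, Boolean> seen = new IdentityHashMap<String, Boolean>();
    String a = new String("enum");
    String b = new String("enum"); // equals(a), but a different instance
    seen.put(a, true);
    seen.put(a, true);             // same instance: still one entry
    seen.put(b, true);             // distinct instance: second entry
    System.out.println(seen.size()); // prints 2
  }
}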