mirror of https://github.com/apache/lucene.git

LUCENE-2922: optimize the scan-within-block step of BlockTermsReader.seek

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1071564 13f79535-47bb-0310-9956-ffa450edef68

parent 0d57f3b786
commit 3f8c9b5cfc
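
Change summary (editor's note, inferred from the diff below): BlockTermsReader stores each terms-dict block as a shared prefix plus per-term suffix bytes. Before this change, seek(target) scanned a block by materializing every term through _next() and running a full Comparator<BytesRef> against the target. With this change, the scan first checks the target against the block's common prefix and then compares only the suffix bytes directly out of the block's byte[] (termSuffixesReader), copying bytes into the returned term only when the scan stops. The pluggable Comparator<BytesRef> termComp parameter is also removed from BlockTermsWriter/BlockTermsReader and from the codecs that construct them; the block terms dictionary now always sorts by BytesRef.getUTF8SortedAsUnicodeComparator(). The sketch below is a minimal, standalone illustration of the suffix-scan idea, assuming a simplified block layout; the names (prefix, suffixes, scanBlock) are illustrative, not Lucene's actual API.

import java.nio.charset.StandardCharsets;

// Standalone sketch (not Lucene code): scan one prefix-compressed block of terms
// for a target, comparing the shared prefix once and then only the suffix bytes.
public class BlockScanSketch {

  /** Returns 0 if the target is found in the block, -1 if the scan proves the
   *  target is absent (the block has moved past it), or +1 if the target sorts
   *  after every term in the block, so the caller should move to the next block. */
  static int scanBlock(byte[] prefix, byte[][] suffixes, byte[] target) {
    // Compare the target against the block's shared prefix exactly once,
    // instead of re-comparing those bytes for every term in the block.
    int common = Math.min(prefix.length, target.length);
    for (int i = 0; i < common; i++) {
      int cmp = (prefix[i] & 0xFF) - (target[i] & 0xFF);
      if (cmp > 0) return -1;  // block prefix already past the target: absent
      if (cmp < 0) return +1;  // target past this whole block: try the next one
    }
    if (target.length < prefix.length) return -1;

    // Prefix matches: walk the terms, comparing only their suffix bytes.
    for (byte[] suffix : suffixes) {
      int termLen = prefix.length + suffix.length;
      int limit = Math.min(termLen, target.length);
      int cmp = 0;
      for (int pos = prefix.length; pos < limit && cmp == 0; pos++) {
        cmp = (suffix[pos - prefix.length] & 0xFF) - (target[pos] & 0xFF);
      }
      if (cmp == 0) {
        cmp = termLen - target.length;   // equal bytes: the shorter term sorts first
      }
      if (cmp == 0) return 0;            // exact match (FOUND)
      if (cmp > 0) return -1;            // current term is after target (NOT_FOUND)
      // cmp < 0: current term is still before the target; keep scanning
    }
    return +1;                           // ran off the end of the block (END)
  }

  public static void main(String[] args) {
    byte[] prefix = "app".getBytes(StandardCharsets.UTF_8);
    byte[][] suffixes = {
        "le".getBytes(StandardCharsets.UTF_8),   // apple
        "let".getBytes(StandardCharsets.UTF_8),  // applet
        "ly".getBytes(StandardCharsets.UTF_8),   // apply
    };
    System.out.println(scanBlock(prefix, suffixes, "applet".getBytes(StandardCharsets.UTF_8)));  // 0
    System.out.println(scanBlock(prefix, suffixes, "appla".getBytes(StandardCharsets.UTF_8)));   // -1
    System.out.println(scanBlock(prefix, suffixes, "apricot".getBytes(StandardCharsets.UTF_8))); // 1
  }
}

With a block holding {apple, applet, apply} under the shared prefix "app", the scan finds "applet" without ever copying "apple" into a BytesRef, reports "appla" as absent as soon as a suffix byte exceeds it, and tells the caller to move on to the next block for "apricot".
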
@@ -71,7 +71,7 @@ public class AppendingCodec extends Codec {
     }
     success = false;
     try {
-      FieldsConsumer ret = new AppendingTermsDictWriter(indexWriter, state, docsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
+      FieldsConsumer ret = new AppendingTermsDictWriter(indexWriter, state, docsWriter);
       success = true;
       return ret;
     } finally {
@@ -111,7 +111,6 @@ public class AppendingCodec extends Codec {
                                                 state.dir, state.fieldInfos, state.segmentInfo.name,
                                                 docsReader,
                                                 state.readBufferSize,
-                                                BytesRef.getUTF8SortedAsUnicodeComparator(),
                                                 StandardCodec.TERMS_CACHE_SIZE,
                                                 state.codecId);
     success = true;
@@ -18,7 +18,6 @@ package org.apache.lucene.index.codecs.appending;
  */
 
 import java.io.IOException;
-import java.util.Comparator;
 
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.codecs.PostingsReaderBase;
@@ -27,7 +26,6 @@ import org.apache.lucene.index.codecs.BlockTermsWriter;
 import org.apache.lucene.index.codecs.TermsIndexReaderBase;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CodecUtil;
 
 public class AppendingTermsDictReader extends BlockTermsReader {
@@ -35,9 +33,9 @@ public class AppendingTermsDictReader extends BlockTermsReader {
   public AppendingTermsDictReader(TermsIndexReaderBase indexReader,
           Directory dir, FieldInfos fieldInfos, String segment,
           PostingsReaderBase postingsReader, int readBufferSize,
-          Comparator<BytesRef> termComp, int termsCacheSize, String codecId) throws IOException {
+          int termsCacheSize, String codecId) throws IOException {
     super(indexReader, dir, fieldInfos, segment, postingsReader, readBufferSize,
-          termComp, termsCacheSize, codecId);
+          termsCacheSize, codecId);
   }
 
   @Override
@@ -18,23 +18,21 @@ package org.apache.lucene.index.codecs.appending;
  */
 
 import java.io.IOException;
-import java.util.Comparator;
 
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.codecs.PostingsWriterBase;
 import org.apache.lucene.index.codecs.BlockTermsWriter;
 import org.apache.lucene.index.codecs.TermsIndexWriterBase;
 import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CodecUtil;
 
 public class AppendingTermsDictWriter extends BlockTermsWriter {
   final static String CODEC_NAME = "APPENDING_TERMS_DICT";
 
   public AppendingTermsDictWriter(TermsIndexWriterBase indexWriter,
-      SegmentWriteState state, PostingsWriterBase postingsWriter,
-      Comparator<BytesRef> termComp) throws IOException {
-    super(indexWriter, state, postingsWriter, termComp);
+      SegmentWriteState state, PostingsWriterBase postingsWriter)
+      throws IOException {
+    super(indexWriter, state, postingsWriter);
   }
 
   @Override
@@ -30,4 +30,9 @@ public class OrdTermState extends TermState {
     assert other instanceof OrdTermState : "can not copy from " + other.getClass().getName();
     this.ord = ((OrdTermState) other).ord;
   }
+
+  @Override
+  public String toString() {
+    return "OrdTermState ord=" + ord;
+  }
 }
@@ -44,4 +44,9 @@ public abstract class TermState implements Cloneable {
       throw new RuntimeException(cnse);
     }
   }
+
+  @Override
+  public String toString() {
+    return "TermState";
+  }
 }
@@ -51,6 +51,6 @@ public class BlockTermState extends OrdTermState {
 
   @Override
   public String toString() {
-    return super.toString() + "ord=" + ord + " docFreq=" + docFreq + " totalTermFreq=" + totalTermFreq + " termCount=" + termCount + " blockFP=" + blockFilePointer;
+    return "ord=" + ord + " docFreq=" + docFreq + " totalTermFreq=" + totalTermFreq + " termCount=" + termCount + " blockFP=" + blockFilePointer;
   }
 }
@@ -66,9 +66,6 @@ public class BlockTermsReader extends FieldsProducer {
 
   private final TreeMap<String,FieldReader> fields = new TreeMap<String,FieldReader>();
 
-  // Comparator that orders our terms
-  private final Comparator<BytesRef> termComp;
-
   // Caches the most recently looked-up field + terms:
   private final DoubleBarrelLRUCache<FieldAndTerm,BlockTermState> termsCache;
 
@@ -111,13 +108,12 @@ public class BlockTermsReader extends FieldsProducer {
   //private String segment;
 
   public BlockTermsReader(TermsIndexReaderBase indexReader, Directory dir, FieldInfos fieldInfos, String segment, PostingsReaderBase postingsReader, int readBufferSize,
-                          Comparator<BytesRef> termComp, int termsCacheSize, String codecId)
+                          int termsCacheSize, String codecId)
     throws IOException {
 
     this.postingsReader = postingsReader;
     termsCache = new DoubleBarrelLRUCache<FieldAndTerm,BlockTermState>(termsCacheSize);
 
-    this.termComp = termComp;
     //this.segment = segment;
     in = dir.openInput(IndexFileNames.segmentFileName(segment, codecId, BlockTermsWriter.TERMS_EXTENSION),
                        readBufferSize);
@@ -260,7 +256,7 @@ public class BlockTermsReader extends FieldsProducer {
 
   @Override
   public Comparator<BytesRef> getComparator() {
-    return termComp;
+    return BytesRef.getUTF8SortedAsUnicodeComparator();
   }
 
   @Override
@@ -342,23 +338,29 @@ public class BlockTermsReader extends FieldsProducer {
 
     @Override
     public Comparator<BytesRef> getComparator() {
-      return termComp;
+      return BytesRef.getUTF8SortedAsUnicodeComparator();
     }
 
+    // TODO: we may want an alternate mode here which is
+    // "if you are about to return NOT_FOUND I won't use
+    // the terms data from that"; eg FuzzyTermsEnum will
+    // (usually) just immediately call seek again if we
+    // return NOT_FOUND so it's a waste for us to fill in
+    // the term that was actually NOT_FOUND
    @Override
    public SeekStatus seek(final BytesRef target, final boolean useCache) throws IOException {

      if (indexEnum == null) {
        throw new IllegalStateException("terms index was not loaded");
      }

-      //System.out.println("BTR.seek seg=" + segment + " target=" + fieldInfo.name + ":" + target.utf8ToString() + " " + target + " current=" + term().utf8ToString() + " " + term() + " useCache=" + useCache + " indexIsCurrent=" + indexIsCurrent + " didIndexNext=" + didIndexNext + " seekPending=" + seekPending + " divisor=" + indexReader.getDivisor() + " this=" + this);
      /*
+      System.out.println("BTR.seek seg=" + segment + " target=" + fieldInfo.name + ":" + target.utf8ToString() + " " + target + " current=" + term().utf8ToString() + " " + term() + " useCache=" + useCache + " indexIsCurrent=" + indexIsCurrent + " didIndexNext=" + didIndexNext + " seekPending=" + seekPending + " divisor=" + indexReader.getDivisor() + " this=" + this);
      if (didIndexNext) {
        if (nextIndexTerm == null) {
-          //System.out.println(" nextIndexTerm=null");
+          System.out.println(" nextIndexTerm=null");
        } else {
-          //System.out.println(" nextIndexTerm=" + nextIndexTerm.utf8ToString());
+          System.out.println(" nextIndexTerm=" + nextIndexTerm.utf8ToString());
        }
      }
      */
@@ -386,7 +388,7 @@ public class BlockTermsReader extends FieldsProducer {
      // is after current term but before next index term:
      if (indexIsCurrent) {

-        final int cmp = termComp.compare(term, target);
+        final int cmp = BytesRef.getUTF8SortedAsUnicodeComparator().compare(term, target);

        if (cmp == 0) {
          // Already at the requested term
@@ -404,7 +406,7 @@ public class BlockTermsReader extends FieldsProducer {
            didIndexNext = true;
          }

-          if (nextIndexTerm == null || termComp.compare(target, nextIndexTerm) < 0) {
+          if (nextIndexTerm == null || BytesRef.getUTF8SortedAsUnicodeComparator().compare(target, nextIndexTerm) < 0) {
            // Optimization: requested term is within the
            // same term block we are now in; skip seeking
            // (but do scanning):
@@ -434,48 +436,175 @@ public class BlockTermsReader extends FieldsProducer {
          state.ord = indexEnum.ord()-1;
        }

-        // NOTE: the first _next() after an index seek is
-        // a bit wasteful, since it redundantly reads some
-        // suffix bytes into the buffer. We could avoid storing
-        // those bytes in the primary file, but then when
-        // next()ing over an index term we'd have to
-        // special case it:
        term.copy(indexEnum.term());
        //System.out.println(" seek: term=" + term.utf8ToString());
      } else {
-        ////System.out.println(" skip seek");
+        //System.out.println(" skip seek");
+        if (state.termCount == state.blockTermCount && !nextBlock()) {
+          indexIsCurrent = false;
+          return SeekStatus.END;
+        }
      }

      seekPending = false;

-      // Now scan:
-      while (_next() != null) {
-        final int cmp = termComp.compare(term, target);
-        if (cmp == 0) {
-          // Match!
-          if (useCache) {
-            // Store in cache
-            decodeMetaData();
-            termsCache.put(new FieldAndTerm(fieldTerm), (BlockTermState) state.clone());
-          }
-          //System.out.println(" FOUND");
-          return SeekStatus.FOUND;
-        } else if (cmp > 0) {
-          //System.out.println(" NOT_FOUND term=" + term.utf8ToString());
-          return SeekStatus.NOT_FOUND;
-        }
+      int common = 0;
+
+      // Scan within block. We could do this by calling
+      // _next() and testing the resulting term, but this
+      // is wasteful. Instead, we first confirm the
+      // target matches the common prefix of this block,
+      // and then we scan the term bytes directly from the
+      // termSuffixesreader's byte[], saving a copy into
+      // the BytesRef term per term. Only when we return
+      // do we then copy the bytes into the term.
+
+      while(true) {
+
+        // First, see if target term matches common prefix
+        // in this block:
+        if (common < termBlockPrefix) {
+          final int cmp = (term.bytes[common]&0xFF) - (target.bytes[target.offset + common]&0xFF);
+          if (cmp < 0) {
+
+            // TODO: maybe we should store common prefix
+            // in block header? (instead of relying on
+            // last term of previous block)
+
+            // Target's prefix is after the common block
+            // prefix, so term cannot be in this block
+            // but it could be in next block. We
+            // must scan to end-of-block to set common
+            // prefix for next block:
+            if (state.termCount < state.blockTermCount) {
+              while(state.termCount < state.blockTermCount-1) {
+                state.termCount++;
+                state.ord++;
+                termSuffixesReader.skipBytes(termSuffixesReader.readVInt());
+              }
+              final int suffix = termSuffixesReader.readVInt();
+              term.length = termBlockPrefix + suffix;
+              if (term.bytes.length < term.length) {
+                term.grow(term.length);
+              }
+              termSuffixesReader.readBytes(term.bytes, termBlockPrefix, suffix);
+            }
+            state.ord++;
+
+            if (!nextBlock()) {
+              indexIsCurrent = false;
+              return SeekStatus.END;
+            }
+            common = 0;
+
+          } else if (cmp > 0) {
+            // Target's prefix is before the common prefix
+            // of this block, so we position to start of
+            // block and return NOT_FOUND:
+            assert state.termCount == 0;
+
+            final int suffix = termSuffixesReader.readVInt();
+            term.length = termBlockPrefix + suffix;
+            if (term.bytes.length < term.length) {
+              term.grow(term.length);
+            }
+            termSuffixesReader.readBytes(term.bytes, termBlockPrefix, suffix);
+            return SeekStatus.NOT_FOUND;
+          } else {
+            common++;
+          }
+
+          continue;
+        }
+
+        // Test every term in this block
+        while (true) {
+          state.termCount++;
+          state.ord++;
+
+          final int suffix = termSuffixesReader.readVInt();
+
+          // We know the prefix matches, so just compare the new suffix:
+          final int termLen = termBlockPrefix + suffix;
+          int bytePos = termSuffixesReader.getPosition();
+
+          boolean next = false;
+          final int limit = target.offset + (termLen < target.length ? termLen : target.length);
+          int targetPos = target.offset + termBlockPrefix;
+          while(targetPos < limit) {
+            final int cmp = (termSuffixes[bytePos++]&0xFF) - (target.bytes[targetPos++]&0xFF);
+            if (cmp < 0) {
+              // Current term is still before the target;
+              // keep scanning
+              next = true;
+              break;
+            } else if (cmp > 0) {
+              // Done! Current term is after target. Stop
+              // here, fill in real term, return NOT_FOUND.
+              term.length = termBlockPrefix + suffix;
+              if (term.bytes.length < term.length) {
+                term.grow(term.length);
+              }
+              termSuffixesReader.readBytes(term.bytes, termBlockPrefix, suffix);
+              //System.out.println(" NOT_FOUND");
+              return SeekStatus.NOT_FOUND;
+            }
+          }
+
+          if (!next && target.length <= termLen) {
+            term.length = termBlockPrefix + suffix;
+            if (term.bytes.length < term.length) {
+              term.grow(term.length);
+            }
+            termSuffixesReader.readBytes(term.bytes, termBlockPrefix, suffix);
+
+            if (target.length == termLen) {
+              // Done! Exact match. Stop here, fill in
+              // real term, return FOUND.
+              //System.out.println(" FOUND");
+
+              if (useCache) {
+                // Store in cache
+                decodeMetaData();
+                //System.out.println(" cache! state=" + state);
+                termsCache.put(new FieldAndTerm(fieldTerm), (BlockTermState) state.clone());
+              }
+
+              return SeekStatus.FOUND;
+            } else {
+              //System.out.println(" NOT_FOUND");
+              return SeekStatus.NOT_FOUND;
+            }
+          }
+
+          if (state.termCount == state.blockTermCount) {
+            // Must pre-fill term for next block's common prefix
+            term.length = termBlockPrefix + suffix;
+            if (term.bytes.length < term.length) {
+              term.grow(term.length);
+            }
+            termSuffixesReader.readBytes(term.bytes, termBlockPrefix, suffix);
+            break;
+          } else {
+            termSuffixesReader.skipBytes(suffix);
+          }
+        }

        // The purpose of the terms dict index is to seek
        // the enum to the closest index term before the
        // term we are looking for. So, we should never
        // cross another index term (besides the first
        // one) while we are scanning:
-        assert indexIsCurrent;
-      }

-      indexIsCurrent = false;
-      //System.out.println(" END");
-      return SeekStatus.END;
+        assert indexIsCurrent;
+
+        if (!nextBlock()) {
+          //System.out.println(" END");
+          indexIsCurrent = false;
+          return SeekStatus.END;
+        }
+        common = 0;
+      }
    }

    @Override
@@ -515,12 +644,10 @@ public class BlockTermsReader extends FieldsProducer {
        decode all metadata up to the current term. */
    private BytesRef _next() throws IOException {
      //System.out.println("BTR._next seg=" + segment + " this=" + this + " termCount=" + state.termCount + " (vs " + state.blockTermCount + ")");
-      if (state.termCount == state.blockTermCount) {
-        if (!nextBlock()) {
-          //System.out.println(" eof");
-          indexIsCurrent = false;
-          return null;
-        }
+      if (state.termCount == state.blockTermCount && !nextBlock()) {
+        //System.out.println(" eof");
+        indexIsCurrent = false;
+        return null;
      }

      // TODO: cutover to something better for these ints! simple64?
@@ -689,7 +816,7 @@ public class BlockTermsReader extends FieldsProducer {
      }
      //System.out.println(" termSuffixes len=" + len);
      in.readBytes(termSuffixes, 0, len);
-      termSuffixesReader.reset(termSuffixes);
+      termSuffixesReader.reset(termSuffixes, 0, len);

      // docFreq, totalTermFreq
      len = in.readVInt();
@@ -698,7 +825,7 @@ public class BlockTermsReader extends FieldsProducer {
      }
      //System.out.println(" freq bytes len=" + len);
      in.readBytes(docFreqBytes, 0, len);
-      freqReader.reset(docFreqBytes);
+      freqReader.reset(docFreqBytes, 0, len);
      metaDataUpto = 0;

      state.termCount = 0;
@@ -717,23 +844,32 @@ public class BlockTermsReader extends FieldsProducer {
      if (!seekPending) {
        // lazily catch up on metadata decode:
        final int limit = state.termCount;
+        // We must set/incr state.termCount because
+        // postings impl can look at this
        state.termCount = metaDataUpto;
+        // TODO: better API would be "jump straight to term=N"???
        while (metaDataUpto < limit) {
-          //System.out.println(" decode");
+          //System.out.println(" decode mdUpto=" + metaDataUpto);
          // TODO: we could make "tiers" of metadata, ie,
          // decode docFreq/totalTF but don't decode postings
          // metadata; this way caller could get
          // docFreq/totalTF w/o paying decode cost for
          // postings
+
+          // TODO: if docFreq were bulk decoded we could
+          // just skipN here:
          state.docFreq = freqReader.readVInt();
+          //System.out.println(" dF=" + state.docFreq);
          if (!fieldInfo.omitTermFreqAndPositions) {
            state.totalTermFreq = state.docFreq + freqReader.readVLong();
+            //System.out.println(" totTF=" + state.totalTermFreq);
          }
+
          postingsReader.nextTerm(fieldInfo, state);
          metaDataUpto++;
          state.termCount++;
        }
-      } else {
+      //} else {
        //System.out.println(" skip! seekPending");
      }
    }
@@ -63,24 +63,23 @@ public class BlockTermsWriter extends FieldsConsumer {
  FieldInfo currentField;
  private final TermsIndexWriterBase termsIndexWriter;
  private final List<TermsWriter> fields = new ArrayList<TermsWriter>();
-  private final Comparator<BytesRef> termComp;
-  private final String segment;
+  //private final String segment;

  public BlockTermsWriter(
      TermsIndexWriterBase termsIndexWriter,
      SegmentWriteState state,
-      PostingsWriterBase postingsWriter,
-      Comparator<BytesRef> termComp) throws IOException
+      PostingsWriterBase postingsWriter)
+    throws IOException
  {
    final String termsFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, TERMS_EXTENSION);
    this.termsIndexWriter = termsIndexWriter;
-    this.termComp = termComp;
    out = state.directory.createOutput(termsFileName);
    fieldInfos = state.fieldInfos;
    writeHeader(out);
    currentField = null;
    this.postingsWriter = postingsWriter;
-    segment = state.segmentName;
+    //segment = state.segmentName;

    //System.out.println("BTW.init seg=" + state.segmentName);

@@ -161,7 +160,6 @@ public class BlockTermsWriter extends FieldsConsumer {
    private long numTerms;
    private final TermsIndexWriterBase.FieldWriter fieldIndexWriter;
    long sumTotalTermFreq;
-    private final BytesRef lastTerm = new BytesRef();

    private TermEntry[] pendingTerms;

@@ -185,12 +183,12 @@ public class BlockTermsWriter extends FieldsConsumer {

    @Override
    public Comparator<BytesRef> getComparator() {
-      return termComp;
+      return BytesRef.getUTF8SortedAsUnicodeComparator();
    }

    @Override
    public PostingsConsumer startTerm(BytesRef text) throws IOException {
-      //System.out.println("BTW.startTerm seg=" + segment + " term=" + fieldInfo.name + ":" + text.utf8ToString() + " " + text);
+      //System.out.println("BTW.startTerm term=" + fieldInfo.name + ":" + text.utf8ToString() + " " + text + " seg=" + segment);
      postingsWriter.startTerm();
      return postingsWriter;
    }
@@ -201,7 +199,7 @@ public class BlockTermsWriter extends FieldsConsumer {
    public void finishTerm(BytesRef text, TermStats stats) throws IOException {

      assert stats.docFreq > 0;
-      //System.out.println("BTW.finishTerm seg=" + segment + " term=" + fieldInfo.name + ":" + text.utf8ToString() + " " + text + " df=" + stats.docFreq);
+      //System.out.println("BTW.finishTerm term=" + fieldInfo.name + ":" + text.utf8ToString() + " " + text + " seg=" + segment + " df=" + stats.docFreq);

      final boolean isIndexTerm = fieldIndexWriter.checkIndexTerm(text, stats);

@@ -213,6 +211,7 @@ public class BlockTermsWriter extends FieldsConsumer {
          flushBlock();
        }
        fieldIndexWriter.add(text, stats, out.getFilePointer());
+        //System.out.println(" index term!");
      }

      if (pendingTerms.length == pendingCount) {
@@ -265,7 +264,7 @@ public class BlockTermsWriter extends FieldsConsumer {
    private final RAMOutputStream bytesWriter = new RAMOutputStream();

    private void flushBlock() throws IOException {
-      //System.out.println("BTW.flushBlock pendingCount=" + pendingCount);
+      //System.out.println("BTW.flushBlock seg=" + segment + " pendingCount=" + pendingCount + " fp=" + out.getFilePointer());

      // First pass: compute common prefix for all terms
      // in the block, against term before first term in
@@ -89,7 +89,7 @@ public class PulsingCodec extends Codec {
    // Terms dict
    success = false;
    try {
-      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, pulsingWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
+      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, pulsingWriter);
      success = true;
      return ret;
    } finally {
@@ -136,7 +136,6 @@ public class PulsingCodec extends Codec {
                                                state.dir, state.fieldInfos, state.segmentInfo.name,
                                                pulsingReader,
                                                state.readBufferSize,
-                                                BytesRef.getUTF8SortedAsUnicodeComparator(),
                                                StandardCodec.TERMS_CACHE_SIZE,
                                                state.codecId);
    success = true;
@@ -144,7 +144,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
      //System.out.println(" count=" + count + " threshold=" + maxPositions);

      if (count <= maxPositions) {
-        //System.out.println(" inlined");
+        //System.out.println(" inlined pos=" + termState.inlinedBytesReader.getPosition());

        // Inlined into terms dict -- just read the byte[] blob in,
        // but don't decode it now (we only decode when a DocsEnum
@@ -23,7 +23,6 @@ import java.util.Set;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.SegmentReadState;
-import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.index.codecs.Codec;
 import org.apache.lucene.index.codecs.FieldsConsumer;
 import org.apache.lucene.index.codecs.FieldsProducer;
@@ -66,7 +65,7 @@ public class StandardCodec extends Codec {

    success = false;
    try {
-      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, docs, BytesRef.getUTF8SortedAsUnicodeComparator());
+      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, docs);
      success = true;
      return ret;
    } finally {
@@ -109,7 +108,6 @@ public class StandardCodec extends Codec {
                                                state.segmentInfo.name,
                                                postings,
                                                state.readBufferSize,
-                                                BytesRef.getUTF8SortedAsUnicodeComparator(),
                                                TERMS_CACHE_SIZE,
                                                state.codecId);
    success = true;
@@ -126,7 +126,7 @@ public class MockFixedIntBlockCodec extends Codec {

    success = false;
    try {
-      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
+      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter);
      success = true;
      return ret;
    } finally {
@@ -170,7 +170,6 @@ public class MockFixedIntBlockCodec extends Codec {
                                                state.segmentInfo.name,
                                                postingsReader,
                                                state.readBufferSize,
-                                                BytesRef.getUTF8SortedAsUnicodeComparator(),
                                                StandardCodec.TERMS_CACHE_SIZE,
                                                state.codecId);
    success = true;
@@ -150,7 +150,7 @@ public class MockVariableIntBlockCodec extends Codec {

    success = false;
    try {
-      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
+      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter);
      success = true;
      return ret;
    } finally {
@@ -195,7 +195,6 @@ public class MockVariableIntBlockCodec extends Codec {
                                                state.segmentInfo.name,
                                                postingsReader,
                                                state.readBufferSize,
-                                                BytesRef.getUTF8SortedAsUnicodeComparator(),
                                                StandardCodec.TERMS_CACHE_SIZE,
                                                state.codecId);
    success = true;
@@ -205,7 +205,7 @@ public class MockRandomCodec extends Codec {

    success = false;
    try {
-      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
+      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter);
      success = true;
      return ret;
    } finally {
@@ -306,7 +306,6 @@ public class MockRandomCodec extends Codec {
                                                state.segmentInfo.name,
                                                postingsReader,
                                                state.readBufferSize,
-                                                BytesRef.getUTF8SortedAsUnicodeComparator(),
                                                termsCacheSize,
                                                state.codecId);
    success = true;
@@ -70,7 +70,7 @@ public class MockSepCodec extends Codec {

    success = false;
    try {
-      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
+      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter);
      success = true;
      return ret;
    } finally {
@@ -114,7 +114,6 @@ public class MockSepCodec extends Codec {
                                                state.segmentInfo.name,
                                                postingsReader,
                                                state.readBufferSize,
-                                                BytesRef.getUTF8SortedAsUnicodeComparator(),
                                                StandardCodec.TERMS_CACHE_SIZE,
                                                state.codecId);
    success = true;
@@ -496,139 +496,13 @@ public class TestExternalCodecs extends LuceneTestCase {
    }
  }

-  public static class MyCodecs extends CodecProvider {
-    MyCodecs() {
-      Codec ram = new RAMOnlyCodec();
-      register(ram);
-      setDefaultFieldCodec(ram.name);
-    }
-  }
-
-  // copied from PulsingCodec, just changing the terms
-  // comparator
-  private static class PulsingReverseTermsCodec extends Codec {
-
-    public PulsingReverseTermsCodec() {
-      name = "PulsingReverseTerms";
-    }
-
-    @Override
-    public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
-      PostingsWriterBase docsWriter = new StandardPostingsWriter(state);
-
-      // Terms that have <= freqCutoff number of docs are
-      // "pulsed" (inlined):
-      final int freqCutoff = 1;
-      PostingsWriterBase pulsingWriter = new PulsingPostingsWriterImpl(freqCutoff, docsWriter);
-
-      // Terms dict index
-      TermsIndexWriterBase indexWriter;
-      boolean success = false;
-      try {
-        indexWriter = new FixedGapTermsIndexWriter(state) {
-          // We sort in reverse unicode order, so, we must
-          // disable the suffix-stripping opto that
-          // FixedGapTermsIndexWriter does by default!
-          @Override
-          protected int indexedTermPrefixLength(BytesRef priorTerm, BytesRef indexedTerm) {
-            return indexedTerm.length;
-          }
-        };
-        success = true;
-      } finally {
-        if (!success) {
-          pulsingWriter.close();
-        }
-      }
-
-      // Terms dict
-      success = false;
-      try {
-        FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, pulsingWriter, reverseUnicodeComparator);
-        success = true;
-        return ret;
-      } finally {
-        if (!success) {
-          try {
-            pulsingWriter.close();
-          } finally {
-            indexWriter.close();
-          }
-        }
-      }
-    }
-
-    @Override
-    public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
-
-      PostingsReaderBase docsReader = new StandardPostingsReader(state.dir, state.segmentInfo, state.readBufferSize, state.codecId);
-      PostingsReaderBase pulsingReader = new PulsingPostingsReaderImpl(docsReader);
-
-      // Terms dict index reader
-      TermsIndexReaderBase indexReader;
-
-      boolean success = false;
-      try {
-        indexReader = new FixedGapTermsIndexReader(state.dir,
-                                                   state.fieldInfos,
-                                                   state.segmentInfo.name,
-                                                   state.termsIndexDivisor,
-                                                   reverseUnicodeComparator,
-                                                   state.codecId);
-        success = true;
-      } finally {
-        if (!success) {
-          pulsingReader.close();
-        }
-      }
-
-      // Terms dict reader
-      success = false;
-      try {
-        FieldsProducer ret = new BlockTermsReader(indexReader,
-                                                  state.dir,
-                                                  state.fieldInfos,
-                                                  state.segmentInfo.name,
-                                                  pulsingReader,
-                                                  state.readBufferSize,
-                                                  reverseUnicodeComparator,
-                                                  StandardCodec.TERMS_CACHE_SIZE,
-                                                  state.codecId);
-        success = true;
-        return ret;
-      } finally {
-        if (!success) {
-          try {
-            pulsingReader.close();
-          } finally {
-            indexReader.close();
-          }
-        }
-      }
-    }
-
-    @Override
-    public void files(Directory dir, SegmentInfo segmentInfo, String codecId, Set<String> files) throws IOException {
-      StandardPostingsReader.files(dir, segmentInfo, codecId, files);
-      BlockTermsReader.files(dir, segmentInfo, codecId, files);
-      FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
-    }
-
-    @Override
-    public void getExtensions(Set<String> extensions) {
-      StandardCodec.getStandardExtensions(extensions);
-    }
-  }
-
-
  // tests storing "id" and "field2" fields as pulsing codec,
  // whose term sort is backwards unicode code point, and
  // storing "field1" as a custom entirely-in-RAM codec
  public void testPerFieldCodec() throws Exception {
-    CodecProvider provider = new MyCodecs();
-    Codec pulsing = new PulsingReverseTermsCodec();
-    provider.register(pulsing);
+    CodecProvider provider = new CoreCodecProvider();
+    provider.register(new RAMOnlyCodec());
+    provider.setDefaultFieldCodec("RamOnly");


    final int NUM_DOCS = 173;
    MockDirectoryWrapper dir = newDirectory();
@@ -645,11 +519,11 @@ public class TestExternalCodecs extends LuceneTestCase {
    doc.add(newField("field1", "this field uses the standard codec as the test", Field.Store.NO, Field.Index.ANALYZED));
    // uses pulsing codec:
    Field field2 = newField("field2", "this field uses the pulsing codec as the test", Field.Store.NO, Field.Index.ANALYZED);
-    provider.setFieldCodec(field2.name(), pulsing.name);
+    provider.setFieldCodec(field2.name(), "Pulsing");
    doc.add(field2);

    Field idField = newField("id", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
-    provider.setFieldCodec(idField.name(), pulsing.name);
+    provider.setFieldCodec(idField.name(), "Pulsing");

    doc.add(idField);
    for(int i=0;i<NUM_DOCS;i++) {
@@ -659,16 +533,13 @@ public class TestExternalCodecs extends LuceneTestCase {
        w.commit();
      }
    }
+    if (VERBOSE) {
+      System.out.println("TEST: now delete id=77");
+    }
    w.deleteDocuments(new Term("id", "77"));

    IndexReader r = IndexReader.open(w, true);
    IndexReader[] subs = r.getSequentialSubReaders();
-    // test each segment
-    for(int i=0;i<subs.length;i++) {
-      testTermsOrder(subs[i]);
-    }
-    // test each multi-reader
-    testTermsOrder(r);

    assertEquals(NUM_DOCS-1, r.numDocs());
    IndexSearcher s = newSearcher(r);
@@ -689,7 +560,6 @@ public class TestExternalCodecs extends LuceneTestCase {
    assertEquals(0, s.search(new TermQuery(new Term("id", "77")), 1).totalHits);
    assertEquals(0, s.search(new TermQuery(new Term("id", "44")), 1).totalHits);

-    testTermsOrder(r);
    r.close();
    s.close();

@@ -697,25 +567,4 @@ public class TestExternalCodecs extends LuceneTestCase {

    dir.close();
  }
-
-  private void testTermsOrder(IndexReader r) throws Exception {
-
-    // Verify sort order matches what my comparator said:
-    BytesRef lastBytesRef = null;
-    TermsEnum terms = MultiFields.getFields(r).terms("id").iterator();
-    //System.out.println("id terms:");
-    while(true) {
-      BytesRef t = terms.next();
-      if (t == null) {
-        break;
-      }
-      //System.out.println(" " + t);
-      if (lastBytesRef == null) {
-        lastBytesRef = new BytesRef(t);
-      } else {
-        assertTrue("terms in wrong order last=" + lastBytesRef.utf8ToString() + " current=" + t.utf8ToString(), reverseUnicodeComparator.compare(lastBytesRef, t) < 0);
-        lastBytesRef.copy(t);
-      }
-    }
-  }
 }
@@ -2784,6 +2784,9 @@ public class TestIndexWriter extends LuceneTestCase {
      final String id = ""+i;
      idField.setValue(id);
      docs.put(id, doc);
+      if (VERBOSE) {
+        System.out.println("TEST: add doc id=" + id);
+      }

      for(int field: fieldIDs) {
        final String s;
@@ -2802,7 +2805,7 @@ public class TestIndexWriter extends LuceneTestCase {
      if (rand.nextInt(5) == 3 && i > 0) {
        final String delID = ""+rand.nextInt(i);
        if (VERBOSE) {
-          System.out.println("TEST: delete doc " + delID);
+          System.out.println("TEST: delete doc id=" + delID);
        }
        w.deleteDocuments(new Term("id", delID));
        docs.remove(delID);
@@ -2825,6 +2828,9 @@ public class TestIndexWriter extends LuceneTestCase {

    for(int iter=0;iter<1000*RANDOM_MULTIPLIER;iter++) {
      String testID = idsList[rand.nextInt(idsList.length)];
+      if (VERBOSE) {
+        System.out.println("TEST: test id=" + testID);
+      }
      TopDocs hits = s.search(new TermQuery(new Term("id", testID)), 1);
      assertEquals(1, hits.totalHits);
      Document doc = r.document(hits.scoreDocs[0].doc);
@@ -124,7 +124,11 @@ public class TestIndexWriterReader extends LuceneTestCase {
    } else {
      iwc.setMergePolicy(NoMergePolicy.COMPOUND_FILES);
    }
+    if (VERBOSE) {
+      System.out.println("TEST: make index");
+    }
    IndexWriter writer = new IndexWriter(dir1, iwc);
+    writer.setInfoStream(VERBOSE ? System.out : null);

    // create the index
    createIndexNoClose(!optimize, "index1", writer);
@@ -146,6 +150,9 @@ public class TestIndexWriterReader extends LuceneTestCase {
    IndexReader r2 = writer.getReader();
    assertTrue(r2.isCurrent());
    assertEquals(0, count(new Term("id", id10), r2));
+    if (VERBOSE) {
+      System.out.println("TEST: verify id");
+    }
    assertEquals(1, count(new Term("id", Integer.toString(8000)), r2));

    r1.close();
@@ -79,6 +79,9 @@ public class TestAutomatonQuery extends LuceneTestCase {
  }

  private int automatonQueryNrHits(AutomatonQuery query) throws IOException {
+    if (VERBOSE) {
+      System.out.println("TEST: run aq=" + query);
+    }
    return searcher.search(query, 5).totalHits;
  }

|
Loading…
Reference in New Issue