mirror of https://github.com/apache/lucene.git
LUCENE-5675: fix nocommits
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5675@1596512 13f79535-47bb-0310-9956-ffa450edef68
parent d6968c3924
commit 18d2cfaf9c
@@ -80,6 +80,7 @@ public final class IDVersionPostingsWriter extends PushPostingsWriterBase {
 
     lastDocID = docID;
     lastPosition = -1;
+    lastVersion = -1;
   }
 
   @Override
@@ -230,15 +230,13 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
     return ((IDVersionTermState) currentFrame.state).idVersion;
   }
 
-  /** Returns false if the term deos not exist, or it exists but its version is too old (< minIDVersion). */
+  /** Returns false if the term does not exist, or it exists but its version is too old (< minIDVersion). */
   public boolean seekExact(final BytesRef target, long minIDVersion) throws IOException {
 
     if (fr.index == null) {
       throw new IllegalStateException("terms index was not loaded");
     }
 
-    // nocommit would be nice if somehow on doing deletes we didn't have to double-lookup again...
-
     if (term.bytes.length <= target.length) {
       term.bytes = ArrayUtil.grow(term.bytes, 1+target.length);
     }
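The corrected javadoc above states the whole contract of the version-aware lookup: seekExact(target, minIDVersion) answers, in a single seek, whether the ID exists with a version at least as new as the one supplied. The sketch below shows how a caller might use it. It is illustrative only and not part of the patch: the class name VersionGatedLookup is invented, the package org.apache.lucene.codecs.idversion is assumed for the idversion classes, and it presumes the "id" field was written with IDVersionPostingsFormat.

// Illustrative sketch (not from the patch): decide whether an incoming update is stale
// by asking the ID/version-aware TermsEnum whether the ID already exists at a
// version >= the incoming one.
import java.io.IOException;

import org.apache.lucene.codecs.idversion.IDVersionSegmentTermsEnum; // assumed package
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

final class VersionGatedLookup {

  /** Returns true if {@code id} is already indexed with a version >= {@code incomingVersion}. */
  static boolean alreadyCurrent(Terms idTerms, BytesRef id, long incomingVersion) throws IOException {
    TermsEnum termsEnum = idTerms.iterator(null);
    if (termsEnum instanceof IDVersionSegmentTermsEnum) {
      // Per the javadoc above: false means the term is missing or its version is too old.
      // The enum can also skip whole blocks whose max version is below incomingVersion.
      return ((IDVersionSegmentTermsEnum) termsEnum).seekExact(id, incomingVersion);
    }
    // Fallback for ordinary postings formats: a plain exact seek, ignoring versions.
    return termsEnum.seekExact(id);
  }
}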
@@ -260,7 +258,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
 
     boolean changed = false;
 
-    // nocommit we could stop earlier w/ the version check, every time we traverse an index arc we can check?
+    // TODO: we could stop earlier w/ the version check, every time we traverse an index arc we can check?
 
     if (currentFrame != staticFrame) {
 
@@ -380,7 +378,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
       }
       return false;
     }
-    System.out.println(" term version=" + ((IDVersionTermState) currentFrame.state).idVersion + " frame version=" + currentFrame.maxIDVersion + " frame ord=" + currentFrame.ord);
+    // System.out.println(" term version=" + ((IDVersionTermState) currentFrame.state).idVersion + " frame version=" + currentFrame.maxIDVersion + " frame ord=" + currentFrame.ord);
 
     if (DEBUG) {
       System.out.println(" target is same as current; return true");
@@ -220,11 +220,10 @@ final class IDVersionSegmentTermsEnumFrame {
   }
 
   void rewind() {
+
     // Force reload:
     fp = fpOrig;
     nextEnt = -1;
-    // nocommit move to BT too?
-    //state.termBlockOrd = 0;
     hasTerms = hasTermsOrig;
     if (isFloor) {
       floorDataReader.rewind();
@@ -390,8 +389,7 @@ final class IDVersionSegmentTermsEnumFrame {
 
   public void decodeMetaData() throws IOException {
 
-    //if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + segment + " mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd);
-    System.out.println("\nBTTR.decodeMetadata seg=" + ste.fr.parent.segment + " mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd);
+    //if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + ste.fr.parent.segment + " mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd);
 
     assert nextEnt >= 0;
 
@@ -136,9 +136,7 @@ final class VersionBlockTreeTermsReader extends FieldsProducer {
       }
 
       // verify
-      if (version >= VersionBlockTreeTermsWriter.VERSION_CHECKSUM) {
       CodecUtil.checksumEntireFile(indexIn);
-      }
 
       // Have PostingsReader init itself
       postingsReader.init(in);
@@ -167,15 +165,10 @@ final class VersionBlockTreeTermsReader extends FieldsProducer {
       final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
       final long sumDocFreq = in.readVLong();
       final int docCount = in.readVInt();
-      final int longsSize = version >= VersionBlockTreeTermsWriter.VERSION_META_ARRAY ? in.readVInt() : 0;
+      final int longsSize = in.readVInt();
 
-      BytesRef minTerm, maxTerm;
-      if (version >= VersionBlockTreeTermsWriter.VERSION_MIN_MAX_TERMS) {
-        minTerm = readBytesRef(in);
-        maxTerm = readBytesRef(in);
-      } else {
-        minTerm = maxTerm = null;
-      }
+      BytesRef minTerm = readBytesRef(in);
+      BytesRef maxTerm = readBytesRef(in);
       if (docCount < 0 || docCount > info.getDocCount()) { // #docs with field must be <= #docs
         throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")");
       }
@@ -217,9 +210,6 @@ final class VersionBlockTreeTermsReader extends FieldsProducer {
     int version = CodecUtil.checkHeader(input, VersionBlockTreeTermsWriter.TERMS_CODEC_NAME,
                           VersionBlockTreeTermsWriter.VERSION_START,
                           VersionBlockTreeTermsWriter.VERSION_CURRENT);
-    if (version < VersionBlockTreeTermsWriter.VERSION_APPEND_ONLY) {
-      dirOffset = input.readLong();
-    }
     return version;
   }
 
@@ -228,22 +218,14 @@ final class VersionBlockTreeTermsReader extends FieldsProducer {
     int version = CodecUtil.checkHeader(input, VersionBlockTreeTermsWriter.TERMS_INDEX_CODEC_NAME,
                           VersionBlockTreeTermsWriter.VERSION_START,
                           VersionBlockTreeTermsWriter.VERSION_CURRENT);
-    if (version < VersionBlockTreeTermsWriter.VERSION_APPEND_ONLY) {
-      indexDirOffset = input.readLong();
-    }
     return version;
   }
 
   /** Seek {@code input} to the directory offset. */
   private void seekDir(IndexInput input, long dirOffset)
       throws IOException {
-    if (version >= VersionBlockTreeTermsWriter.VERSION_CHECKSUM) {
     input.seek(input.length() - CodecUtil.footerLength() - 8);
     dirOffset = input.readLong();
-    } else if (version >= VersionBlockTreeTermsWriter.VERSION_APPEND_ONLY) {
-      input.seek(input.length() - 8);
-      dirOffset = input.readLong();
-    }
     input.seek(dirOffset);
   }
 
@@ -306,7 +288,6 @@ final class VersionBlockTreeTermsReader extends FieldsProducer {
 
   @Override
   public void checkIntegrity() throws IOException {
-    if (version >= VersionBlockTreeTermsWriter.VERSION_CHECKSUM) {
     // term dictionary
     CodecUtil.checksumEntireFile(in);
 
@@ -314,4 +295,3 @@ final class VersionBlockTreeTermsReader extends FieldsProducer {
     postingsReader.checkIntegrity();
   }
 }
-}
@@ -84,116 +84,17 @@ import org.apache.lucene.util.packed.PackedInts;
  */
 
 /**
- * Block-based terms index and dictionary writer.
- * <p>
- * Writes terms dict and index, block-encoding (column
- * stride) each term's metadata for each set of terms
- * between two index terms.
- * <p>
- * Files:
- * <ul>
- *   <li><tt>.tim</tt>: <a href="#Termdictionary">Term Dictionary</a></li>
- *   <li><tt>.tip</tt>: <a href="#Termindex">Term Index</a></li>
- * </ul>
- * <p>
- * <a name="Termdictionary" id="Termdictionary"></a>
- * <h3>Term Dictionary</h3>
+ * This is just like {@link BlockTreeTermsWriter}, except it also stores a version per term, and adds a method to its TermsEnum
+ * implementation to seekExact only if the version is >= the specified version. The version is added to the terms index to avoid seeking if
+ * no term in the block has a high enough version. The term blocks file is .tiv and the terms index extension is .tipv.
  *
- * <p>The .tim file contains the list of terms in each
- * field along with per-term statistics (such as docfreq)
- * and per-term metadata (typically pointers to the postings list
- * for that term in the inverted index).
- * </p>
- *
- * <p>The .tim is arranged in blocks: with blocks containing
- * a variable number of entries (by default 25-48), where
- * each entry is either a term or a reference to a
- * sub-block.</p>
- *
- * <p>NOTE: The term dictionary can plug into different postings implementations:
- * the postings writer/reader are actually responsible for encoding
- * and decoding the Postings Metadata and Term Metadata sections.</p>
- *
- * <ul>
- *    <li>TermsDict (.tim) --> Header, <i>PostingsHeader</i>, NodeBlock<sup>NumBlocks</sup>,
- *                               FieldSummary, DirOffset, Footer</li>
- *    <li>NodeBlock --> (OuterNode | InnerNode)</li>
- *    <li>OuterNode --> EntryCount, SuffixLength, Byte<sup>SuffixLength</sup>, StatsLength, < TermStats ><sup>EntryCount</sup>, MetaLength, <<i>TermMetadata</i>><sup>EntryCount</sup></li>
- *    <li>InnerNode --> EntryCount, SuffixLength[,Sub?], Byte<sup>SuffixLength</sup>, StatsLength, < TermStats ? ><sup>EntryCount</sup>, MetaLength, <<i>TermMetadata ? </i>><sup>EntryCount</sup></li>
- *    <li>TermStats --> DocFreq, TotalTermFreq </li>
- *    <li>FieldSummary --> NumFields, <FieldNumber, NumTerms, RootCodeLength, Byte<sup>RootCodeLength</sup>,
- *        SumTotalTermFreq?, SumDocFreq, DocCount, LongsSize, MinTerm, MaxTerm><sup>NumFields</sup></li>
- *    <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
- *    <li>DirOffset --> {@link DataOutput#writeLong Uint64}</li>
- *    <li>MinTerm,MaxTerm --> {@link DataOutput#writeVInt VInt} length followed by the byte[]</li>
- *    <li>EntryCount,SuffixLength,StatsLength,DocFreq,MetaLength,NumFields,
- *        FieldNumber,RootCodeLength,DocCount,LongsSize --> {@link DataOutput#writeVInt VInt}</li>
- *    <li>TotalTermFreq,NumTerms,SumTotalTermFreq,SumDocFreq -->
- *        {@link DataOutput#writeVLong VLong}</li>
- *    <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
- * </ul>
- * <p>Notes:</p>
- * <ul>
- *    <li>Header is a {@link CodecUtil#writeHeader CodecHeader} storing the version information
- *        for the BlockTree implementation.</li>
- *    <li>DirOffset is a pointer to the FieldSummary section.</li>
- *    <li>DocFreq is the count of documents which contain the term.</li>
- *    <li>TotalTermFreq is the total number of occurrences of the term. This is encoded
- *        as the difference between the total number of occurrences and the DocFreq.</li>
- *    <li>FieldNumber is the fields number from {@link FieldInfos}. (.fnm)</li>
- *    <li>NumTerms is the number of unique terms for the field.</li>
- *    <li>RootCode points to the root block for the field.</li>
- *    <li>SumDocFreq is the total number of postings, the number of term-document pairs across
- *        the entire field.</li>
- *    <li>DocCount is the number of documents that have at least one posting for this field.</li>
- *    <li>LongsSize records how many long values the postings writer/reader record per term
- *        (e.g., to hold freq/prox/doc file offsets).
- *    <li>MinTerm, MaxTerm are the lowest and highest term in this field.</li>
- *    <li>PostingsHeader and TermMetadata are plugged into by the specific postings implementation:
- *        these contain arbitrary per-file data (such as parameters or versioning information)
- *        and per-term data (such as pointers to inverted files).</li>
- *    <li>For inner nodes of the tree, every entry will steal one bit to mark whether it points
- *        to child nodes(sub-block). If so, the corresponding TermStats and TermMetaData are omitted </li>
- * </ul>
- * <a name="Termindex" id="Termindex"></a>
- * <h3>Term Index</h3>
- * <p>The .tip file contains an index into the term dictionary, so that it can be
- * accessed randomly.  The index is also used to determine
- * when a given term cannot exist on disk (in the .tim file), saving a disk seek.</p>
- * <ul>
- *   <li>TermsIndex (.tip) --> Header, FSTIndex<sup>NumFields</sup>
- *                             <IndexStartFP><sup>NumFields</sup>, DirOffset, Footer</li>
- *   <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
- *   <li>DirOffset --> {@link DataOutput#writeLong Uint64}</li>
- *   <li>IndexStartFP --> {@link DataOutput#writeVLong VLong}</li>
- *   <!-- TODO: better describe FST output here -->
- *   <li>FSTIndex --> {@link FST FST<byte[]>}</li>
- *   <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
- * </ul>
- * <p>Notes:</p>
- * <ul>
- *   <li>The .tip file contains a separate FST for each
- *       field.  The FST maps a term prefix to the on-disk
- *       block that holds all terms starting with that
- *       prefix.  Each field's IndexStartFP points to its
- *       FST.</li>
- *   <li>DirOffset is a pointer to the start of the IndexStartFPs
- *       for all fields</li>
- *   <li>It's possible that an on-disk block would contain
- *       too many terms (more than the allowed maximum
- *       (default: 48)).  When this happens, the block is
- *       sub-divided into new blocks (called "floor
- *       blocks"), and then the output in the FST for the
- *       block's prefix encodes the leading byte of each
- *       sub-block, and its file pointer.
- * </ul>
- *
- * @see BlockTreeTermsReader
  * @lucene.experimental
  */
-// nocommit fix jdocs
 final class VersionBlockTreeTermsWriter extends FieldsConsumer {
 
+  private static boolean DEBUG = IDVersionSegmentTermsEnum.DEBUG;
 
   static final PairOutputs<BytesRef,Long> FST_OUTPUTS = new PairOutputs<>(ByteSequenceOutputs.getSingleton(),
                                                                           PositiveIntOutputs.getSingleton());
 
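The replacement javadoc above compresses the design to its essentials: every term carries a version, and the terms index records the maximum version per block so a versioned seek can skip blocks that cannot contain a new-enough entry. As a concrete anchor, the pattern the test code later in this commit uses to attach a version to an ID is sketched below; the wrapper class name IDVersionFields is invented for illustration, while IDVersionPostingsFormat.longToBytes and StringAndPayloadField are the helpers that appear elsewhere in this patch (package/imports for them are assumed).

// Sketch mirroring the test helper later in this commit (TestIDVersionPostingsFormat.makeIDField):
// the ID is indexed as a single token and its version rides along as an 8-byte payload.
import org.apache.lucene.document.Field;
import org.apache.lucene.util.BytesRef;

final class IDVersionFields {
  static Field makeIDField(String id, long version) {
    BytesRef payload = new BytesRef(8);
    payload.length = 8;
    // Pack the long version into the payload bytes (helper from the idversion codec):
    IDVersionPostingsFormat.longToBytes(version, payload);
    // StringAndPayloadField (added in this patch) emits one token carrying this payload:
    return new StringAndPayloadField("id", id, payload);
  }
}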
@@ -224,25 +125,11 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
   /** Initial terms format. */
   public static final int VERSION_START = 0;
 
-  // nocommit nuke all these old versions
-
-  /** Append-only */
-  public static final int VERSION_APPEND_ONLY = 1;
-
-  /** Meta data as array */
-  public static final int VERSION_META_ARRAY = 2;
-
-  /** checksums */
-  public static final int VERSION_CHECKSUM = 3;
-
-  /** min/max term */
-  public static final int VERSION_MIN_MAX_TERMS = 4;
-
   /** Current terms format. */
-  public static final int VERSION_CURRENT = VERSION_MIN_MAX_TERMS;
+  public static final int VERSION_CURRENT = VERSION_START;
 
   /** Extension of terms index file */
-  static final String TERMS_INDEX_EXTENSION = "tip";
+  static final String TERMS_INDEX_EXTENSION = "tipv";
   final static String TERMS_INDEX_CODEC_NAME = "VERSION_BLOCK_TREE_TERMS_INDEX";
 
   private final IndexOutput out;
@@ -297,7 +184,6 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
                                     int maxItemsInBlock)
     throws IOException
   {
-    System.out.println("VBTTW minItemsInBlock=" + minItemsInBlock + " maxItemsInBlock=" + maxItemsInBlock);
     if (minItemsInBlock <= 1) {
       throw new IllegalArgumentException("minItemsInBlock must be >= 2; got " + minItemsInBlock);
     }
@@ -626,8 +512,7 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
     // following floor blocks:
 
     void writeBlocks(IntsRef prevTerm, int prefixLength, int count) throws IOException {
-      // nocommit why can't we do floor blocks for root frame?
-      if (prefixLength == 0 || count <= maxItemsInBlock) {
+      if (count <= maxItemsInBlock) {
         // Easy case: not floor block. Eg, prefix is "foo",
         // and we found 30 terms/sub-blocks starting w/ that
         // prefix, and minItemsInBlock <= 30 <=
@@ -645,7 +530,7 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
       // TODO: we could store min & max suffix start byte
       // in each block, to make floor blocks authoritative
 
-      //if (DEBUG) {
+      if (DEBUG) {
         final BytesRef prefix = new BytesRef(prefixLength);
         for(int m=0;m<prefixLength;m++) {
           prefix.bytes[m] = (byte) prevTerm.ints[m];
@@ -653,7 +538,7 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
         prefix.length = prefixLength;
         //System.out.println("\nWBS count=" + count + " prefix=" + prefix.utf8ToString() + " " + prefix);
         System.out.println("writeBlocks: prefix=" + toString(prefix) + " " + prefix + " count=" + count + " pending.size()=" + pending.size());
-      //}
+      }
       //System.out.println("\nwbs count=" + count);
 
       final int savLabel = prevTerm.ints[prevTerm.offset + prefixLength];
@@ -874,9 +759,9 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
       // Write block header:
       out.writeVInt((length<<1)|(isLastInFloor ? 1:0));
 
-      // if (DEBUG) {
+      if (DEBUG) {
         System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + toString(prefix) + " entCount=" + length + " startFP=" + startFP + " futureTermCount=" + futureTermCount + (isFloor ? (" floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") + " isLastInFloor=" + isLastInFloor);
-      // }
+      }
 
       // 1st pass: pack term suffix bytes into byte[] blob
       // TODO: cutover to bulk int codec... simple64?
@@ -920,12 +805,12 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
         BlockTermState state = term.state;
         maxVersionInBlock = Math.max(maxVersionInBlock, ((IDVersionTermState) state).idVersion);
         final int suffix = term.term.length - prefixLength;
-        // if (DEBUG) {
+        if (DEBUG) {
           BytesRef suffixBytes = new BytesRef(suffix);
           System.arraycopy(term.term.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
           suffixBytes.length = suffix;
           System.out.println(" " + (countx++) + ": write term suffix=" + toString(suffixBytes));
-        // }
+        }
         // For leaf block we write suffix straight
         suffixWriter.writeVInt(suffix);
         suffixWriter.writeBytes(term.term.bytes, prefixLength, suffix);
@@ -957,12 +842,12 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
         BlockTermState state = term.state;
         maxVersionInBlock = Math.max(maxVersionInBlock, ((IDVersionTermState) state).idVersion);
         final int suffix = term.term.length - prefixLength;
-        // if (DEBUG) {
+        if (DEBUG) {
           BytesRef suffixBytes = new BytesRef(suffix);
           System.arraycopy(term.term.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
           suffixBytes.length = suffix;
           System.out.println(" " + (countx++) + ": write term suffix=" + toString(suffixBytes));
-        // }
+        }
         // For non-leaf block we borrow 1 bit to record
         // if entry is term or sub-block
         suffixWriter.writeVInt(suffix<<1);
@@ -1007,12 +892,12 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
         suffixWriter.writeBytes(block.prefix.bytes, prefixLength, suffix);
         assert block.fp < startFP;
 
-        // if (DEBUG) {
+        if (DEBUG) {
           BytesRef suffixBytes = new BytesRef(suffix);
           System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
           suffixBytes.length = suffix;
           System.out.println(" " + (countx++) + ": write sub-block suffix=" + toString(suffixBytes) + " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
-        // }
+        }
 
         suffixWriter.writeVLong(startFP - block.fp);
         subIndices.add(block.index);
@@ -28,7 +28,7 @@ import org.apache.lucene.document.FieldType;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.util.BytesRef;
 
-// nocommit can we take a BytesRef token instead?
+// TODO: can we take a BytesRef token instead?
 
 /** Produces a single String token from the provided value, with the provided payload. */
 class StringAndPayloadField extends Field {
@@ -49,6 +49,7 @@ import org.apache.lucene.index.PerThreadPKLookup;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.TieredMergePolicy;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
@@ -90,16 +91,16 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
     String next();
   }
 
-  // nocommit make a similar test for BT, w/ varied IDs:
+  // TODO make a similar test for BT, w/ varied IDs:
 
   public void testRandom() throws Exception {
     Directory dir = newDirectory();
     IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
-    // nocommit randomize the block sizes:
-    iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
-    // nocommit put back
-    //RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
-    IndexWriter w = new IndexWriter(dir, iwc);
+    int minItemsInBlock = TestUtil.nextInt(random(), 2, 50);
+    int maxItemsInBlock = 2*(minItemsInBlock-1) + random().nextInt(50);
+    iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat(minItemsInBlock, maxItemsInBlock)));
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+    //IndexWriter w = new IndexWriter(dir, iwc);
     int numDocs = atLeast(1000);
     Map<String,Long> idValues = new HashMap<String,Long>();
     int docUpto = 0;
@@ -210,9 +211,10 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
       System.out.println("TEST: useMonotonicVersion=" + useMonotonicVersion);
     }
 
+    List<String> idsList = new ArrayList<>();
+
     long version = 0;
     while (docUpto < numDocs) {
-      // nocommit add deletes in
       String idValue = idPrefix + ids.next();
       if (idValues.containsKey(idValue)) {
         continue;
@@ -229,11 +231,38 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
       Document doc = new Document();
       doc.add(makeIDField(idValue, version));
       w.addDocument(doc);
+      idsList.add(idValue);
+
+      if (idsList.size() > 0 && random().nextInt(7) == 5) {
+        // Randomly delete or update a previous ID
+        idValue = idsList.get(random().nextInt(idsList.size()));
+        if (random().nextBoolean()) {
+          if (useMonotonicVersion) {
+            version += TestUtil.nextInt(random(), 1, 10);
+          } else {
+            version = random().nextLong() & 0x7fffffffffffffffL;
+          }
+          doc = new Document();
+          doc.add(makeIDField(idValue, version));
+          if (VERBOSE) {
+            System.out.println(" update " + idValue + " -> " + version);
+          }
+          w.updateDocument(new Term("id", idValue), doc);
+          idValues.put(idValue, version);
+        } else {
+          if (VERBOSE) {
+            System.out.println(" delete " + idValue);
+          }
+          w.deleteDocuments(new Term("id", idValue));
+          idValues.remove(idValue);
+        }
+      }
+
       docUpto++;
     }
 
-    //IndexReader r = w.getReader();
-    IndexReader r = DirectoryReader.open(w, true);
+    IndexReader r = w.getReader();
+    //IndexReader r = DirectoryReader.open(w, true);
     PerThreadVersionPKLookup lookup = new PerThreadVersionPKLookup(r, "id");
 
     List<Map.Entry<String,Long>> idValuesList = new ArrayList<>(idValues.entrySet());
@@ -242,7 +271,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
       String idValue;
 
       if (random().nextBoolean()) {
-        idValue = idValuesList.get(random().nextInt(numDocs)).getKey();
+        idValue = idValuesList.get(random().nextInt(idValuesList.size())).getKey();
       } else if (random().nextBoolean()) {
         idValue = ids.next();
       } else {
@@ -318,14 +347,6 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
     payload.length = 8;
     IDVersionPostingsFormat.longToBytes(version, payload);
     return new StringAndPayloadField("id", id, payload);
-
-    /*
-    Field field = newTextField("id", "", Field.Store.NO);
-    Token token = new Token(id, 0, id.length());
-    token.setPayload(payload);
-    field.setTokenStream(new CannedTokenStream(token));
-    return field;
-    */
   }
 
   public void testMoreThanOneDocPerIDOneSegment() throws Exception {
@@ -353,6 +374,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
     Directory dir = newDirectory();
     IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
     iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
+    iwc.setMergePolicy(new TieredMergePolicy());
     MergeScheduler ms = iwc.getMergeScheduler();
     if (ms instanceof ConcurrentMergeScheduler) {
       iwc.setMergeScheduler(new ConcurrentMergeScheduler() {
@@ -362,7 +384,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
         }
       });
     }
-    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+    IndexWriter w = new IndexWriter(dir, iwc);
     Document doc = new Document();
     doc.add(makeIDField("id", 17));
     w.addDocument(doc);
@@ -380,7 +402,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
       // expected
       assertTrue(ioe.getCause() instanceof IllegalArgumentException);
     }
-    w.w.close();
+    w.close();
     dir.close();
   }
 
@@ -37,7 +37,7 @@ import org.apache.lucene.util.Bits;
 * time.
 * @lucene.experimental */
 
-// nocommit mv under blocktree? but ... it's used by others (e.g. block terms)
+// TODO: maybe move under blocktree? but it's used by other terms dicts (e.g. Block)
 
 // TODO: find a better name; this defines the API that the
 // terms dict impls use to talk to a postings impl.
@@ -1640,22 +1640,15 @@ public class CheckIndex {
       // Again, with the one doc deleted:
       checkFields(tfv, onlyDocIsDeleted, 1, fieldInfos, false, true, infoStream, verbose);
 
-      // Only agg stats if the doc is live:
-      final boolean doStats = liveDocs == null || liveDocs.get(j);
-
-      if (doStats == false) {
-        // nocommit is it OK to stop verifying deleted docs?
+      if (liveDocs != null && liveDocs.get(j) == false) {
+        // Only check live docs
         continue;
       }
 
-      if (doStats) {
       status.docCount++;
-      }
 
       for(String field : tfv) {
-        if (doStats) {
         status.totVectors++;
-        }
 
         // Make sure FieldInfo thinks this field is vector'd:
         final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
@@ -94,6 +94,8 @@ final class DefaultIndexingChain extends DocConsumer {
     // aborting on any exception from this method
 
     int numDocs = state.segmentInfo.getDocCount();
+
+    // TODO: we could set liveDocs earlier and then fix DVs to also not write deleted docs:
     writeNorms(state);
     writeDocValues(state);
 
@@ -34,7 +34,6 @@ final class FreqProxTermsWriter extends TermsHash {
   }
 
   private void applyDeletes(SegmentWriteState state, Fields fields) throws IOException {
-    System.out.println("applyDeletes segUpdates=" + state.segUpdates);
 
     // Process any pending Term deletes for this newly
     // flushed segment:
@@ -108,8 +107,6 @@ final class FreqProxTermsWriter extends TermsHash {
       fields.setLiveDocs(state.liveDocs);
     }
 
-    System.out.println("now: " + state.liveDocs + " pf=" + state.segmentInfo.getCodec().postingsFormat());
-
     FieldsConsumer consumer = state.segmentInfo.getCodec().postingsFormat().fieldsConsumer(state);
     boolean success = false;
     try {
@@ -1696,7 +1696,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit{
     if (doWait) {
       synchronized(this) {
         while(true) {
-
           if (hitOOM) {
             throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete forceMerge");
           }
@@ -17,8 +17,7 @@ package org.apache.lucene.uninverting;
 * limitations under the License.
 */
 
-import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
-
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
@@ -47,14 +46,17 @@ import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.index.StoredDocument;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.TermsEnum.SeekStatus;
+import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
 
+import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
+
 public class TestFieldCacheVsDocValues extends LuceneTestCase {
 
   public void testByteMissingVsFieldCache() throws Exception {
@@ -315,14 +317,11 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
     }
 
     // delete some docs
-    // nocommit hmmm what to do
-    /*
     int numDeletions = random().nextInt(numDocs/10);
     for (int i = 0; i < numDeletions; i++) {
       int id = random().nextInt(numDocs);
       writer.deleteDocuments(new Term("id", Integer.toString(id)));
     }
-    */
     writer.shutdown();
 
     // compare
@@ -331,7 +330,7 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
       AtomicReader r = context.reader();
       SortedDocValues expected = FieldCache.DEFAULT.getTermsIndex(r, "indexed");
       SortedDocValues actual = r.getSortedDocValues("dv");
-      assertEquals(r.maxDoc(), expected, actual);
+      assertEquals(r.maxDoc(), r.getLiveDocs(), expected, actual);
     }
     ir.close();
     dir.close();
@@ -382,14 +381,11 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
     }
 
     // delete some docs
-    // nocommit hmmm what to do
-    /*
     int numDeletions = random().nextInt(numDocs/10);
     for (int i = 0; i < numDeletions; i++) {
       int id = random().nextInt(numDocs);
       writer.deleteDocuments(new Term("id", Integer.toString(id)));
     }
-    */
 
     // compare per-segment
     DirectoryReader ir = writer.getReader();
@@ -397,7 +393,7 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
       AtomicReader r = context.reader();
       SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(r, "indexed", null);
       SortedSetDocValues actual = r.getSortedSetDocValues("dv");
-      assertEquals(r.maxDoc(), expected, actual);
+      assertEquals(r.maxDoc(), r.getLiveDocs(), expected, actual);
     }
     ir.close();
 
@@ -408,7 +404,7 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
     AtomicReader ar = getOnlySegmentReader(ir);
     SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(ar, "indexed", null);
     SortedSetDocValues actual = ar.getSortedSetDocValues("dv");
-    assertEquals(ir.maxDoc(), expected, actual);
+    assertEquals(ir.maxDoc(), ar.getLiveDocs(), expected, actual);
     ir.close();
 
     writer.shutdown();
@@ -449,14 +445,11 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
     }
 
     // delete some docs
-    // nocommit hmmm what to do
-    /*
     int numDeletions = random().nextInt(numDocs/10);
     for (int i = 0; i < numDeletions; i++) {
       int id = random().nextInt(numDocs);
       writer.deleteDocuments(new Term("id", Integer.toString(id)));
     }
-    */
 
     // merge some segments and ensure that at least one of them has more than
     // 256 values
@@ -496,102 +489,149 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
     }
   }
 
-  private void assertEquals(int maxDoc, SortedDocValues expected, SortedDocValues actual) throws Exception {
-    assertEquals(maxDoc, DocValues.singleton(expected), DocValues.singleton(actual));
+  private void assertEquals(int maxDoc, Bits liveDocs, SortedDocValues expected, SortedDocValues actual) throws Exception {
+    assertEquals(maxDoc, liveDocs, DocValues.singleton(expected), DocValues.singleton(actual));
   }
 
-  private void assertEquals(int maxDoc, SortedSetDocValues expected, SortedSetDocValues actual) throws Exception {
+  private void assertEquals(int maxDoc, Bits liveDocs, SortedSetDocValues expected, SortedSetDocValues actual) throws Exception {
     // can be null for the segment if no docs actually had any SortedDocValues
     // in this case FC.getDocTermsOrds returns EMPTY
     if (actual == null) {
       assertEquals(DocValues.EMPTY_SORTED_SET, expected);
       return;
     }
-    assertEquals(expected.getValueCount(), actual.getValueCount());
-    // compare ord lists
+    FixedBitSet liveOrdsExpected = new FixedBitSet((int) expected.getValueCount());
+    FixedBitSet liveOrdsActual = new FixedBitSet((int) actual.getValueCount());
+
+    BytesRef expectedBytes = new BytesRef();
+    BytesRef actualBytes = new BytesRef();
+
+    // compare values for all live docs:
     for (int i = 0; i < maxDoc; i++) {
+      if (liveDocs != null && liveDocs.get(i) == false) {
+        // Don't check deleted docs
+        continue;
+      }
       expected.setDocument(i);
       actual.setDocument(i);
       long expectedOrd;
       while ((expectedOrd = expected.nextOrd()) != NO_MORE_ORDS) {
-        assertEquals(expectedOrd, actual.nextOrd());
+        expected.lookupOrd(expectedOrd, expectedBytes);
+        long actualOrd = actual.nextOrd();
+        assertTrue(actualOrd != NO_MORE_ORDS);
+        actual.lookupOrd(actualOrd, actualBytes);
+        assertEquals(expectedBytes, actualBytes);
+        liveOrdsExpected.set((int) expectedOrd);
+        liveOrdsActual.set((int) actualOrd);
       }
 
       assertEquals(NO_MORE_ORDS, actual.nextOrd());
     }
 
+    // Make sure both have same number of non-deleted values:
+    assertEquals(liveOrdsExpected.cardinality(), liveOrdsActual.cardinality());
+
     // compare ord dictionary
-    BytesRef expectedBytes = new BytesRef();
-    BytesRef actualBytes = new BytesRef();
-    for (long i = 0; i < expected.getValueCount(); i++) {
-      expected.lookupTerm(expectedBytes);
-      actual.lookupTerm(actualBytes);
-      assertEquals(expectedBytes, actualBytes);
+    int expectedOrd = 0;
+    int actualOrd = 0;
+    while (expectedOrd < expected.getValueCount()) {
+      expectedOrd = liveOrdsExpected.nextSetBit(expectedOrd);
+      if (expectedOrd == -1) {
+        break;
       }
+      actualOrd = liveOrdsActual.nextSetBit(actualOrd);
+      expected.lookupOrd(expectedOrd, expectedBytes);
+      actual.lookupOrd(actualOrd, actualBytes);
+      assertEquals(expectedBytes, actualBytes);
+      expectedOrd++;
+      actualOrd++;
+    }
+    assertTrue(actualOrd == actual.getValueCount() || liveOrdsActual.nextSetBit(actualOrd) == -1);
 
     // compare termsenum
-    assertEquals(expected.getValueCount(), expected.termsEnum(), actual.termsEnum());
+    assertEquals(expected.getValueCount(), expected.termsEnum(), liveOrdsExpected, actual.termsEnum(), liveOrdsActual);
   }
 
-  private void assertEquals(long numOrds, TermsEnum expected, TermsEnum actual) throws Exception {
+  /** Does termsEnum.next() but then skips over deleted ords. */
+  private static BytesRef next(TermsEnum termsEnum, Bits liveOrds) throws IOException {
+    while (termsEnum.next() != null) {
+      if (liveOrds.get((int) termsEnum.ord())) {
+        return termsEnum.term();
+      }
+    }
+    return null;
+  }
+
+  /** Does termsEnum.seekCeil() but then skips over deleted ords. */
+  private static SeekStatus seekCeil(TermsEnum termsEnum, BytesRef term, Bits liveOrds) throws IOException {
+    SeekStatus status = termsEnum.seekCeil(term);
+    if (status == SeekStatus.END) {
+      return status;
+    } else {
+      if (liveOrds.get((int) termsEnum.ord()) == false) {
+        while (termsEnum.next() != null) {
+          if (liveOrds.get((int) termsEnum.ord())) {
+            return SeekStatus.NOT_FOUND;
+          }
+        }
+        return SeekStatus.END;
+      } else {
+        return status;
+      }
+    }
+  }
+
+  private void assertEquals(long numOrds, TermsEnum expected, Bits liveOrdsExpected, TermsEnum actual, Bits liveOrdsActual) throws Exception {
     BytesRef ref;
 
     // sequential next() through all terms
-    while ((ref = expected.next()) != null) {
-      assertEquals(ref, actual.next());
-      assertEquals(expected.ord(), actual.ord());
-      assertEquals(expected.term(), actual.term());
-    }
-    assertNull(actual.next());
-
-    // sequential seekExact(ord) through all terms
-    for (long i = 0; i < numOrds; i++) {
-      expected.seekExact(i);
-      actual.seekExact(i);
-      assertEquals(expected.ord(), actual.ord());
+    while ((ref = next(expected, liveOrdsExpected)) != null) {
+      assertEquals(ref, next(actual, liveOrdsActual));
       assertEquals(expected.term(), actual.term());
     }
+    assertNull(next(actual, liveOrdsActual));
 
     // sequential seekExact(BytesRef) through all terms
     for (long i = 0; i < numOrds; i++) {
+      if (liveOrdsExpected.get((int) i) == false) {
+        continue;
+      }
      expected.seekExact(i);
      assertTrue(actual.seekExact(expected.term()));
-      assertEquals(expected.ord(), actual.ord());
      assertEquals(expected.term(), actual.term());
    }
 
     // sequential seekCeil(BytesRef) through all terms
     for (long i = 0; i < numOrds; i++) {
+      if (liveOrdsExpected.get((int) i) == false) {
+        continue;
+      }
      expected.seekExact(i);
      assertEquals(SeekStatus.FOUND, actual.seekCeil(expected.term()));
-      assertEquals(expected.ord(), actual.ord());
-      assertEquals(expected.term(), actual.term());
-    }
-
-    // random seekExact(ord)
-    for (long i = 0; i < numOrds; i++) {
-      long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
-      expected.seekExact(randomOrd);
-      actual.seekExact(randomOrd);
-      assertEquals(expected.ord(), actual.ord());
      assertEquals(expected.term(), actual.term());
    }
 
     // random seekExact(BytesRef)
     for (long i = 0; i < numOrds; i++) {
       long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
+      if (liveOrdsExpected.get((int) randomOrd) == false) {
+        continue;
+      }
      expected.seekExact(randomOrd);
      actual.seekExact(expected.term());
-      assertEquals(expected.ord(), actual.ord());
      assertEquals(expected.term(), actual.term());
    }
 
     // random seekCeil(BytesRef)
     for (long i = 0; i < numOrds; i++) {
+      if (liveOrdsExpected.get((int) i) == false) {
+        continue;
+      }
      BytesRef target = new BytesRef(TestUtil.randomUnicodeString(random()));
-      SeekStatus expectedStatus = expected.seekCeil(target);
-      assertEquals(expectedStatus, actual.seekCeil(target));
+      SeekStatus expectedStatus = seekCeil(expected, target, liveOrdsExpected);
+      assertEquals(expectedStatus, seekCeil(actual, target, liveOrdsActual));
      if (expectedStatus != SeekStatus.END) {
-        assertEquals(expected.ord(), actual.ord());
        assertEquals(expected.term(), actual.term());
      }
    }