LUCENE-5675: fix nocommits

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5675@1596512 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2014-05-21 08:57:59 +00:00
parent d6968c3924
commit 18d2cfaf9c
13 changed files with 184 additions and 269 deletions

View File

@@ -80,6 +80,7 @@ public final class IDVersionPostingsWriter extends PushPostingsWriterBase {
lastDocID = docID;
lastPosition = -1;
lastVersion = -1;
}

@Override
@@ -94,7 +95,7 @@ public final class IDVersionPostingsWriter extends PushPostingsWriterBase {
if (payload.length != 8) {
throw new IllegalArgumentException("payload.length != 8 (got " + payload.length + ")");
}
lastVersion = IDVersionPostingsFormat.bytesToLong(payload);
if (lastVersion < 0) {
throw new IllegalArgumentException("version must be >= 0 (got: " + lastVersion + "; payload=" + payload + ")");

View File

@@ -230,15 +230,13 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
return ((IDVersionTermState) currentFrame.state).idVersion;
}

/** Returns false if the term deos not exist, or it exists but its version is too old (< minIDVersion). */
/** Returns false if the term does not exist, or it exists but its version is too old (< minIDVersion). */
public boolean seekExact(final BytesRef target, long minIDVersion) throws IOException {
if (fr.index == null) {
throw new IllegalStateException("terms index was not loaded");
}
// nocommit would be nice if somehow on doing deletes we didn't have to double-lookup again...
if (term.bytes.length <= target.length) {
term.bytes = ArrayUtil.grow(term.bytes, 1+target.length);
}

@@ -260,7 +258,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
boolean changed = false;
// nocommit we could stop earlier w/ the version check, every time we traverse an index arc we can check?
// TODO: we could stop earlier w/ the version check, every time we traverse an index arc we can check?
if (currentFrame != staticFrame) {

@@ -380,7 +378,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
}
return false;
}
System.out.println(" term version=" + ((IDVersionTermState) currentFrame.state).idVersion + " frame version=" + currentFrame.maxIDVersion + " frame ord=" + currentFrame.ord);
// System.out.println(" term version=" + ((IDVersionTermState) currentFrame.state).idVersion + " frame version=" + currentFrame.maxIDVersion + " frame ord=" + currentFrame.ord);
if (DEBUG) {
System.out.println(" target is same as current; return true");

View File

@@ -220,11 +220,10 @@ final class IDVersionSegmentTermsEnumFrame {
}

void rewind() {
// Force reload:
fp = fpOrig;
nextEnt = -1;
// nocommit move to BT too?
//state.termBlockOrd = 0;
hasTerms = hasTermsOrig;
if (isFloor) {
floorDataReader.rewind();

@@ -390,8 +389,7 @@ final class IDVersionSegmentTermsEnumFrame {
public void decodeMetaData() throws IOException {
//if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + segment + " mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd);
//if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + ste.fr.parent.segment + " mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd);
System.out.println("\nBTTR.decodeMetadata seg=" + ste.fr.parent.segment + " mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd);
assert nextEnt >= 0;

View File

@@ -136,9 +136,7 @@ final class VersionBlockTreeTermsReader extends FieldsProducer {
}

// verify
if (version >= VersionBlockTreeTermsWriter.VERSION_CHECKSUM) {
CodecUtil.checksumEntireFile(indexIn);
}
CodecUtil.checksumEntireFile(indexIn);

// Have PostingsReader init itself
postingsReader.init(in);

@@ -167,15 +165,10 @@ final class VersionBlockTreeTermsReader extends FieldsProducer {
final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
final long sumDocFreq = in.readVLong();
final int docCount = in.readVInt();
final int longsSize = version >= VersionBlockTreeTermsWriter.VERSION_META_ARRAY ? in.readVInt() : 0;
final int longsSize = in.readVInt();
BytesRef minTerm, maxTerm;
if (version >= VersionBlockTreeTermsWriter.VERSION_MIN_MAX_TERMS) {
minTerm = readBytesRef(in);
maxTerm = readBytesRef(in);
} else {
minTerm = maxTerm = null;
}
BytesRef minTerm = readBytesRef(in);
BytesRef maxTerm = readBytesRef(in);
if (docCount < 0 || docCount > info.getDocCount()) { // #docs with field must be <= #docs
throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")");
}

@@ -217,9 +210,6 @@ final class VersionBlockTreeTermsReader extends FieldsProducer {
int version = CodecUtil.checkHeader(input, VersionBlockTreeTermsWriter.TERMS_CODEC_NAME,
VersionBlockTreeTermsWriter.VERSION_START,
VersionBlockTreeTermsWriter.VERSION_CURRENT);
if (version < VersionBlockTreeTermsWriter.VERSION_APPEND_ONLY) {
dirOffset = input.readLong();
}
return version;
}

@@ -228,22 +218,14 @@ final class VersionBlockTreeTermsReader extends FieldsProducer {
int version = CodecUtil.checkHeader(input, VersionBlockTreeTermsWriter.TERMS_INDEX_CODEC_NAME,
VersionBlockTreeTermsWriter.VERSION_START,
VersionBlockTreeTermsWriter.VERSION_CURRENT);
if (version < VersionBlockTreeTermsWriter.VERSION_APPEND_ONLY) {
indexDirOffset = input.readLong();
}
return version;
}

/** Seek {@code input} to the directory offset. */
private void seekDir(IndexInput input, long dirOffset)
throws IOException {
if (version >= VersionBlockTreeTermsWriter.VERSION_CHECKSUM) {
input.seek(input.length() - CodecUtil.footerLength() - 8);
dirOffset = input.readLong();
} else if (version >= VersionBlockTreeTermsWriter.VERSION_APPEND_ONLY) {
input.seek(input.length() - 8);
dirOffset = input.readLong();
}
input.seek(input.length() - CodecUtil.footerLength() - 8);
dirOffset = input.readLong();
input.seek(dirOffset);
}

@@ -306,12 +288,10 @@ final class VersionBlockTreeTermsReader extends FieldsProducer {
@Override
public void checkIntegrity() throws IOException {
if (version >= VersionBlockTreeTermsWriter.VERSION_CHECKSUM) {
// term dictionary
CodecUtil.checksumEntireFile(in);
// postings
postingsReader.checkIntegrity();
}
// term dictionary
CodecUtil.checksumEntireFile(in);
// postings
postingsReader.checkIntegrity();
}
}

View File

@@ -84,118 +84,19 @@ import org.apache.lucene.util.packed.PackedInts;
*/

/**
* This is just like {@link BlockTreeTermsWriter}, except it also stores a version per term, and adds a method to its TermsEnum
* implementation to seekExact only if the version is >= the specified version. The version is added to the terms index to avoid seeking if
* no term in the block has a high enough version. The term blocks file is .tiv and the terms index extension is .tipv.
* Block-based terms index and dictionary writer.
* <p>
* Writes terms dict and index, block-encoding (column
* stride) each term's metadata for each set of terms
* between two index terms.
* <p>
* Files:
* <ul>
* <li><tt>.tim</tt>: <a href="#Termdictionary">Term Dictionary</a></li>
* <li><tt>.tip</tt>: <a href="#Termindex">Term Index</a></li>
* </ul>
* <p>
* <a name="Termdictionary" id="Termdictionary"></a>
* <h3>Term Dictionary</h3>
*
* <p>The .tim file contains the list of terms in each
* field along with per-term statistics (such as docfreq)
* and per-term metadata (typically pointers to the postings list
* for that term in the inverted index).
* </p>
*
* <p>The .tim is arranged in blocks: with blocks containing
* a variable number of entries (by default 25-48), where
* each entry is either a term or a reference to a
* sub-block.</p>
*
* <p>NOTE: The term dictionary can plug into different postings implementations:
* the postings writer/reader are actually responsible for encoding
* and decoding the Postings Metadata and Term Metadata sections.</p>
*
* <ul>
* <li>TermsDict (.tim) --&gt; Header, <i>PostingsHeader</i>, NodeBlock<sup>NumBlocks</sup>,
* FieldSummary, DirOffset, Footer</li>
* <li>NodeBlock --&gt; (OuterNode | InnerNode)</li>
* <li>OuterNode --&gt; EntryCount, SuffixLength, Byte<sup>SuffixLength</sup>, StatsLength, &lt; TermStats &gt;<sup>EntryCount</sup>, MetaLength, &lt;<i>TermMetadata</i>&gt;<sup>EntryCount</sup></li>
* <li>InnerNode --&gt; EntryCount, SuffixLength[,Sub?], Byte<sup>SuffixLength</sup>, StatsLength, &lt; TermStats ? &gt;<sup>EntryCount</sup>, MetaLength, &lt;<i>TermMetadata ? </i>&gt;<sup>EntryCount</sup></li>
* <li>TermStats --&gt; DocFreq, TotalTermFreq </li>
* <li>FieldSummary --&gt; NumFields, &lt;FieldNumber, NumTerms, RootCodeLength, Byte<sup>RootCodeLength</sup>,
* SumTotalTermFreq?, SumDocFreq, DocCount, LongsSize, MinTerm, MaxTerm&gt;<sup>NumFields</sup></li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>DirOffset --&gt; {@link DataOutput#writeLong Uint64}</li>
* <li>MinTerm,MaxTerm --&gt; {@link DataOutput#writeVInt VInt} length followed by the byte[]</li>
* <li>EntryCount,SuffixLength,StatsLength,DocFreq,MetaLength,NumFields,
* FieldNumber,RootCodeLength,DocCount,LongsSize --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>TotalTermFreq,NumTerms,SumTotalTermFreq,SumDocFreq --&gt;
* {@link DataOutput#writeVLong VLong}</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>Notes:</p>
* <ul>
* <li>Header is a {@link CodecUtil#writeHeader CodecHeader} storing the version information
* for the BlockTree implementation.</li>
* <li>DirOffset is a pointer to the FieldSummary section.</li>
* <li>DocFreq is the count of documents which contain the term.</li>
* <li>TotalTermFreq is the total number of occurrences of the term. This is encoded
* as the difference between the total number of occurrences and the DocFreq.</li>
* <li>FieldNumber is the fields number from {@link FieldInfos}. (.fnm)</li>
* <li>NumTerms is the number of unique terms for the field.</li>
* <li>RootCode points to the root block for the field.</li>
* <li>SumDocFreq is the total number of postings, the number of term-document pairs across
* the entire field.</li>
* <li>DocCount is the number of documents that have at least one posting for this field.</li>
* <li>LongsSize records how many long values the postings writer/reader record per term
* (e.g., to hold freq/prox/doc file offsets).
* <li>MinTerm, MaxTerm are the lowest and highest term in this field.</li>
* <li>PostingsHeader and TermMetadata are plugged into by the specific postings implementation:
* these contain arbitrary per-file data (such as parameters or versioning information)
* and per-term data (such as pointers to inverted files).</li>
* <li>For inner nodes of the tree, every entry will steal one bit to mark whether it points
* to child nodes(sub-block). If so, the corresponding TermStats and TermMetaData are omitted </li>
* </ul>
* <a name="Termindex" id="Termindex"></a>
* <h3>Term Index</h3>
* <p>The .tip file contains an index into the term dictionary, so that it can be
* accessed randomly. The index is also used to determine
* when a given term cannot exist on disk (in the .tim file), saving a disk seek.</p>
* <ul>
* <li>TermsIndex (.tip) --&gt; Header, FSTIndex<sup>NumFields</sup>
* &lt;IndexStartFP&gt;<sup>NumFields</sup>, DirOffset, Footer</li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>DirOffset --&gt; {@link DataOutput#writeLong Uint64}</li>
* <li>IndexStartFP --&gt; {@link DataOutput#writeVLong VLong}</li>
* <!-- TODO: better describe FST output here -->
* <li>FSTIndex --&gt; {@link FST FST&lt;byte[]&gt;}</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>Notes:</p>
* <ul>
* <li>The .tip file contains a separate FST for each
* field. The FST maps a term prefix to the on-disk
* block that holds all terms starting with that
* prefix. Each field's IndexStartFP points to its
* FST.</li>
* <li>DirOffset is a pointer to the start of the IndexStartFPs
* for all fields</li>
* <li>It's possible that an on-disk block would contain
* too many terms (more than the allowed maximum
* (default: 48)). When this happens, the block is
* sub-divided into new blocks (called "floor
* blocks"), and then the output in the FST for the
* block's prefix encodes the leading byte of each
* sub-block, and its file pointer.
* </ul>
*
* @see BlockTreeTermsReader
* @lucene.experimental
*/
// nocommit fix jdocs
final class VersionBlockTreeTermsWriter extends FieldsConsumer {

private static boolean DEBUG = IDVersionSegmentTermsEnum.DEBUG;

static final PairOutputs<BytesRef,Long> FST_OUTPUTS = new PairOutputs<>(ByteSequenceOutputs.getSingleton(),
PositiveIntOutputs.getSingleton());

static final Pair<BytesRef,Long> NO_OUTPUT = FST_OUTPUTS.getNoOutput();

@@ -224,25 +125,11 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
/** Initial terms format. */
public static final int VERSION_START = 0;
// nocommit nuke all these old versions
/** Append-only */
public static final int VERSION_APPEND_ONLY = 1;
/** Meta data as array */
public static final int VERSION_META_ARRAY = 2;
/** checksums */
public static final int VERSION_CHECKSUM = 3;
/** min/max term */
public static final int VERSION_MIN_MAX_TERMS = 4;
/** Current terms format. */
public static final int VERSION_CURRENT = VERSION_MIN_MAX_TERMS;
public static final int VERSION_CURRENT = VERSION_START;

/** Extension of terms index file */
static final String TERMS_INDEX_EXTENSION = "tip";
static final String TERMS_INDEX_EXTENSION = "tipv";

final static String TERMS_INDEX_CODEC_NAME = "VERSION_BLOCK_TREE_TERMS_INDEX";

private final IndexOutput out;
@@ -297,7 +184,6 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
int maxItemsInBlock)
throws IOException
{
System.out.println("VBTTW minItemsInBlock=" + minItemsInBlock + " maxItemsInBlock=" + maxItemsInBlock);
if (minItemsInBlock <= 1) {
throw new IllegalArgumentException("minItemsInBlock must be >= 2; got " + minItemsInBlock);
}
@@ -626,8 +512,7 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
// following floor blocks:
void writeBlocks(IntsRef prevTerm, int prefixLength, int count) throws IOException {
// nocommit why can't we do floor blocks for root frame?
if (prefixLength == 0 || count <= maxItemsInBlock) {
if (count <= maxItemsInBlock) {
// Easy case: not floor block. Eg, prefix is "foo",
// and we found 30 terms/sub-blocks starting w/ that
// prefix, and minItemsInBlock <= 30 <=
@@ -645,7 +530,7 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
// TODO: we could store min & max suffix start byte
// in each block, to make floor blocks authoritative
//if (DEBUG) {
if (DEBUG) {
final BytesRef prefix = new BytesRef(prefixLength);
for(int m=0;m<prefixLength;m++) {
prefix.bytes[m] = (byte) prevTerm.ints[m];

@@ -653,7 +538,7 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
prefix.length = prefixLength;
//System.out.println("\nWBS count=" + count + " prefix=" + prefix.utf8ToString() + " " + prefix);
System.out.println("writeBlocks: prefix=" + toString(prefix) + " " + prefix + " count=" + count + " pending.size()=" + pending.size());
//}
}
//System.out.println("\nwbs count=" + count);
final int savLabel = prevTerm.ints[prevTerm.offset + prefixLength];

@@ -874,9 +759,9 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
// Write block header:
out.writeVInt((length<<1)|(isLastInFloor ? 1:0));
// if (DEBUG) {
if (DEBUG) {
System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + toString(prefix) + " entCount=" + length + " startFP=" + startFP + " futureTermCount=" + futureTermCount + (isFloor ? (" floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") + " isLastInFloor=" + isLastInFloor);
// }
}

// 1st pass: pack term suffix bytes into byte[] blob
// TODO: cutover to bulk int codec... simple64?

@@ -920,12 +805,12 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
BlockTermState state = term.state;
maxVersionInBlock = Math.max(maxVersionInBlock, ((IDVersionTermState) state).idVersion);
final int suffix = term.term.length - prefixLength;
// if (DEBUG) {
if (DEBUG) {
BytesRef suffixBytes = new BytesRef(suffix);
System.arraycopy(term.term.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
suffixBytes.length = suffix;
System.out.println(" " + (countx++) + ": write term suffix=" + toString(suffixBytes));
// }
}
// For leaf block we write suffix straight
suffixWriter.writeVInt(suffix);
suffixWriter.writeBytes(term.term.bytes, prefixLength, suffix);

@@ -957,12 +842,12 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
BlockTermState state = term.state;
maxVersionInBlock = Math.max(maxVersionInBlock, ((IDVersionTermState) state).idVersion);
final int suffix = term.term.length - prefixLength;
// if (DEBUG) {
if (DEBUG) {
BytesRef suffixBytes = new BytesRef(suffix);
System.arraycopy(term.term.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
suffixBytes.length = suffix;
System.out.println(" " + (countx++) + ": write term suffix=" + toString(suffixBytes));
// }
}
// For non-leaf block we borrow 1 bit to record
// if entry is term or sub-block
suffixWriter.writeVInt(suffix<<1);

@@ -1007,12 +892,12 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
suffixWriter.writeBytes(block.prefix.bytes, prefixLength, suffix);
assert block.fp < startFP;
// if (DEBUG) {
if (DEBUG) {
BytesRef suffixBytes = new BytesRef(suffix);
System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
suffixBytes.length = suffix;
System.out.println(" " + (countx++) + ": write sub-block suffix=" + toString(suffixBytes) + " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
// }
}
suffixWriter.writeVLong(startFP - block.fp);
subIndices.add(block.index);
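The writer above keeps a running maxVersionInBlock so that, per the new javadoc, a reader can avoid seeking into a block whose highest version is below the requested minimum. A small stand-alone sketch of that bookkeeping (the block layout and names are illustrative, not the writer's real data structures):

```java
// Sketch of per-block max-version bookkeeping: the writer records the largest
// version in each block so a reader can skip any block whose max is too low.
public final class MaxVersionPerBlockSketch {
  static long maxVersionInBlock(long[] versions, int from, int to) {
    long max = -1;
    for (int i = from; i < to; i++) {
      max = Math.max(max, versions[i]);
    }
    return max;
  }

  public static void main(String[] args) {
    long[] versions = {3, 9, 4, 12, 1, 2};
    long blockMax = maxVersionInBlock(versions, 0, 3); // block holding versions 3, 9, 4
    long minIDVersion = 10;
    // Reader-side check: nothing in this block can satisfy minIDVersion, so skip it.
    System.out.println(minIDVersion > blockMax ? "skip block" : "descend into block");
  }
}
```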

View File

@@ -28,7 +28,7 @@ import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.util.BytesRef;

// nocommit can we take a BytesRef token instead?
// TODO: can we take a BytesRef token instead?

/** Produces a single String token from the provided value, with the provided payload. */
class StringAndPayloadField extends Field {

View File

@@ -49,6 +49,7 @@ import org.apache.lucene.index.PerThreadPKLookup;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
@@ -90,16 +91,16 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
String next();
}

// nocommit make a similar test for BT, w/ varied IDs:
// TODO make a similar test for BT, w/ varied IDs:

public void testRandom() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
// nocommit randomize the block sizes:
iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
// nocommit put back
//RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
IndexWriter w = new IndexWriter(dir, iwc);
int minItemsInBlock = TestUtil.nextInt(random(), 2, 50);
int maxItemsInBlock = 2*(minItemsInBlock-1) + random().nextInt(50);
iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat(minItemsInBlock, maxItemsInBlock)));
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
//IndexWriter w = new IndexWriter(dir, iwc);
int numDocs = atLeast(1000);
Map<String,Long> idValues = new HashMap<String,Long>();
int docUpto = 0;
@@ -210,9 +211,10 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
System.out.println("TEST: useMonotonicVersion=" + useMonotonicVersion);
}

List<String> idsList = new ArrayList<>();
long version = 0;
while (docUpto < numDocs) {
// nocommit add deletes in
String idValue = idPrefix + ids.next();
if (idValues.containsKey(idValue)) {
continue;

@@ -229,11 +231,38 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
Document doc = new Document();
doc.add(makeIDField(idValue, version));
w.addDocument(doc);
idsList.add(idValue);
if (idsList.size() > 0 && random().nextInt(7) == 5) {
// Randomly delete or update a previous ID
idValue = idsList.get(random().nextInt(idsList.size()));
if (random().nextBoolean()) {
if (useMonotonicVersion) {
version += TestUtil.nextInt(random(), 1, 10);
} else {
version = random().nextLong() & 0x7fffffffffffffffL;
}
doc = new Document();
doc.add(makeIDField(idValue, version));
if (VERBOSE) {
System.out.println(" update " + idValue + " -> " + version);
}
w.updateDocument(new Term("id", idValue), doc);
idValues.put(idValue, version);
} else {
if (VERBOSE) {
System.out.println(" delete " + idValue);
}
w.deleteDocuments(new Term("id", idValue));
idValues.remove(idValue);
}
}
docUpto++;
}

//IndexReader r = w.getReader();
IndexReader r = DirectoryReader.open(w, true);
IndexReader r = w.getReader();
//IndexReader r = DirectoryReader.open(w, true);

PerThreadVersionPKLookup lookup = new PerThreadVersionPKLookup(r, "id");

List<Map.Entry<String,Long>> idValuesList = new ArrayList<>(idValues.entrySet());
@@ -242,7 +271,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
String idValue;
if (random().nextBoolean()) {
idValue = idValuesList.get(random().nextInt(numDocs)).getKey();
idValue = idValuesList.get(random().nextInt(idValuesList.size())).getKey();
} else if (random().nextBoolean()) {
idValue = ids.next();
} else {
@@ -318,14 +347,6 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
payload.length = 8;
IDVersionPostingsFormat.longToBytes(version, payload);
return new StringAndPayloadField("id", id, payload);
/*
Field field = newTextField("id", "", Field.Store.NO);
Token token = new Token(id, 0, id.length());
token.setPayload(payload);
field.setTokenStream(new CannedTokenStream(token));
return field;
*/
}

public void testMoreThanOneDocPerIDOneSegment() throws Exception {
@@ -353,6 +374,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
iwc.setMergePolicy(new TieredMergePolicy());
MergeScheduler ms = iwc.getMergeScheduler();
if (ms instanceof ConcurrentMergeScheduler) {
iwc.setMergeScheduler(new ConcurrentMergeScheduler() {
@@ -362,7 +384,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
}
});
}
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(makeIDField("id", 17));
w.addDocument(doc);
@@ -380,7 +402,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
// expected
assertTrue(ioe.getCause() instanceof IllegalArgumentException);
}
w.w.close();
w.close();
dir.close();
}
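The randomized testRandom() setup above derives maxItemsInBlock as 2*(minItemsInBlock-1) plus random slack, which keeps the pair within the writer's argument checks (minItemsInBlock must be >= 2). A quick sketch that exercises that invariant; the invariant itself is assumed from the formula used in the test, not stated elsewhere in this patch:

```java
import java.util.Random;

// Generates (minItemsInBlock, maxItemsInBlock) pairs the same way the test does
// and checks the assumed invariant maxItemsInBlock >= 2*(minItemsInBlock-1).
public final class BlockSizeSketch {
  public static void main(String[] args) {
    Random random = new Random();
    for (int iter = 0; iter < 1000; iter++) {
      int minItemsInBlock = 2 + random.nextInt(49);                         // 2..50
      int maxItemsInBlock = 2 * (minItemsInBlock - 1) + random.nextInt(50); // always >= 2*(min-1)
      if (minItemsInBlock <= 1 || maxItemsInBlock < 2 * (minItemsInBlock - 1)) {
        throw new AssertionError("invalid pair: " + minItemsInBlock + "/" + maxItemsInBlock);
      }
    }
    System.out.println("all generated pairs satisfy the block-size invariant");
  }
}
```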

View File

@@ -37,7 +37,7 @@ import org.apache.lucene.util.Bits;
* time.
* @lucene.experimental */

// nocommit mv under blocktree? but ... it's used by others (e.g. block terms)
// TODO: maybe move under blocktree? but it's used by other terms dicts (e.g. Block)

// TODO: find a better name; this defines the API that the
// terms dict impls use to talk to a postings impl.

View File

@@ -1640,22 +1640,15 @@ public class CheckIndex {
// Again, with the one doc deleted:
checkFields(tfv, onlyDocIsDeleted, 1, fieldInfos, false, true, infoStream, verbose);

// Only agg stats if the doc is live:
final boolean doStats = liveDocs == null || liveDocs.get(j);
if (doStats == false) {
// nocommit is it OK to stop verifying deleted docs?
if (liveDocs != null && liveDocs.get(j) == false) {
// Only check live docs
continue;
}

if (doStats) {
status.docCount++;
}
status.docCount++;

for(String field : tfv) {
if (doStats) {
status.totVectors++;
}
status.totVectors++;

// Make sure FieldInfo thinks this field is vector'd:
final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);

View File

@@ -94,6 +94,8 @@ final class DefaultIndexingChain extends DocConsumer {
// aborting on any exception from this method
int numDocs = state.segmentInfo.getDocCount();

// TODO: we could set liveDocs earlier and then fix DVs to also not write deleted docs:
writeNorms(state);
writeDocValues(state);

View File

@@ -34,7 +34,6 @@ final class FreqProxTermsWriter extends TermsHash {
}

private void applyDeletes(SegmentWriteState state, Fields fields) throws IOException {
System.out.println("applyDeletes segUpdates=" + state.segUpdates);
// Process any pending Term deletes for this newly
// flushed segment:

@@ -108,8 +107,6 @@ final class FreqProxTermsWriter extends TermsHash {
fields.setLiveDocs(state.liveDocs);
}

System.out.println("now: " + state.liveDocs + " pf=" + state.segmentInfo.getCodec().postingsFormat());
FieldsConsumer consumer = state.segmentInfo.getCodec().postingsFormat().fieldsConsumer(state);
boolean success = false;
try {

View File

@@ -1696,7 +1696,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit{
if (doWait) {
synchronized(this) {
while(true) {
if (hitOOM) {
throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete forceMerge");
}

View File

@@ -17,8 +17,7 @@ package org.apache.lucene.uninverting;
* limitations under the License.
*/

import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

@@ -47,14 +46,17 @@ import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.StoredDocument;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;

public class TestFieldCacheVsDocValues extends LuceneTestCase {

public void testByteMissingVsFieldCache() throws Exception {
@@ -315,14 +317,11 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
}

// delete some docs
// nocommit hmmm what to do
/*
int numDeletions = random().nextInt(numDocs/10);
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
*/
writer.shutdown();

// compare
@@ -331,7 +330,7 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
AtomicReader r = context.reader();
SortedDocValues expected = FieldCache.DEFAULT.getTermsIndex(r, "indexed");
SortedDocValues actual = r.getSortedDocValues("dv");
assertEquals(r.maxDoc(), expected, actual);
assertEquals(r.maxDoc(), r.getLiveDocs(), expected, actual);
}
ir.close();
dir.close();
@@ -382,14 +381,11 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
}

// delete some docs
// nocommit hmmm what to do
/*
int numDeletions = random().nextInt(numDocs/10);
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
*/

// compare per-segment
DirectoryReader ir = writer.getReader();
@@ -397,7 +393,7 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
AtomicReader r = context.reader();
SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(r, "indexed", null);
SortedSetDocValues actual = r.getSortedSetDocValues("dv");
assertEquals(r.maxDoc(), expected, actual);
assertEquals(r.maxDoc(), r.getLiveDocs(), expected, actual);
}
ir.close();

@@ -408,7 +404,7 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
AtomicReader ar = getOnlySegmentReader(ir);
SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(ar, "indexed", null);
SortedSetDocValues actual = ar.getSortedSetDocValues("dv");
assertEquals(ir.maxDoc(), expected, actual);
assertEquals(ir.maxDoc(), ar.getLiveDocs(), expected, actual);
ir.close();

writer.shutdown();
@@ -449,14 +445,11 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
}

// delete some docs
// nocommit hmmm what to do
/*
int numDeletions = random().nextInt(numDocs/10);
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
*/

// merge some segments and ensure that at least one of them has more than
// 256 values

@@ -496,102 +489,149 @@ public class TestFieldCacheVsDocValues extends LuceneTestCase {
}
}
private void assertEquals(int maxDoc, SortedDocValues expected, SortedDocValues actual) throws Exception {
assertEquals(maxDoc, DocValues.singleton(expected), DocValues.singleton(actual));
private void assertEquals(int maxDoc, Bits liveDocs, SortedDocValues expected, SortedDocValues actual) throws Exception {
assertEquals(maxDoc, liveDocs, DocValues.singleton(expected), DocValues.singleton(actual));
}

private void assertEquals(int maxDoc, SortedSetDocValues expected, SortedSetDocValues actual) throws Exception {
private void assertEquals(int maxDoc, Bits liveDocs, SortedSetDocValues expected, SortedSetDocValues actual) throws Exception {
// can be null for the segment if no docs actually had any SortedDocValues
// in this case FC.getDocTermsOrds returns EMPTY
if (actual == null) {
assertEquals(DocValues.EMPTY_SORTED_SET, expected);
return;
}
assertEquals(expected.getValueCount(), actual.getValueCount());
// compare ord lists FixedBitSet liveOrdsExpected = new FixedBitSet((int) expected.getValueCount());
FixedBitSet liveOrdsActual = new FixedBitSet((int) actual.getValueCount());
BytesRef expectedBytes = new BytesRef();
BytesRef actualBytes = new BytesRef();
// compare values for all live docs:
for (int i = 0; i < maxDoc; i++) {
if (liveDocs != null && liveDocs.get(i) == false) {
// Don't check deleted docs
continue;
}
expected.setDocument(i);
actual.setDocument(i);
long expectedOrd;
while ((expectedOrd = expected.nextOrd()) != NO_MORE_ORDS) {
assertEquals(expectedOrd, actual.nextOrd());
expected.lookupOrd(expectedOrd, expectedBytes);
long actualOrd = actual.nextOrd();
assertTrue(actualOrd != NO_MORE_ORDS);
actual.lookupOrd(actualOrd, actualBytes);
assertEquals(expectedBytes, actualBytes);
liveOrdsExpected.set((int) expectedOrd);
liveOrdsActual.set((int) actualOrd);
}
assertEquals(NO_MORE_ORDS, actual.nextOrd());
}
// Make sure both have same number of non-deleted values:
assertEquals(liveOrdsExpected.cardinality(), liveOrdsActual.cardinality());
// compare ord dictionary
BytesRef expectedBytes = new BytesRef();
BytesRef actualBytes = new BytesRef();
for (long i = 0; i < expected.getValueCount(); i++) {
expected.lookupTerm(expectedBytes);
actual.lookupTerm(actualBytes);
int expectedOrd = 0;
int actualOrd = 0;
while (expectedOrd < expected.getValueCount()) {
expectedOrd = liveOrdsExpected.nextSetBit(expectedOrd);
if (expectedOrd == -1) {
break;
}
actualOrd = liveOrdsActual.nextSetBit(actualOrd);
expected.lookupOrd(expectedOrd, expectedBytes);
actual.lookupOrd(actualOrd, actualBytes);
assertEquals(expectedBytes, actualBytes);
expectedOrd++;
actualOrd++;
}
assertTrue(actualOrd == actual.getValueCount() || liveOrdsActual.nextSetBit(actualOrd) == -1);

// compare termsenum
assertEquals(expected.getValueCount(), expected.termsEnum(), actual.termsEnum());
assertEquals(expected.getValueCount(), expected.termsEnum(), liveOrdsExpected, actual.termsEnum(), liveOrdsActual);
}
private void assertEquals(long numOrds, TermsEnum expected, TermsEnum actual) throws Exception {

/** Does termsEnum.next() but then skips over deleted ords. */
private static BytesRef next(TermsEnum termsEnum, Bits liveOrds) throws IOException {
while (termsEnum.next() != null) {
if (liveOrds.get((int) termsEnum.ord())) {
return termsEnum.term();
}
}
return null;
}
/** Does termsEnum.seekCeil() but then skips over deleted ords. */
private static SeekStatus seekCeil(TermsEnum termsEnum, BytesRef term, Bits liveOrds) throws IOException {
SeekStatus status = termsEnum.seekCeil(term);
if (status == SeekStatus.END) {
return status;
} else {
if (liveOrds.get((int) termsEnum.ord()) == false) {
while (termsEnum.next() != null) {
if (liveOrds.get((int) termsEnum.ord())) {
return SeekStatus.NOT_FOUND;
}
}
return SeekStatus.END;
} else {
return status;
}
}
}
private void assertEquals(long numOrds, TermsEnum expected, Bits liveOrdsExpected, TermsEnum actual, Bits liveOrdsActual) throws Exception {
BytesRef ref;
// sequential next() through all terms
while ((ref = next(expected, liveOrdsExpected)) != null) {
assertEquals(ref, next(actual, liveOrdsActual));
while ((ref = expected.next()) != null) {
assertEquals(ref, actual.next());
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}
assertNull(actual.next());
// sequential seekExact(ord) through all terms
for (long i = 0; i < numOrds; i++) {
expected.seekExact(i);
actual.seekExact(i);
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}
assertNull(next(actual, liveOrdsActual));

// sequential seekExact(BytesRef) through all terms
for (long i = 0; i < numOrds; i++) {
if (liveOrdsExpected.get((int) i) == false) {
continue;
}
expected.seekExact(i);
assertTrue(actual.seekExact(expected.term()));
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}

// sequential seekCeil(BytesRef) through all terms
for (long i = 0; i < numOrds; i++) {
if (liveOrdsExpected.get((int) i) == false) {
continue;
}
expected.seekExact(i);
assertEquals(SeekStatus.FOUND, actual.seekCeil(expected.term()));
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}
// random seekExact(ord)
for (long i = 0; i < numOrds; i++) {
long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
expected.seekExact(randomOrd);
actual.seekExact(randomOrd);
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}

// random seekExact(BytesRef)
for (long i = 0; i < numOrds; i++) {
long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
if (liveOrdsExpected.get((int) randomOrd) == false) {
continue;
}
expected.seekExact(randomOrd);
actual.seekExact(expected.term());
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}

// random seekCeil(BytesRef)
for (long i = 0; i < numOrds; i++) {
if (liveOrdsExpected.get((int) i) == false) {
continue;
}
BytesRef target = new BytesRef(TestUtil.randomUnicodeString(random()));
SeekStatus expectedStatus = expected.seekCeil(target);
assertEquals(expectedStatus, actual.seekCeil(target));
SeekStatus expectedStatus = seekCeil(expected, target, liveOrdsExpected);
assertEquals(expectedStatus, seekCeil(actual, target, liveOrdsActual));
if (expectedStatus != SeekStatus.END) {
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}
}
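The rewritten assertions above compare only the ords referenced by live documents on each side. A simplified, self-contained illustration of that strategy using java.util.BitSet (the sample data is made up, not the test's real setup):

```java
import java.util.BitSet;

// Collect the ords actually referenced by live documents on each side, then
// compare only those; deleted documents are skipped entirely.
public final class LiveOrdsSketch {
  public static void main(String[] args) {
    int[][] expectedOrdsPerDoc = {{0, 2}, {1}, {2}};
    int[][] actualOrdsPerDoc   = {{0, 2}, {1}, {2}};
    boolean[] live = {true, false, true}; // doc 1 is deleted

    BitSet liveOrdsExpected = new BitSet();
    BitSet liveOrdsActual = new BitSet();
    for (int doc = 0; doc < live.length; doc++) {
      if (!live[doc]) {
        continue; // deleted docs are not compared
      }
      for (int ord : expectedOrdsPerDoc[doc]) liveOrdsExpected.set(ord);
      for (int ord : actualOrdsPerDoc[doc]) liveOrdsActual.set(ord);
    }
    // Both sides must reference the same number of live ords.
    System.out.println(liveOrdsExpected.cardinality() == liveOrdsActual.cardinality());
  }
}
```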