mirror of https://github.com/apache/lucene.git
LUCENE-5675: checkpoint current dirty state
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5675@1595530 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
fa51d5972a
commit
d6131e155b
|
@ -45,7 +45,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
// Lazy init:
|
||||
IndexInput in;
|
||||
|
||||
private static boolean DEBUG = true;
|
||||
static boolean DEBUG = true;
|
||||
|
||||
private IDVersionSegmentTermsEnumFrame[] stack;
|
||||
private final IDVersionSegmentTermsEnumFrame staticFrame;
|
||||
|
@ -55,6 +55,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
|
||||
// nocommit make this public "for casting" and add a getVersion method?
|
||||
|
||||
// nocommit unused?
|
||||
private int targetBeforeCurrentLength;
|
||||
|
||||
private final ByteArrayDataInput scratchReader = new ByteArrayDataInput();
|
||||
|
@ -218,7 +219,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
|
||||
// for debugging
|
||||
@SuppressWarnings("unused")
|
||||
private String brToString(BytesRef b) {
|
||||
static String brToString(BytesRef b) {
|
||||
try {
|
||||
return b.utf8ToString() + " " + b;
|
||||
} catch (Throwable t) {
|
||||
|
@ -253,6 +254,8 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
int targetUpto;
|
||||
Pair<BytesRef,Long> output;
|
||||
|
||||
long startFrameFP = currentFrame.fp;
|
||||
|
||||
targetBeforeCurrentLength = currentFrame.ord;
|
||||
|
||||
// nocommit we could stop earlier w/ the version check, every time we traverse an index arc we can check?
|
||||
|
@ -353,6 +356,9 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
}
|
||||
currentFrame = lastFrame;
|
||||
currentFrame.rewind();
|
||||
// nocommit put this back to BT also?
|
||||
term.length = targetUpto;
|
||||
termExists = false;
|
||||
} else {
|
||||
// Target is exactly the same as current term
|
||||
assert term.length == target.length;
|
||||
|
@ -374,6 +380,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
}
|
||||
return false;
|
||||
}
|
||||
System.out.println(" term version=" + ((IDVersionTermState) currentFrame.state).idVersion + " frame version=" + currentFrame.maxIDVersion + " frame ord=" + currentFrame.ord);
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.println(" target is same as current; return true");
|
||||
|
@ -412,10 +419,11 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
currentFrame = pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), 0);
|
||||
}
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" + targetBeforeCurrentLength);
|
||||
}
|
||||
if (DEBUG) {
|
||||
System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" + targetBeforeCurrentLength);
|
||||
}
|
||||
|
||||
// We are done sharing the common prefix with the incoming target and where we are currently seek'd; now continue walking the index:
|
||||
while (targetUpto < target.length) {
|
||||
|
||||
final int targetLabel = target.bytes[target.offset + targetUpto] & 0xFF;
|
||||
|
@ -445,19 +453,36 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
}
|
||||
//System.out.println(" check maxVersion=" + currentFrame.maxIDVersion + " vs " + minIDVersion);
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.println(" frame.maxIDVersion=" + currentFrame.maxIDVersion + " vs minIDVersion=" + minIDVersion);
|
||||
}
|
||||
|
||||
if (currentFrame.maxIDVersion < minIDVersion) {
|
||||
// The max version for all terms in this block is lower than the minVersion
|
||||
//termExists = false;
|
||||
//term.bytes[targetUpto] = (byte) targetLabel;
|
||||
//term.length = 1+targetUpto;
|
||||
if (currentFrame.fp != startFrameFP) {
|
||||
//if (targetUpto+1 > term.length) {
|
||||
termExists = false;
|
||||
term.bytes[targetUpto] = (byte) targetLabel;
|
||||
term.length = 1+targetUpto;
|
||||
if (DEBUG) {
|
||||
System.out.println(" reset current term");
|
||||
}
|
||||
validIndexPrefix = Math.min(validIndexPrefix, term.length);
|
||||
}
|
||||
//if (currentFrame.ord != startFrameOrd) {
|
||||
//termExists = false;
|
||||
//}
|
||||
if (DEBUG) {
|
||||
System.out.println(" FAST version NOT_FOUND term=" + brToString(term) + " currentFrame.maxIDVersion=" + currentFrame.maxIDVersion + " validIndexPrefix=" + validIndexPrefix);
|
||||
System.out.println(" FAST version NOT_FOUND term=" + brToString(term) + " targetUpto=" + targetUpto + " currentFrame.maxIDVersion=" + currentFrame.maxIDVersion + " validIndexPrefix=" + validIndexPrefix);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
currentFrame.loadBlock();
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.println(" scan currentFrame ord=" + currentFrame.ord);
|
||||
}
|
||||
final SeekStatus result = currentFrame.scanToTerm(target, true);
|
||||
if (result == SeekStatus.FOUND) {
|
||||
currentFrame.decodeMetaData();
|
||||
|
@ -484,6 +509,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
// Follow this arc
|
||||
arc = nextArc;
|
||||
term.bytes[targetUpto] = (byte) targetLabel;
|
||||
termExists = false;
|
||||
// Aggregate output as we go:
|
||||
assert arc.output != null;
|
||||
if (arc.output != VersionBlockTreeTermsWriter.NO_OUTPUT) {
|
||||
|
@ -491,7 +517,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
}
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.println(" index: follow label=" + Integer.toHexString((target.bytes[target.offset + targetUpto]&0xff)) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
|
||||
System.out.println(" index: follow label=" + (char) ((target.bytes[target.offset + targetUpto]&0xff)) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
|
||||
}
|
||||
targetUpto++;
|
||||
|
||||
|
@ -518,8 +544,13 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
return false;
|
||||
}
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.println(" frame.maxIDVersion=" + currentFrame.maxIDVersion + " vs minIDVersion=" + minIDVersion);
|
||||
}
|
||||
|
||||
if (currentFrame.maxIDVersion < minIDVersion) {
|
||||
// The max version for all terms in this block is lower than the minVersion
|
||||
// nocommit need same logic here as above?
|
||||
termExists = false;
|
||||
term.length = targetUpto;
|
||||
return false;
|
||||
|
@ -813,9 +844,9 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
assert f != null;
|
||||
final BytesRef prefix = new BytesRef(term.bytes, 0, f.prefix);
|
||||
if (f.nextEnt == -1) {
|
||||
out.println(" frame " + (isSeekFrame ? "(seek)" : "(next)") + " ord=" + ord + " fp=" + f.fp + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + " prefixLen=" + f.prefix + " prefix=" + prefix + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " code=" + ((f.fp<<VersionBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS) + (f.hasTerms ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS:0) + (f.isFloor ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR:0)) + " isLastInFloor=" + f.isLastInFloor + " mdUpto=" + f.metaDataUpto + " tbOrd=" + f.getTermBlockOrd());
|
||||
out.println(" frame " + (isSeekFrame ? "(seek)" : "(next)") + " ord=" + ord + " fp=" + f.fp + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + " prefixLen=" + f.prefix + " prefix=" + brToString(prefix) + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " code=" + ((f.fp<<VersionBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS) + (f.hasTerms ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS:0) + (f.isFloor ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR:0)) + " isLastInFloor=" + f.isLastInFloor + " mdUpto=" + f.metaDataUpto + " tbOrd=" + f.getTermBlockOrd());
|
||||
} else {
|
||||
out.println(" frame " + (isSeekFrame ? "(seek, loaded)" : "(next, loaded)") + " ord=" + ord + " fp=" + f.fp + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + " prefixLen=" + f.prefix + " prefix=" + prefix + " nextEnt=" + f.nextEnt + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " code=" + ((f.fp<<VersionBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS) + (f.hasTerms ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS:0) + (f.isFloor ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR:0)) + " lastSubFP=" + f.lastSubFP + " isLastInFloor=" + f.isLastInFloor + " mdUpto=" + f.metaDataUpto + " tbOrd=" + f.getTermBlockOrd());
|
||||
out.println(" frame " + (isSeekFrame ? "(seek, loaded)" : "(next, loaded)") + " ord=" + ord + " fp=" + f.fp + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + " prefixLen=" + f.prefix + " prefix=" + brToString(prefix) + " nextEnt=" + f.nextEnt + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " code=" + ((f.fp<<VersionBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS) + (f.hasTerms ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS:0) + (f.isFloor ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR:0)) + " lastSubFP=" + f.lastSubFP + " isLastInFloor=" + f.isLastInFloor + " mdUpto=" + f.metaDataUpto + " tbOrd=" + f.getTermBlockOrd());
|
||||
}
|
||||
if (fr.index != null) {
|
||||
assert !isSeekFrame || f.arc != null: "isSeekFrame=" + isSeekFrame + " f.arc=" + f.arc;
|
||||
|
|
|
@ -36,6 +36,8 @@ final class IDVersionSegmentTermsEnumFrame {
|
|||
boolean hasTermsOrig;
|
||||
boolean isFloor;
|
||||
|
||||
static boolean DEBUG = true;
|
||||
|
||||
/** Highest version of any term in this block. */
|
||||
long maxIDVersion;
|
||||
|
||||
|
@ -218,10 +220,13 @@ final class IDVersionSegmentTermsEnumFrame {
|
|||
}
|
||||
|
||||
void rewind() {
|
||||
System.out.println(" rewind frame ord=" + ord);
|
||||
|
||||
// Force reload:
|
||||
fp = fpOrig;
|
||||
nextEnt = -1;
|
||||
// nocommit move to BT too?
|
||||
//state.termBlockOrd = 0;
|
||||
hasTerms = hasTermsOrig;
|
||||
if (isFloor) {
|
||||
floorDataReader.rewind();
|
||||
|
@ -321,22 +326,22 @@ final class IDVersionSegmentTermsEnumFrame {
|
|||
public void scanToFloorFrame(BytesRef target) {
|
||||
|
||||
if (!isFloor || target.length <= prefix) {
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" scanToFloorFrame skip: isFloor=" + isFloor + " target.length=" + target.length + " vs prefix=" + prefix);
|
||||
// }
|
||||
if (DEBUG) {
|
||||
System.out.println(" scanToFloorFrame skip: isFloor=" + isFloor + " target.length=" + target.length + " vs prefix=" + prefix);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
final int targetLabel = target.bytes[target.offset + prefix] & 0xFF;
|
||||
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" scanToFloorFrame fpOrig=" + fpOrig + " targetLabel=" + toHex(targetLabel) + " vs nextFloorLabel=" + toHex(nextFloorLabel) + " numFollowFloorBlocks=" + numFollowFloorBlocks);
|
||||
// }
|
||||
if (DEBUG) {
|
||||
System.out.println(" scanToFloorFrame fpOrig=" + fpOrig + " targetLabel=" + ((char) targetLabel) + " vs nextFloorLabel=" + ((char) nextFloorLabel) + " numFollowFloorBlocks=" + numFollowFloorBlocks);
|
||||
}
|
||||
|
||||
if (targetLabel < nextFloorLabel) {
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" already on correct block");
|
||||
// }
|
||||
if (DEBUG) {
|
||||
System.out.println(" already on correct block");
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -347,25 +352,25 @@ final class IDVersionSegmentTermsEnumFrame {
|
|||
final long code = floorDataReader.readVLong();
|
||||
newFP = fpOrig + (code >>> 1);
|
||||
hasTerms = (code & 1) != 0;
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" label=" + toHex(nextFloorLabel) + " fp=" + newFP + " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
|
||||
// }
|
||||
if (DEBUG) {
|
||||
System.out.println(" label=" + ((char) nextFloorLabel) + " fp=" + newFP + " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
|
||||
}
|
||||
|
||||
isLastInFloor = numFollowFloorBlocks == 1;
|
||||
numFollowFloorBlocks--;
|
||||
|
||||
if (isLastInFloor) {
|
||||
nextFloorLabel = 256;
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" stop! last block nextFloorLabel=" + toHex(nextFloorLabel));
|
||||
// }
|
||||
if (DEBUG) {
|
||||
System.out.println(" stop! last block nextFloorLabel=" + ((char) nextFloorLabel));
|
||||
}
|
||||
break;
|
||||
} else {
|
||||
nextFloorLabel = floorDataReader.readByte() & 0xff;
|
||||
if (targetLabel < nextFloorLabel) {
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" stop! nextFloorLabel=" + toHex(nextFloorLabel));
|
||||
// }
|
||||
if (DEBUG) {
|
||||
System.out.println(" stop! nextFloorLabel=" + ((char) nextFloorLabel));
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -373,26 +378,28 @@ final class IDVersionSegmentTermsEnumFrame {
|
|||
|
||||
if (newFP != fp) {
|
||||
// Force re-load of the block:
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" force switch to fp=" + newFP + " oldFP=" + fp);
|
||||
// }
|
||||
if (DEBUG) {
|
||||
System.out.println(" force switch to fp=" + newFP + " oldFP=" + fp);
|
||||
}
|
||||
nextEnt = -1;
|
||||
fp = newFP;
|
||||
} else {
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" stay on same fp=" + newFP);
|
||||
// }
|
||||
if (DEBUG) {
|
||||
System.out.println(" stay on same fp=" + newFP);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void decodeMetaData() throws IOException {
|
||||
|
||||
//if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + segment + " mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd);
|
||||
System.out.println("\nBTTR.decodeMetadata seg=" + ste.fr.parent.segment + " mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd);
|
||||
|
||||
assert nextEnt >= 0;
|
||||
|
||||
// lazily catch up on metadata decode:
|
||||
final int limit = getTermBlockOrd();
|
||||
boolean absolute = metaDataUpto == 0;
|
||||
assert limit > 0;
|
||||
|
||||
// TODO: better API would be "jump straight to term=N"???
|
||||
while (metaDataUpto < limit) {
|
||||
|
@ -483,7 +490,7 @@ final class IDVersionSegmentTermsEnumFrame {
|
|||
// scan the entries check if the suffix matches.
|
||||
public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException {
|
||||
|
||||
// if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" + brToString(term));
|
||||
if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + IDVersionSegmentTermsEnum.brToString(target) + " term=" + IDVersionSegmentTermsEnum.brToString(ste.term));
|
||||
|
||||
assert nextEnt != -1;
|
||||
|
||||
|
@ -506,13 +513,13 @@ final class IDVersionSegmentTermsEnumFrame {
|
|||
|
||||
suffix = suffixesReader.readVInt();
|
||||
|
||||
// if (DEBUG) {
|
||||
// BytesRef suffixBytesRef = new BytesRef();
|
||||
// suffixBytesRef.bytes = suffixBytes;
|
||||
// suffixBytesRef.offset = suffixesReader.getPosition();
|
||||
// suffixBytesRef.length = suffix;
|
||||
// System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef));
|
||||
// }
|
||||
if (DEBUG) {
|
||||
BytesRef suffixBytesRef = new BytesRef();
|
||||
suffixBytesRef.bytes = suffixBytes;
|
||||
suffixBytesRef.offset = suffixesReader.getPosition();
|
||||
suffixBytesRef.length = suffix;
|
||||
System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix=" + IDVersionSegmentTermsEnum.brToString(suffixBytesRef));
|
||||
}
|
||||
|
||||
final int termLen = prefix + suffix;
|
||||
startBytePos = suffixesReader.getPosition();
|
||||
|
@ -609,7 +616,7 @@ final class IDVersionSegmentTermsEnumFrame {
|
|||
// scan the entries check if the suffix matches.
|
||||
public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws IOException {
|
||||
|
||||
//if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix + " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" + brToString(term));
|
||||
if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix + " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + IDVersionSegmentTermsEnum.brToString(target) + " term=" + IDVersionSegmentTermsEnum.brToString(ste.term));
|
||||
|
||||
assert nextEnt != -1;
|
||||
|
||||
|
|
|
@ -140,18 +140,18 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
|
|||
}
|
||||
|
||||
if (expectedVersion == null) {
|
||||
assertEquals(-1, lookup.lookup(idValueBytes));
|
||||
assertEquals("term should not have been found (doesn't exist)", -1, lookup.lookup(idValueBytes));
|
||||
} else {
|
||||
if (random().nextBoolean()) {
|
||||
if (VERBOSE) {
|
||||
System.out.println(" lookup exact version (should be found)");
|
||||
}
|
||||
assertTrue(lookup.lookup(idValueBytes, expectedVersion.longValue()) != -1);
|
||||
assertTrue("term should have been found (version too old)", lookup.lookup(idValueBytes, expectedVersion.longValue()) != -1);
|
||||
} else {
|
||||
if (VERBOSE) {
|
||||
System.out.println(" lookup version+1 (should not be found)");
|
||||
}
|
||||
assertEquals(-1, lookup.lookup(idValueBytes, expectedVersion.longValue()+1));
|
||||
assertEquals("term should not have been found (version newer)", -1, lookup.lookup(idValueBytes, expectedVersion.longValue()+1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -606,14 +606,15 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
}
|
||||
|
||||
// Write the top count entries on the pending stack as
|
||||
// one or more blocks. Returns how many blocks were
|
||||
// written. If the entry count is <= maxItemsPerBlock
|
||||
// one or more blocks. If the entry count is <= maxItemsInBlock
|
||||
// we just write a single block; else we break into
|
||||
// primary (initial) block and then one or more
|
||||
// following floor blocks:
|
||||
|
||||
void writeBlocks(IntsRef prevTerm, int prefixLength, int count) throws IOException {
|
||||
if (prefixLength == 0 || count <= maxItemsInBlock) {
|
||||
System.out.println("writeBlocks count=" + count);
|
||||
// nocommit nuke the prefixLength == 0 case, but testVaryingTermsPerSegment fails!!
|
||||
if (count <= maxItemsInBlock) {
|
||||
// Easy case: not floor block. Eg, prefix is "foo",
|
||||
// and we found 30 terms/sub-blocks starting w/ that
|
||||
// prefix, and minItemsInBlock <= 30 <=
|
||||
|
@ -621,6 +622,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
final PendingBlock nonFloorBlock = writeBlock(prevTerm, prefixLength, prefixLength, count, count, 0, false, -1, true);
|
||||
nonFloorBlock.compileIndex(null, scratchBytes);
|
||||
pending.add(nonFloorBlock);
|
||||
System.out.println(" 1 block");
|
||||
} else {
|
||||
// Floor block case. Eg, prefix is "foo" but we
|
||||
// have 100 terms/sub-blocks starting w/ that
|
||||
|
@ -777,6 +779,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
floorBlocks.add(floorBlock);
|
||||
}
|
||||
curStart -= pendingCount;
|
||||
System.out.println(" floor=" + pendingCount);
|
||||
//System.out.println(" = " + pendingCount);
|
||||
pendingCount = 0;
|
||||
|
||||
|
|
|
@ -49,10 +49,10 @@ final class SegmentTermsEnum extends TermsEnum {
|
|||
boolean termExists;
|
||||
final FieldReader fr;
|
||||
|
||||
// nocommit make this public "for casting" and add a getVersion method?
|
||||
|
||||
private int targetBeforeCurrentLength;
|
||||
|
||||
static boolean DEBUG = true;
|
||||
|
||||
private final ByteArrayDataInput scratchReader = new ByteArrayDataInput();
|
||||
|
||||
// What prefix of the current term was present in the index:
|
||||
|
@ -69,6 +69,7 @@ final class SegmentTermsEnum extends TermsEnum {
|
|||
|
||||
public SegmentTermsEnum(FieldReader fr) throws IOException {
|
||||
this.fr = fr;
|
||||
System.out.println("STE: init");
|
||||
|
||||
//if (DEBUG) System.out.println("BTTR.init seg=" + segment);
|
||||
stack = new SegmentTermsEnumFrame[0];
|
||||
|
@ -295,6 +296,19 @@ final class SegmentTermsEnum extends TermsEnum {
|
|||
return true;
|
||||
}
|
||||
|
||||
// for debugging
|
||||
@SuppressWarnings("unused")
|
||||
static String brToString(BytesRef b) {
|
||||
try {
|
||||
return b.utf8ToString() + " " + b;
|
||||
} catch (Throwable t) {
|
||||
// If BytesRef isn't actually UTF8, or it's eg a
|
||||
// prefix of UTF8 that ends mid-unicode-char, we
|
||||
// fallback to hex:
|
||||
return b.toString();
|
||||
}
|
||||
}
|
||||
|
||||
// nocommit we need a seekExact(BytesRef target, long minVersion) API?
|
||||
|
||||
@Override
|
||||
|
@ -310,10 +324,10 @@ final class SegmentTermsEnum extends TermsEnum {
|
|||
|
||||
assert clearEOF();
|
||||
|
||||
// if (DEBUG) {
|
||||
// System.out.println("\nBTTR.seekExact seg=" + segment + " target=" + fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix);
|
||||
// printSeekState();
|
||||
// }
|
||||
if (DEBUG) {
|
||||
System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" + fr.fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix);
|
||||
printSeekState(System.out);
|
||||
}
|
||||
|
||||
FST.Arc<BytesRef> arc;
|
||||
int targetUpto;
|
||||
|
@ -352,16 +366,13 @@ final class SegmentTermsEnum extends TermsEnum {
|
|||
// First compare up to valid seek frames:
|
||||
while (targetUpto < targetLimit) {
|
||||
cmp = (term.bytes[targetUpto]&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output + " output=" + output);
|
||||
// }
|
||||
if (DEBUG) {
|
||||
System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output + " output=" + output);
|
||||
}
|
||||
if (cmp != 0) {
|
||||
break;
|
||||
}
|
||||
arc = arcs[1+targetUpto];
|
||||
//if (arc.label != (target.bytes[target.offset + targetUpto] & 0xFF)) {
|
||||
//System.out.println("FAIL: arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF));
|
||||
//}
|
||||
assert arc.label == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
|
||||
if (arc.output != BlockTreeTermsWriter.NO_OUTPUT) {
|
||||
output = BlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output);
|
||||
|
@ -382,9 +393,9 @@ final class SegmentTermsEnum extends TermsEnum {
|
|||
final int targetLimit2 = Math.min(target.length, term.length);
|
||||
while (targetUpto < targetLimit2) {
|
||||
cmp = (term.bytes[targetUpto]&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")");
|
||||
// }
|
||||
if (DEBUG) {
|
||||
System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")");
|
||||
}
|
||||
if (cmp != 0) {
|
||||
break;
|
||||
}
|
||||
|
@ -461,6 +472,7 @@ final class SegmentTermsEnum extends TermsEnum {
|
|||
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" + targetBeforeCurrentLength);
|
||||
// }
|
||||
|
||||
// We are done sharing the common prefix with the incoming target and where we are currently seek'd; now continue walking the index:
|
||||
while (targetUpto < target.length) {
|
||||
|
||||
final int targetLabel = target.bytes[target.offset + targetUpto] & 0xFF;
|
||||
|
@ -718,6 +730,7 @@ final class SegmentTermsEnum extends TermsEnum {
|
|||
//System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" + targetBeforeCurrentLength);
|
||||
//}
|
||||
|
||||
// We are done sharing the common prefix with the incoming target and where we are currently seek'd; now continue walking the index:
|
||||
while (targetUpto < target.length) {
|
||||
|
||||
final int targetLabel = target.bytes[target.offset + targetUpto] & 0xFF;
|
||||
|
|
|
@ -169,6 +169,14 @@ public class FilterAtomicReader extends AtomicReader {
|
|||
return in.seekCeil(text);
|
||||
}
|
||||
|
||||
// nocommit tests angry about this; need to use VirtualMethod to decide when to call in.X vs super.X, but this is important because BT's
|
||||
// seekExact is not being used today! maybe we are masking bugs
|
||||
|
||||
@Override
|
||||
public boolean seekExact(BytesRef text) throws IOException {
|
||||
return in.seekExact(text);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void seekExact(long ord) throws IOException {
|
||||
in.seekExact(ord);
|
||||
|
|
|
@ -966,4 +966,44 @@ public class TestTermsEnum extends LuceneTestCase {
|
|||
w.close();
|
||||
d.close();
|
||||
}
|
||||
|
||||
// nocommit mark slow/nightly: O(N^2)!!
|
||||
|
||||
// Stresses out many-terms-in-root-block case:
|
||||
public void testVaryingTermsPerSegment() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
Set<BytesRef> terms = new HashSet<BytesRef>();
|
||||
int MAX_TERMS = 10000;
|
||||
while (terms.size() < MAX_TERMS) {
|
||||
terms.add(new BytesRef(TestUtil.randomSimpleString(random())));
|
||||
}
|
||||
List<BytesRef> termsList = new ArrayList<>(terms);
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for(int termCount=0;termCount<10000;termCount++) {
|
||||
System.out.println("\nTEST: termCount=" + termCount);
|
||||
sb.append(' ');
|
||||
sb.append(termsList.get(termCount).utf8ToString());
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
||||
iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
|
||||
Document doc = new Document();
|
||||
doc.add(newTextField("field", sb.toString(), Field.Store.NO));
|
||||
w.addDocument(doc);
|
||||
IndexReader r = w.getReader();
|
||||
assertEquals(1, r.leaves().size());
|
||||
TermsEnum te = r.leaves().get(0).reader().fields().terms("field").iterator(null);
|
||||
System.out.println("te=" + te);
|
||||
for(int i=0;i<=termCount;i++) {
|
||||
//System.out.println("TEST: lookup (should exist) " + termsList.get(i));
|
||||
assertTrue("term '" + termsList.get(i).utf8ToString() + "' should exist but doesn't", te.seekExact(termsList.get(i)));
|
||||
}
|
||||
for(int i=termCount+1;i<termsList.size();i++) {
|
||||
//System.out.println("TEST: lookup (should not exist) " + termsList.get(i));
|
||||
assertFalse("term '" + termsList.get(i) + "' shouldn't exist but does", te.seekExact(termsList.get(i)));
|
||||
}
|
||||
r.close();
|
||||
w.shutdown();
|
||||
}
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -234,6 +234,11 @@ public class AssertingAtomicReader extends FilterAtomicReader {
|
|||
super.seekExact(term, state);
|
||||
this.state = State.POSITIONED;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "AssertingTermsEnum(" + in + ")";
|
||||
}
|
||||
}
|
||||
|
||||
static enum DocsEnumState { START, ITERATING, FINISHED };
|
||||
|
@ -682,4 +687,4 @@ public class AssertingAtomicReader extends FilterAtomicReader {
|
|||
}
|
||||
|
||||
private final Object cacheKey = new Object();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -176,6 +176,9 @@ public class TestRuleLimitSysouts extends TestRuleAdapter {
|
|||
}
|
||||
|
||||
protected boolean isEnforced() {
|
||||
return false;
|
||||
// nocommit
|
||||
/*
|
||||
Class<?> target = RandomizedTest.getContext().getTargetClass();
|
||||
|
||||
if (LuceneTestCase.VERBOSE ||
|
||||
|
@ -189,6 +192,7 @@ public class TestRuleLimitSysouts extends TestRuleAdapter {
|
|||
}
|
||||
|
||||
return true;
|
||||
*/
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
Loading…
Reference in New Issue