mirror of https://github.com/apache/lucene.git
LUCENE-5675: working on ant precommit
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5675@1596708 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
48b16e5fa0
commit
c5ac331b85
|
@ -29,7 +29,7 @@ import org.apache.lucene.store.DataInput;
|
|||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
public final class IDVersionPostingsReader extends PostingsReaderBase {
|
||||
final class IDVersionPostingsReader extends PostingsReaderBase {
|
||||
|
||||
@Override
|
||||
public void init(IndexInput termsIn) throws IOException {
|
||||
|
|
|
@ -30,7 +30,7 @@ import org.apache.lucene.store.IndexOutput;
|
|||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
public final class IDVersionPostingsWriter extends PushPostingsWriterBase {
|
||||
final class IDVersionPostingsWriter extends PushPostingsWriterBase {
|
||||
|
||||
final static String TERMS_CODEC = "IDVersionPostingsWriterTerms";
|
||||
|
||||
|
@ -52,7 +52,7 @@ public final class IDVersionPostingsWriter extends PushPostingsWriterBase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public IDVersionTermState newTermState() {
|
||||
public BlockTermState newTermState() {
|
||||
return new IDVersionTermState();
|
||||
}
|
||||
|
||||
|
@ -78,6 +78,7 @@ public final class IDVersionPostingsWriter extends PushPostingsWriterBase {
|
|||
|
||||
@Override
|
||||
public void startDoc(int docID, int termDocFreq) throws IOException {
|
||||
// TODO: LUCENE-5693: we don't need this check if we fix IW to not send deleted docs to us on flush:
|
||||
if (state.liveDocs != null && state.liveDocs.get(docID) == false) {
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -45,7 +45,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
// Lazy init:
|
||||
IndexInput in;
|
||||
|
||||
static boolean DEBUG = false;
|
||||
// static boolean DEBUG = false;
|
||||
|
||||
private IDVersionSegmentTermsEnumFrame[] stack;
|
||||
private final IDVersionSegmentTermsEnumFrame staticFrame;
|
||||
|
@ -69,7 +69,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
@SuppressWarnings({"rawtypes","unchecked"}) private FST.Arc<Pair<BytesRef,Long>>[] arcs =
|
||||
new FST.Arc[1];
|
||||
|
||||
public IDVersionSegmentTermsEnum(VersionFieldReader fr) throws IOException {
|
||||
IDVersionSegmentTermsEnum(VersionFieldReader fr) throws IOException {
|
||||
this.fr = fr;
|
||||
|
||||
//if (DEBUG) System.out.println("BTTR.init seg=" + segment);
|
||||
|
@ -243,10 +243,10 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
|
||||
assert clearEOF();
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" + fr.fieldInfo.name + ":" + brToString(target) + " minIDVersion=" + minIDVersion + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix);
|
||||
printSeekState(System.out);
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" + fr.fieldInfo.name + ":" + brToString(target) + " minIDVersion=" + minIDVersion + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix);
|
||||
// printSeekState(System.out);
|
||||
// }
|
||||
|
||||
FST.Arc<Pair<BytesRef,Long>> arc;
|
||||
int targetUpto;
|
||||
|
@ -269,9 +269,9 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
// seeks to foobaz, we can re-use the seek state
|
||||
// for the first 5 bytes.
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.println(" re-use current seek state validIndexPrefix=" + validIndexPrefix);
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" re-use current seek state validIndexPrefix=" + validIndexPrefix);
|
||||
// }
|
||||
|
||||
arc = arcs[0];
|
||||
assert arc.isFinal();
|
||||
|
@ -291,9 +291,9 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
// First compare up to valid seek frames:
|
||||
while (targetUpto < targetLimit) {
|
||||
cmp = (term.bytes[targetUpto]&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
|
||||
if (DEBUG) {
|
||||
System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output + " output=" + output);
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output + " output=" + output);
|
||||
// }
|
||||
if (cmp != 0) {
|
||||
break;
|
||||
}
|
||||
|
@ -321,9 +321,9 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
final int targetLimit2 = Math.min(target.length, term.length);
|
||||
while (targetUpto < targetLimit2) {
|
||||
cmp = (term.bytes[targetUpto]&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
|
||||
if (DEBUG) {
|
||||
System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")");
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")");
|
||||
// }
|
||||
if (cmp != 0) {
|
||||
break;
|
||||
}
|
||||
|
@ -340,9 +340,9 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
// Common case: target term is after current
|
||||
// term, ie, app is seeking multiple terms
|
||||
// in sorted order
|
||||
if (DEBUG) {
|
||||
System.out.println(" target is after current (shares prefixLen=" + targetUpto + "); frame.ord=" + lastFrame.ord + "; targetUpto=" + targetUpto);
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" target is after current (shares prefixLen=" + targetUpto + "); frame.ord=" + lastFrame.ord + "; targetUpto=" + targetUpto);
|
||||
// }
|
||||
currentFrame = lastFrame;
|
||||
|
||||
} else if (cmp > 0) {
|
||||
|
@ -352,9 +352,9 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
// (so we scan from the start)
|
||||
targetBeforeCurrentLength = 0;
|
||||
changed = true;
|
||||
if (DEBUG) {
|
||||
System.out.println(" target is before current (shares prefixLen=" + targetUpto + "); rewind frame ord=" + lastFrame.ord);
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" target is before current (shares prefixLen=" + targetUpto + "); rewind frame ord=" + lastFrame.ord);
|
||||
// }
|
||||
currentFrame = lastFrame;
|
||||
currentFrame.rewind();
|
||||
} else {
|
||||
|
@ -364,30 +364,30 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
|
||||
if (currentFrame.maxIDVersion < minIDVersion) {
|
||||
// The max version for all terms in this block is lower than the minVersion
|
||||
if (DEBUG) {
|
||||
System.out.println(" target is same as current maxIDVersion=" + currentFrame.maxIDVersion + " is < minIDVersion=" + minIDVersion + "; return false");
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" target is same as current maxIDVersion=" + currentFrame.maxIDVersion + " is < minIDVersion=" + minIDVersion + "; return false");
|
||||
// }
|
||||
return false;
|
||||
}
|
||||
|
||||
currentFrame.decodeMetaData();
|
||||
if (((IDVersionTermState) currentFrame.state).idVersion < minIDVersion) {
|
||||
// This term's version is lower than the minVersion
|
||||
if (DEBUG) {
|
||||
System.out.println(" target is same as current but version=" + ((IDVersionTermState) currentFrame.state).idVersion + " is < minIDVersion=" + minIDVersion + "; return false");
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" target is same as current but version=" + ((IDVersionTermState) currentFrame.state).idVersion + " is < minIDVersion=" + minIDVersion + "; return false");
|
||||
// }
|
||||
return false;
|
||||
}
|
||||
// System.out.println(" term version=" + ((IDVersionTermState) currentFrame.state).idVersion + " frame version=" + currentFrame.maxIDVersion + " frame ord=" + currentFrame.ord);
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.println(" target is same as current; return true");
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" target is same as current; return true");
|
||||
// }
|
||||
return true;
|
||||
} else {
|
||||
if (DEBUG) {
|
||||
System.out.println(" target is same as current but term doesn't exist");
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" target is same as current but term doesn't exist");
|
||||
// }
|
||||
}
|
||||
//validIndexPrefix = currentFrame.depth;
|
||||
//term.length = target.length;
|
||||
|
@ -404,9 +404,9 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
assert arc.isFinal();
|
||||
assert arc.output != null;
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.println(" no seek state; push root frame");
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" no seek state; push root frame");
|
||||
// }
|
||||
|
||||
output = arc.output;
|
||||
|
||||
|
@ -417,9 +417,9 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
currentFrame = pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), 0);
|
||||
}
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " termExists=" + termExists);
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " termExists=" + termExists);
|
||||
// }
|
||||
|
||||
// We are done sharing the common prefix with the incoming target and where we are currently seek'd; now continue walking the index:
|
||||
while (targetUpto < target.length) {
|
||||
|
@ -431,9 +431,9 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
if (nextArc == null) {
|
||||
|
||||
// Index is exhausted
|
||||
if (DEBUG) {
|
||||
System.out.println(" index: index exhausted label=" + ((char) targetLabel) + " " + Integer.toHexString(targetLabel) + " termExists=" + termExists);
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" index: index exhausted label=" + ((char) targetLabel) + " " + Integer.toHexString(targetLabel) + " termExists=" + termExists);
|
||||
// }
|
||||
|
||||
validIndexPrefix = currentFrame.prefix;
|
||||
//validIndexPrefix = targetUpto;
|
||||
|
@ -444,16 +444,16 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
termExists = false;
|
||||
term.bytes[targetUpto] = (byte) targetLabel;
|
||||
term.length = 1+targetUpto;
|
||||
if (DEBUG) {
|
||||
System.out.println(" FAST NOT_FOUND term=" + brToString(term));
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
|
||||
// }
|
||||
return false;
|
||||
}
|
||||
//System.out.println(" check maxVersion=" + currentFrame.maxIDVersion + " vs " + minIDVersion);
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.println(" frame.maxIDVersion=" + currentFrame.maxIDVersion + " vs minIDVersion=" + minIDVersion);
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" frame.maxIDVersion=" + currentFrame.maxIDVersion + " vs minIDVersion=" + minIDVersion);
|
||||
// }
|
||||
|
||||
if (currentFrame.maxIDVersion < minIDVersion) {
|
||||
// The max version for all terms in this block is lower than the minVersion
|
||||
|
@ -462,54 +462,54 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
termExists = false;
|
||||
term.bytes[targetUpto] = (byte) targetLabel;
|
||||
term.length = 1+targetUpto;
|
||||
if (DEBUG) {
|
||||
System.out.println(" reset current term");
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" reset current term");
|
||||
// }
|
||||
validIndexPrefix = Math.min(validIndexPrefix, term.length);
|
||||
}
|
||||
//if (currentFrame.ord != startFrameOrd) {
|
||||
//termExists = false;
|
||||
//}
|
||||
if (DEBUG) {
|
||||
System.out.println(" FAST version NOT_FOUND term=" + brToString(term) + " targetUpto=" + targetUpto + " currentFrame.maxIDVersion=" + currentFrame.maxIDVersion + " validIndexPrefix=" + validIndexPrefix + " startFrameFP=" + startFrameFP + " vs " + currentFrame.fp + " termExists=" + termExists);
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" FAST version NOT_FOUND term=" + brToString(term) + " targetUpto=" + targetUpto + " currentFrame.maxIDVersion=" + currentFrame.maxIDVersion + " validIndexPrefix=" + validIndexPrefix + " startFrameFP=" + startFrameFP + " vs " + currentFrame.fp + " termExists=" + termExists);
|
||||
// }
|
||||
return false;
|
||||
}
|
||||
|
||||
currentFrame.loadBlock();
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.println(" scan currentFrame ord=" + currentFrame.ord);
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" scan currentFrame ord=" + currentFrame.ord);
|
||||
// }
|
||||
final SeekStatus result = currentFrame.scanToTerm(target, true);
|
||||
if (result == SeekStatus.FOUND) {
|
||||
currentFrame.decodeMetaData();
|
||||
if (((IDVersionTermState) currentFrame.state).idVersion < minIDVersion) {
|
||||
// This term's version is lower than the minVersion
|
||||
if (DEBUG) {
|
||||
System.out.println(" return NOT_FOUND: idVersion=" + ((IDVersionTermState) currentFrame.state).idVersion + " vs minIDVersion=" + minIDVersion);
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" return NOT_FOUND: idVersion=" + ((IDVersionTermState) currentFrame.state).idVersion + " vs minIDVersion=" + minIDVersion);
|
||||
// }
|
||||
return false;
|
||||
}
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.println(" return FOUND term=" + term.utf8ToString() + " " + term);
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" return FOUND term=" + term.utf8ToString() + " " + term);
|
||||
// }
|
||||
|
||||
return true;
|
||||
} else {
|
||||
if (DEBUG) {
|
||||
System.out.println(" got " + result + "; return NOT_FOUND term=" + brToString(term));
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" got " + result + "; return NOT_FOUND term=" + brToString(term));
|
||||
// }
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
// Follow this arc
|
||||
arc = nextArc;
|
||||
if (term.bytes[targetUpto] != (byte) targetLabel) {
|
||||
if (DEBUG) {
|
||||
System.out.println(" now set termExists=false targetUpto=" + targetUpto + " term=" + term.bytes[targetUpto] + " targetLabel=" + targetLabel);
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" now set termExists=false targetUpto=" + targetUpto + " term=" + term.bytes[targetUpto] + " targetLabel=" + targetLabel);
|
||||
// }
|
||||
changed = true;
|
||||
term.bytes[targetUpto] = (byte) targetLabel;
|
||||
termExists = false;
|
||||
|
@ -520,15 +520,15 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output);
|
||||
}
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.println(" index: follow label=" + (char) ((target.bytes[target.offset + targetUpto]&0xff)) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" index: follow label=" + (char) ((target.bytes[target.offset + targetUpto]&0xff)) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
|
||||
// }
|
||||
targetUpto++;
|
||||
|
||||
if (arc.isFinal()) {
|
||||
if (DEBUG) System.out.println(" arc is final!");
|
||||
// if (DEBUG) System.out.println(" arc is final!");
|
||||
currentFrame = pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), targetUpto);
|
||||
if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms);
|
||||
// if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -542,15 +542,15 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
if (!currentFrame.hasTerms) {
|
||||
termExists = false;
|
||||
term.length = targetUpto;
|
||||
if (DEBUG) {
|
||||
System.out.println(" FAST NOT_FOUND term=" + brToString(term));
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
|
||||
// }
|
||||
return false;
|
||||
}
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.println(" frame.maxIDVersion=" + currentFrame.maxIDVersion + " vs minIDVersion=" + minIDVersion);
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" frame.maxIDVersion=" + currentFrame.maxIDVersion + " vs minIDVersion=" + minIDVersion);
|
||||
// }
|
||||
|
||||
if (currentFrame.maxIDVersion < minIDVersion) {
|
||||
// The max version for all terms in this block is lower than the minVersion
|
||||
|
@ -563,9 +563,9 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
|
||||
final SeekStatus result = currentFrame.scanToTerm(target, true);
|
||||
if (result == SeekStatus.FOUND) {
|
||||
if (DEBUG) {
|
||||
System.out.println(" return FOUND term=" + term.utf8ToString() + " " + term);
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" return FOUND term=" + term.utf8ToString() + " " + term);
|
||||
// }
|
||||
currentFrame.decodeMetaData();
|
||||
if (((IDVersionTermState) currentFrame.state).idVersion < minIDVersion) {
|
||||
// This term's version is lower than the minVersion
|
||||
|
@ -573,9 +573,9 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
|
|||
}
|
||||
return true;
|
||||
} else {
|
||||
if (DEBUG) {
|
||||
System.out.println(" got result " + result + "; return NOT_FOUND term=" + term.utf8ToString());
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" got result " + result + "; return NOT_FOUND term=" + term.utf8ToString());
|
||||
// }
|
||||
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -36,7 +36,7 @@ final class IDVersionSegmentTermsEnumFrame {
|
|||
boolean hasTermsOrig;
|
||||
boolean isFloor;
|
||||
|
||||
static boolean DEBUG = IDVersionSegmentTermsEnum.DEBUG;
|
||||
// static boolean DEBUG = IDVersionSegmentTermsEnum.DEBUG;
|
||||
|
||||
/** Highest version of any term in this block. */
|
||||
long maxIDVersion;
|
||||
|
@ -277,7 +277,7 @@ final class IDVersionSegmentTermsEnumFrame {
|
|||
}
|
||||
|
||||
public boolean nextNonLeaf() {
|
||||
if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + " entCount=" + entCount);
|
||||
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + " entCount=" + entCount);
|
||||
assert nextEnt != -1 && nextEnt < entCount: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
|
||||
nextEnt++;
|
||||
final int code = suffixesReader.readVInt();
|
||||
|
@ -299,9 +299,9 @@ final class IDVersionSegmentTermsEnumFrame {
|
|||
ste.termExists = false;
|
||||
subCode = suffixesReader.readVLong();
|
||||
lastSubFP = fp - subCode;
|
||||
if (DEBUG) {
|
||||
System.out.println(" lastSubFP=" + lastSubFP);
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" lastSubFP=" + lastSubFP);
|
||||
// }
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -312,22 +312,22 @@ final class IDVersionSegmentTermsEnumFrame {
|
|||
public void scanToFloorFrame(BytesRef target) {
|
||||
|
||||
if (!isFloor || target.length <= prefix) {
|
||||
if (DEBUG) {
|
||||
System.out.println(" scanToFloorFrame skip: isFloor=" + isFloor + " target.length=" + target.length + " vs prefix=" + prefix);
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" scanToFloorFrame skip: isFloor=" + isFloor + " target.length=" + target.length + " vs prefix=" + prefix);
|
||||
// }
|
||||
return;
|
||||
}
|
||||
|
||||
final int targetLabel = target.bytes[target.offset + prefix] & 0xFF;
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.println(" scanToFloorFrame fpOrig=" + fpOrig + " targetLabel=" + ((char) targetLabel) + " vs nextFloorLabel=" + ((char) nextFloorLabel) + " numFollowFloorBlocks=" + numFollowFloorBlocks);
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" scanToFloorFrame fpOrig=" + fpOrig + " targetLabel=" + ((char) targetLabel) + " vs nextFloorLabel=" + ((char) nextFloorLabel) + " numFollowFloorBlocks=" + numFollowFloorBlocks);
|
||||
// }
|
||||
|
||||
if (targetLabel < nextFloorLabel) {
|
||||
if (DEBUG) {
|
||||
System.out.println(" already on correct block");
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" already on correct block");
|
||||
// }
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -338,25 +338,25 @@ final class IDVersionSegmentTermsEnumFrame {
|
|||
final long code = floorDataReader.readVLong();
|
||||
newFP = fpOrig + (code >>> 1);
|
||||
hasTerms = (code & 1) != 0;
|
||||
if (DEBUG) {
|
||||
System.out.println(" label=" + ((char) nextFloorLabel) + " fp=" + newFP + " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" label=" + ((char) nextFloorLabel) + " fp=" + newFP + " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
|
||||
// }
|
||||
|
||||
isLastInFloor = numFollowFloorBlocks == 1;
|
||||
numFollowFloorBlocks--;
|
||||
|
||||
if (isLastInFloor) {
|
||||
nextFloorLabel = 256;
|
||||
if (DEBUG) {
|
||||
System.out.println(" stop! last block nextFloorLabel=" + ((char) nextFloorLabel));
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" stop! last block nextFloorLabel=" + ((char) nextFloorLabel));
|
||||
// }
|
||||
break;
|
||||
} else {
|
||||
nextFloorLabel = floorDataReader.readByte() & 0xff;
|
||||
if (targetLabel < nextFloorLabel) {
|
||||
if (DEBUG) {
|
||||
System.out.println(" stop! nextFloorLabel=" + ((char) nextFloorLabel));
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" stop! nextFloorLabel=" + ((char) nextFloorLabel));
|
||||
// }
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -364,15 +364,15 @@ final class IDVersionSegmentTermsEnumFrame {
|
|||
|
||||
if (newFP != fp) {
|
||||
// Force re-load of the block:
|
||||
if (DEBUG) {
|
||||
System.out.println(" force switch to fp=" + newFP + " oldFP=" + fp);
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" force switch to fp=" + newFP + " oldFP=" + fp);
|
||||
// }
|
||||
nextEnt = -1;
|
||||
fp = newFP;
|
||||
} else {
|
||||
if (DEBUG) {
|
||||
System.out.println(" stay on same fp=" + newFP);
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" stay on same fp=" + newFP);
|
||||
// }
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -472,7 +472,7 @@ final class IDVersionSegmentTermsEnumFrame {
|
|||
// scan the entries check if the suffix matches.
|
||||
public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException {
|
||||
|
||||
if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + IDVersionSegmentTermsEnum.brToString(target) + " term=" + IDVersionSegmentTermsEnum.brToString(ste.term));
|
||||
// if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + IDVersionSegmentTermsEnum.brToString(target) + " term=" + IDVersionSegmentTermsEnum.brToString(ste.term));
|
||||
|
||||
assert nextEnt != -1;
|
||||
|
||||
|
@ -495,13 +495,13 @@ final class IDVersionSegmentTermsEnumFrame {
|
|||
|
||||
suffix = suffixesReader.readVInt();
|
||||
|
||||
if (DEBUG) {
|
||||
BytesRef suffixBytesRef = new BytesRef();
|
||||
suffixBytesRef.bytes = suffixBytes;
|
||||
suffixBytesRef.offset = suffixesReader.getPosition();
|
||||
suffixBytesRef.length = suffix;
|
||||
System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix=" + IDVersionSegmentTermsEnum.brToString(suffixBytesRef));
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// BytesRef suffixBytesRef = new BytesRef();
|
||||
// suffixBytesRef.bytes = suffixBytes;
|
||||
// suffixBytesRef.offset = suffixesReader.getPosition();
|
||||
// suffixBytesRef.length = suffix;
|
||||
// System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix=" + IDVersionSegmentTermsEnum.brToString(suffixBytesRef));
|
||||
// }
|
||||
|
||||
final int termLen = prefix + suffix;
|
||||
startBytePos = suffixesReader.getPosition();
|
||||
|
@ -598,7 +598,7 @@ final class IDVersionSegmentTermsEnumFrame {
|
|||
// scan the entries check if the suffix matches.
|
||||
public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws IOException {
|
||||
|
||||
if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix + " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + IDVersionSegmentTermsEnum.brToString(target) + " term=" + IDVersionSegmentTermsEnum.brToString(ste.term));
|
||||
// if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix + " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + IDVersionSegmentTermsEnum.brToString(target) + " term=" + IDVersionSegmentTermsEnum.brToString(ste.term));
|
||||
|
||||
assert nextEnt != -1;
|
||||
|
||||
|
|
|
@ -58,32 +58,8 @@ import org.apache.lucene.util.fst.PairOutputs.Pair;
|
|||
import org.apache.lucene.util.fst.PairOutputs;
|
||||
import org.apache.lucene.util.fst.Util;
|
||||
|
||||
/** A block-based terms index and dictionary that assigns
|
||||
* terms to variable length blocks according to how they
|
||||
* share prefixes. The terms index is a prefix trie
|
||||
* whose leaves are term blocks. The advantage of this
|
||||
* approach is that seekExact is often able to
|
||||
* determine a term cannot exist without doing any IO, and
|
||||
* intersection with Automata is very fast. Note that this
|
||||
* terms dictionary has its own fixed terms index (ie, it
|
||||
* does not support a pluggable terms index
|
||||
* implementation).
|
||||
*
|
||||
* <p><b>NOTE</b>: this terms dictionary supports
|
||||
* min/maxItemsPerBlock during indexing to control how
|
||||
* much memory the terms index uses.</p>
|
||||
*
|
||||
* <p>The data structure used by this implementation is very
|
||||
* similar to a burst trie
|
||||
* (http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.18.3499),
|
||||
* but with added logic to break up too-large blocks of all
|
||||
* terms sharing a given prefix into smaller ones.</p>
|
||||
*
|
||||
* <p>Use {@link org.apache.lucene.index.CheckIndex} with the <code>-verbose</code>
|
||||
* option to see summary statistics on the blocks in the
|
||||
* dictionary.
|
||||
*
|
||||
* See {@link BlockTreeTermsWriter}.
|
||||
/**
|
||||
* See {@link VersionBlockTreeTermsWriter}.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
|
|
|
@ -25,6 +25,7 @@ import org.apache.lucene.codecs.BlockTermState;
|
|||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.FieldsConsumer;
|
||||
import org.apache.lucene.codecs.PostingsWriterBase;
|
||||
import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
|
@ -53,10 +54,6 @@ import org.apache.lucene.util.fst.PositiveIntOutputs;
|
|||
import org.apache.lucene.util.fst.Util;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
// nocommit break out the "don't write del docs on flush"
|
||||
|
||||
// nocommit don't write/read stats
|
||||
|
||||
/*
|
||||
TODO:
|
||||
|
||||
|
@ -97,7 +94,7 @@ import org.apache.lucene.util.packed.PackedInts;
|
|||
|
||||
final class VersionBlockTreeTermsWriter extends FieldsConsumer {
|
||||
|
||||
private static boolean DEBUG = IDVersionSegmentTermsEnum.DEBUG;
|
||||
// private static boolean DEBUG = IDVersionSegmentTermsEnum.DEBUG;
|
||||
|
||||
static final PairOutputs<BytesRef,Long> FST_OUTPUTS = new PairOutputs<>(ByteSequenceOutputs.getSingleton(),
|
||||
PositiveIntOutputs.getSingleton());
|
||||
|
@ -106,12 +103,12 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
|
|||
|
||||
/** Suggested default value for the {@code
|
||||
* minItemsInBlock} parameter to {@link
|
||||
* #BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int)}. */
|
||||
* #VersionBlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int)}. */
|
||||
public final static int DEFAULT_MIN_BLOCK_SIZE = 25;
|
||||
|
||||
/** Suggested default value for the {@code
|
||||
* maxItemsInBlock} parameter to {@link
|
||||
* #BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int)}. */
|
||||
* #VersionBlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int)}. */
|
||||
public final static int DEFAULT_MAX_BLOCK_SIZE = 48;
|
||||
|
||||
//public final static boolean DEBUG = false;
|
||||
|
@ -516,6 +513,7 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
|
|||
// TODO: we could store min & max suffix start byte
|
||||
// in each block, to make floor blocks authoritative
|
||||
|
||||
/*
|
||||
if (DEBUG) {
|
||||
final BytesRef prefix = new BytesRef(prefixLength);
|
||||
for(int m=0;m<prefixLength;m++) {
|
||||
|
@ -525,6 +523,7 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
|
|||
//System.out.println("\nWBS count=" + count + " prefix=" + prefix.utf8ToString() + " " + prefix);
|
||||
System.out.println("writeBlocks: prefix=" + toString(prefix) + " " + prefix + " count=" + count + " pending.size()=" + pending.size());
|
||||
}
|
||||
*/
|
||||
//System.out.println("\nwbs count=" + count);
|
||||
|
||||
final int savLabel = prevTerm.ints[prevTerm.offset + prefixLength];
|
||||
|
@ -745,9 +744,9 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
|
|||
// Write block header:
|
||||
out.writeVInt((length<<1)|(isLastInFloor ? 1:0));
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + toString(prefix) + " entCount=" + length + " startFP=" + startFP + " futureTermCount=" + futureTermCount + (isFloor ? (" floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") + " isLastInFloor=" + isLastInFloor);
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + toString(prefix) + " entCount=" + length + " startFP=" + startFP + " futureTermCount=" + futureTermCount + (isFloor ? (" floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") + " isLastInFloor=" + isLastInFloor);
|
||||
// }
|
||||
|
||||
// 1st pass: pack term suffix bytes into byte[] blob
|
||||
// TODO: cutover to bulk int codec... simple64?
|
||||
|
@ -791,12 +790,12 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
|
|||
BlockTermState state = term.state;
|
||||
maxVersionInBlock = Math.max(maxVersionInBlock, ((IDVersionTermState) state).idVersion);
|
||||
final int suffix = term.term.length - prefixLength;
|
||||
if (DEBUG) {
|
||||
BytesRef suffixBytes = new BytesRef(suffix);
|
||||
System.arraycopy(term.term.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
|
||||
suffixBytes.length = suffix;
|
||||
System.out.println(" " + (countx++) + ": write term suffix=" + toString(suffixBytes));
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// BytesRef suffixBytes = new BytesRef(suffix);
|
||||
// System.arraycopy(term.term.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
|
||||
// suffixBytes.length = suffix;
|
||||
// System.out.println(" " + (countx++) + ": write term suffix=" + toString(suffixBytes));
|
||||
// }
|
||||
// For leaf block we write suffix straight
|
||||
suffixWriter.writeVInt(suffix);
|
||||
suffixWriter.writeBytes(term.term.bytes, prefixLength, suffix);
|
||||
|
@ -821,12 +820,12 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
|
|||
BlockTermState state = term.state;
|
||||
maxVersionInBlock = Math.max(maxVersionInBlock, ((IDVersionTermState) state).idVersion);
|
||||
final int suffix = term.term.length - prefixLength;
|
||||
if (DEBUG) {
|
||||
BytesRef suffixBytes = new BytesRef(suffix);
|
||||
System.arraycopy(term.term.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
|
||||
suffixBytes.length = suffix;
|
||||
System.out.println(" " + (countx++) + ": write term suffix=" + toString(suffixBytes));
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// BytesRef suffixBytes = new BytesRef(suffix);
|
||||
// System.arraycopy(term.term.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
|
||||
// suffixBytes.length = suffix;
|
||||
// System.out.println(" " + (countx++) + ": write term suffix=" + toString(suffixBytes));
|
||||
// }
|
||||
// For non-leaf block we borrow 1 bit to record
|
||||
// if entry is term or sub-block
|
||||
suffixWriter.writeVInt(suffix<<1);
|
||||
|
@ -864,12 +863,12 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
|
|||
suffixWriter.writeBytes(block.prefix.bytes, prefixLength, suffix);
|
||||
assert block.fp < startFP;
|
||||
|
||||
if (DEBUG) {
|
||||
BytesRef suffixBytes = new BytesRef(suffix);
|
||||
System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
|
||||
suffixBytes.length = suffix;
|
||||
System.out.println(" " + (countx++) + ": write sub-block suffix=" + toString(suffixBytes) + " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// BytesRef suffixBytes = new BytesRef(suffix);
|
||||
// System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
|
||||
// suffixBytes.length = suffix;
|
||||
// System.out.println(" " + (countx++) + ": write sub-block suffix=" + toString(suffixBytes) + " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
|
||||
// }
|
||||
|
||||
suffixWriter.writeVLong(startFP - block.fp);
|
||||
subIndices.add(block.index);
|
||||
|
@ -937,6 +936,7 @@ final class VersionBlockTreeTermsWriter extends FieldsConsumer {
|
|||
public void write(BytesRef text, TermsEnum termsEnum) throws IOException {
|
||||
|
||||
BlockTermState state = postingsWriter.writeTerm(text, termsEnum, docsSeen);
|
||||
// TODO: LUCENE-5693: we don't need this check if we fix IW to not send deleted docs to us on flush:
|
||||
if (state != null && ((IDVersionPostingsWriter) postingsWriter).lastDocID != -1) {
|
||||
assert state.docFreq != 0;
|
||||
assert fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY || state.totalTermFreq >= state.docFreq: "postingsWriter=" + postingsWriter;
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
</head>
|
||||
<body>
|
||||
A primary-key postings format that associates a monotonically increasing version with each term.
|
||||
</body>
|
||||
</html>
|
|
@ -612,7 +612,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
// following floor blocks:
|
||||
|
||||
void writeBlocks(IntsRef prevTerm, int prefixLength, int count) throws IOException {
|
||||
System.out.println("writeBlocks count=" + count);
|
||||
// System.out.println("writeBlocks count=" + count);
|
||||
if (count <= maxItemsInBlock) {
|
||||
// Easy case: not floor block. Eg, prefix is "foo",
|
||||
// and we found 30 terms/sub-blocks starting w/ that
|
||||
|
@ -621,7 +621,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
final PendingBlock nonFloorBlock = writeBlock(prevTerm, prefixLength, prefixLength, count, count, 0, false, -1, true);
|
||||
nonFloorBlock.compileIndex(null, scratchBytes);
|
||||
pending.add(nonFloorBlock);
|
||||
System.out.println(" 1 block");
|
||||
// System.out.println(" 1 block");
|
||||
} else {
|
||||
// Floor block case. Eg, prefix is "foo" but we
|
||||
// have 100 terms/sub-blocks starting w/ that
|
||||
|
@ -778,7 +778,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
floorBlocks.add(floorBlock);
|
||||
}
|
||||
curStart -= pendingCount;
|
||||
System.out.println(" floor=" + pendingCount);
|
||||
// System.out.println(" floor=" + pendingCount);
|
||||
//System.out.println(" = " + pendingCount);
|
||||
pendingCount = 0;
|
||||
|
||||
|
|
|
@ -51,7 +51,7 @@ final class SegmentTermsEnum extends TermsEnum {
|
|||
|
||||
private int targetBeforeCurrentLength;
|
||||
|
||||
static boolean DEBUG = false;
|
||||
// static boolean DEBUG = false;
|
||||
|
||||
private final ByteArrayDataInput scratchReader = new ByteArrayDataInput();
|
||||
|
||||
|
@ -70,9 +70,9 @@ final class SegmentTermsEnum extends TermsEnum {
|
|||
public SegmentTermsEnum(FieldReader fr) throws IOException {
|
||||
this.fr = fr;
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.println("BTTR.init seg=" + fr.parent.segment);
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println("BTTR.init seg=" + fr.parent.segment);
|
||||
// }
|
||||
stack = new SegmentTermsEnumFrame[0];
|
||||
|
||||
// Used to hold seek by TermState, or cached seek
|
||||
|
@ -323,10 +323,10 @@ final class SegmentTermsEnum extends TermsEnum {
|
|||
|
||||
assert clearEOF();
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" + fr.fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix);
|
||||
printSeekState(System.out);
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" + fr.fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix);
|
||||
// printSeekState(System.out);
|
||||
// }
|
||||
|
||||
FST.Arc<BytesRef> arc;
|
||||
int targetUpto;
|
||||
|
@ -365,9 +365,9 @@ final class SegmentTermsEnum extends TermsEnum {
|
|||
// First compare up to valid seek frames:
|
||||
while (targetUpto < targetLimit) {
|
||||
cmp = (term.bytes[targetUpto]&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
|
||||
if (DEBUG) {
|
||||
System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output + " output=" + output);
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output + " output=" + output);
|
||||
// }
|
||||
if (cmp != 0) {
|
||||
break;
|
||||
}
|
||||
|
@ -392,9 +392,9 @@ final class SegmentTermsEnum extends TermsEnum {
|
|||
final int targetLimit2 = Math.min(target.length, term.length);
|
||||
while (targetUpto < targetLimit2) {
|
||||
cmp = (term.bytes[targetUpto]&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
|
||||
if (DEBUG) {
|
||||
System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")");
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")");
|
||||
// }
|
||||
if (cmp != 0) {
|
||||
break;
|
||||
}
|
||||
|
@ -581,10 +581,10 @@ final class SegmentTermsEnum extends TermsEnum {
|
|||
|
||||
assert clearEOF();
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.println("\nBTTR.seekCeil seg=" + fr.parent.segment + " target=" + fr.fieldInfo.name + ":" + target.utf8ToString() + " " + target + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix);
|
||||
printSeekState(System.out);
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println("\nBTTR.seekCeil seg=" + fr.parent.segment + " target=" + fr.fieldInfo.name + ":" + target.utf8ToString() + " " + target + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix);
|
||||
// printSeekState(System.out);
|
||||
// }
|
||||
|
||||
FST.Arc<BytesRef> arc;
|
||||
int targetUpto;
|
||||
|
@ -893,10 +893,10 @@ final class SegmentTermsEnum extends TermsEnum {
|
|||
targetBeforeCurrentLength = currentFrame.ord;
|
||||
|
||||
assert !eof;
|
||||
if (DEBUG) {
|
||||
System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" + brToString(term) + " termExists?=" + termExists + " field=" + fr.fieldInfo.name + " termBlockOrd=" + currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix);
|
||||
printSeekState(System.out);
|
||||
}
|
||||
// if (DEBUG) {
|
||||
// System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" + brToString(term) + " termExists?=" + termExists + " field=" + fr.fieldInfo.name + " termBlockOrd=" + currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix);
|
||||
// printSeekState(System.out);
|
||||
// }
|
||||
|
||||
if (currentFrame == staticFrame) {
|
||||
// If seek was previously called and the term was
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
</head>
|
||||
<body>
|
||||
BlockTree terms dictionary.
|
||||
|
||||
<p>
|
||||
This terms dictionary organizes all terms into blocks according to
|
||||
shared prefix, and then stores the prefix trie in memory as an FST as
|
||||
the index structure. It allows you to plug in your own {@link
|
||||
org.apache.lucene.codecs.PostingsBaseFormat} to implement the postings.
|
||||
</p>
|
||||
|
||||
<p>See {@link org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter}
|
||||
for the file format.
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
|
@ -127,6 +127,8 @@ class BufferedUpdates {
|
|||
final AtomicInteger numTermDeletes = new AtomicInteger();
|
||||
final AtomicInteger numNumericUpdates = new AtomicInteger();
|
||||
final AtomicInteger numBinaryUpdates = new AtomicInteger();
|
||||
|
||||
// TODO: rename these three: put "deleted" prefix in front:
|
||||
final Map<Term,Integer> terms = new HashMap<>();
|
||||
final Map<Query,Integer> queries = new HashMap<>();
|
||||
final List<Integer> docIDs = new ArrayList<>();
|
||||
|
|
|
@ -168,11 +168,6 @@ public class FilterAtomicReader extends AtomicReader {
|
|||
return in.seekCeil(text);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean seekExact(BytesRef text) throws IOException {
|
||||
return in.seekExact(text);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void seekExact(long ord) throws IOException {
|
||||
in.seekExact(ord);
|
||||
|
|
|
@ -60,9 +60,6 @@ final class FreqProxTermsWriter extends TermsHash {
|
|||
int delDocLimit = segDeletes.get(deleteTerm);
|
||||
while (true) {
|
||||
int doc = docsEnum.nextDoc();
|
||||
if (doc == DocsEnum.NO_MORE_DOCS) {
|
||||
break;
|
||||
}
|
||||
if (doc < delDocLimit) {
|
||||
if (state.liveDocs == null) {
|
||||
state.liveDocs = state.segmentInfo.getCodec().liveDocsFormat().newLiveDocs(state.segmentInfo.getDocCount());
|
||||
|
|
|
@ -98,18 +98,15 @@ public class PairOutputs<A,B> extends Outputs<PairOutputs.Pair<A,B>> {
|
|||
final boolean noOutput2 = pair.output2.equals(outputs2.getNoOutput());
|
||||
|
||||
if (noOutput1 && pair.output1 != outputs1.getNoOutput()) {
|
||||
System.out.println("no1");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (noOutput2 && pair.output2 != outputs2.getNoOutput()) {
|
||||
System.out.println("no2");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (noOutput1 && noOutput2) {
|
||||
if (pair != NO_OUTPUT) {
|
||||
System.out.println("no3");
|
||||
return false;
|
||||
} else {
|
||||
return true;
|
||||
|
|
|
@ -187,8 +187,7 @@ public class TestFilterAtomicReader extends LuceneTestCase {
|
|||
checkOverrideMethods(FilterAtomicReader.class);
|
||||
checkOverrideMethods(FilterAtomicReader.FilterFields.class);
|
||||
checkOverrideMethods(FilterAtomicReader.FilterTerms.class);
|
||||
// nocommit this gets angry because I override testExact but this is important!!
|
||||
//checkOverrideMethods(FilterAtomicReader.FilterTermsEnum.class);
|
||||
checkOverrideMethods(FilterAtomicReader.FilterTermsEnum.class);
|
||||
checkOverrideMethods(FilterAtomicReader.FilterDocsEnum.class);
|
||||
checkOverrideMethods(FilterAtomicReader.FilterDocsAndPositionsEnum.class);
|
||||
}
|
||||
|
|
|
@ -6,6 +6,7 @@ import java.util.Iterator;
|
|||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.VirtualMethod;
|
||||
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
||||
|
||||
/*
|
||||
|
@ -117,12 +118,16 @@ public class AssertingAtomicReader extends FilterAtomicReader {
|
|||
}
|
||||
}
|
||||
|
||||
static final VirtualMethod<TermsEnum> SEEK_EXACT = new VirtualMethod<>(TermsEnum.class, "seekExact", BytesRef.class);
|
||||
|
||||
static class AssertingTermsEnum extends FilterTermsEnum {
|
||||
private enum State {INITIAL, POSITIONED, UNPOSITIONED};
|
||||
private State state = State.INITIAL;
|
||||
private final boolean delegateOverridesSeekExact;
|
||||
|
||||
public AssertingTermsEnum(TermsEnum in) {
|
||||
super(in);
|
||||
delegateOverridesSeekExact = SEEK_EXACT.isOverriddenAsOf(in.getClass());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -213,13 +218,18 @@ public class AssertingAtomicReader extends FilterAtomicReader {
|
|||
@Override
|
||||
public boolean seekExact(BytesRef text) throws IOException {
|
||||
assert text.isValid();
|
||||
if (super.seekExact(text)) {
|
||||
boolean result;
|
||||
if (delegateOverridesSeekExact) {
|
||||
result = in.seekExact(text);
|
||||
} else {
|
||||
result = super.seekExact(text);
|
||||
}
|
||||
if (result) {
|
||||
state = State.POSITIONED;
|
||||
return true;
|
||||
} else {
|
||||
state = State.UNPOSITIONED;
|
||||
return false;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -31,8 +31,8 @@ import org.apache.lucene.util.VirtualMethod;
|
|||
/** Wraps a BulkScorer with additional checks */
|
||||
public class AssertingBulkScorer extends BulkScorer {
|
||||
|
||||
private static final VirtualMethod<BulkScorer> SCORE_COLLECTOR = new VirtualMethod<BulkScorer>(BulkScorer.class, "score", LeafCollector.class);
|
||||
private static final VirtualMethod<BulkScorer> SCORE_COLLECTOR_RANGE = new VirtualMethod<BulkScorer>(BulkScorer.class, "score", LeafCollector.class, int.class);
|
||||
private static final VirtualMethod<BulkScorer> SCORE_COLLECTOR = new VirtualMethod<>(BulkScorer.class, "score", LeafCollector.class);
|
||||
private static final VirtualMethod<BulkScorer> SCORE_COLLECTOR_RANGE = new VirtualMethod<>(BulkScorer.class, "score", LeafCollector.class, int.class);
|
||||
|
||||
public static BulkScorer wrap(Random random, BulkScorer other) {
|
||||
if (other == null || other instanceof AssertingBulkScorer) {
|
||||
|
|
|
@ -176,9 +176,6 @@ public class TestRuleLimitSysouts extends TestRuleAdapter {
|
|||
}
|
||||
|
||||
protected boolean isEnforced() {
|
||||
return false;
|
||||
// nocommit
|
||||
/*
|
||||
Class<?> target = RandomizedTest.getContext().getTargetClass();
|
||||
|
||||
if (LuceneTestCase.VERBOSE ||
|
||||
|
@ -192,7 +189,6 @@ public class TestRuleLimitSysouts extends TestRuleAdapter {
|
|||
}
|
||||
|
||||
return true;
|
||||
*/
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
Loading…
Reference in New Issue