mirror of https://github.com/apache/lucene.git
Move `brToString(BytesRef)` to `ToStringUtils` (#13068)
This commit is contained in:
parent
b16d7117a3
commit
9206bdeb06
|
@ -205,7 +205,8 @@ Bug Fixes
|
|||
|
||||
Other
|
||||
---------------------
|
||||
(No changes)
|
||||
|
||||
* GITHUB#13068: Replace numerous `brToString(BytesRef)` copies with a `ToStringUtils` method (Dmitry Cherniachenko)
|
||||
|
||||
======================== Lucene 9.10.0 =======================
|
||||
|
||||
|
|
|
@ -191,7 +191,7 @@ public final class FieldReader extends Terms {
|
|||
@Override
|
||||
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
|
||||
// if (DEBUG) System.out.println(" FieldReader.intersect startTerm=" +
|
||||
// BlockTreeTermsWriter.brToString(startTerm));
|
||||
// ToStringUtils.bytesRefToString(startTerm));
|
||||
// System.out.println("intersect: " + compiled.type + " a=" + compiled.automaton);
|
||||
// TODO: we could push "it's a range" or "it's a prefix" down into IntersectTermsEnum?
|
||||
// can we optimize knowing that...?
|
||||
|
|
|
@ -543,19 +543,6 @@ final class IntersectTermsEnum extends BaseTermsEnum {
|
|||
}
|
||||
}
|
||||
|
||||
// for debugging
|
||||
@SuppressWarnings("unused")
|
||||
static String brToString(BytesRef b) {
|
||||
try {
|
||||
return b.utf8ToString() + " " + b;
|
||||
} catch (Throwable t) {
|
||||
// If BytesRef isn't actually UTF8, or it's eg a
|
||||
// prefix of UTF8 that ends mid-unicode-char, we
|
||||
// fallback to hex:
|
||||
return b.toString();
|
||||
}
|
||||
}
|
||||
|
||||
private void copyTerm() {
|
||||
final int len = currentFrame.prefix + currentFrame.suffix;
|
||||
if (term.bytes.length < len) {
|
||||
|
|
|
@ -354,24 +354,6 @@ public final class Lucene40BlockTreeTermsReader extends FieldsProducer {
|
|||
return fieldMap.size();
|
||||
}
|
||||
|
||||
// for debugging
|
||||
String brToString(BytesRef b) {
|
||||
if (b == null) {
|
||||
return "null";
|
||||
} else {
|
||||
try {
|
||||
return b.utf8ToString() + " " + b;
|
||||
} catch (
|
||||
@SuppressWarnings("unused")
|
||||
Throwable t) {
|
||||
// If BytesRef isn't actually UTF8, or it's eg a
|
||||
// prefix of UTF8 that ends mid-unicode-char, we
|
||||
// fallback to hex:
|
||||
return b.toString();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void checkIntegrity() throws IOException {
|
||||
// terms index
|
||||
|
|
|
@ -256,8 +256,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
final SegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord);
|
||||
f.arc = arc;
|
||||
if (f.fpOrig == fp && f.nextEnt != -1) {
|
||||
// if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp + "
|
||||
// isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
|
||||
// if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp +
|
||||
// " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
|
||||
// f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" +
|
||||
// term.length + " vs prefix=" + f.prefix);
|
||||
// if (f.prefix > targetBeforeCurrentLength) {
|
||||
|
@ -279,7 +279,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
// final int sav = term.length;
|
||||
// term.length = length;
|
||||
// System.out.println(" push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" +
|
||||
// f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term));
|
||||
// f.hasTerms + " isFloor=" + f.isFloor + " pref=" + ToStringUtils.bytesRefToString(term));
|
||||
// term.length = sav;
|
||||
// }
|
||||
}
|
||||
|
@ -299,27 +299,6 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
// for debugging
|
||||
@SuppressWarnings("unused")
|
||||
static String brToString(BytesRef b) {
|
||||
try {
|
||||
return b.utf8ToString() + " " + b;
|
||||
} catch (Throwable t) {
|
||||
// If BytesRef isn't actually UTF8, or it's eg a
|
||||
// prefix of UTF8 that ends mid-unicode-char, we
|
||||
// fallback to hex:
|
||||
return b.toString();
|
||||
}
|
||||
}
|
||||
|
||||
// for debugging
|
||||
@SuppressWarnings("unused")
|
||||
static String brToString(BytesRefBuilder b) {
|
||||
return brToString(b.get());
|
||||
}
|
||||
*/
|
||||
|
||||
@Override
|
||||
public boolean seekExact(BytesRef target) throws IOException {
|
||||
|
||||
|
@ -337,8 +316,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
// if (DEBUG) {
|
||||
// System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" +
|
||||
// fr.fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?="
|
||||
// + termExists + ") validIndexPrefix=" + validIndexPrefix);
|
||||
// fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) + " current=" +
|
||||
// ToStringUtils.bytesRefToString(term) + " (exists?=" + termExists +
|
||||
// ") validIndexPrefix=" + validIndexPrefix);
|
||||
// printSeekState(System.out);
|
||||
// }
|
||||
|
||||
|
@ -496,8 +476,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
}
|
||||
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
|
||||
// currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
|
||||
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
|
||||
// " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
|
||||
// targetBeforeCurrentLength);
|
||||
// }
|
||||
|
||||
|
@ -528,7 +508,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
term.setByteAt(targetUpto, (byte) targetLabel);
|
||||
term.setLength(1 + targetUpto);
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
|
||||
// System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
|
||||
// }
|
||||
return false;
|
||||
}
|
||||
|
@ -544,7 +524,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
} else {
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" got " + result + "; return NOT_FOUND term=" +
|
||||
// brToString(term));
|
||||
// ToStringUtils.bytesRefToString(term));
|
||||
// }
|
||||
return false;
|
||||
}
|
||||
|
@ -587,7 +567,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
termExists = false;
|
||||
term.setLength(targetUpto);
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
|
||||
// System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
|
||||
// }
|
||||
return false;
|
||||
}
|
||||
|
@ -623,7 +603,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
// if (DEBUG) {
|
||||
// System.out.println("\nBTTR.seekCeil seg=" + fr.parent.segment + " target=" +
|
||||
// fr.fieldInfo.name + ":" + brToString(target) + " " + target + " current=" + brToString(term)
|
||||
// fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) +
|
||||
// " current=" + ToStringUtils.bytesRefToString(term)
|
||||
// + " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix);
|
||||
// printSeekState(System.out);
|
||||
// }
|
||||
|
@ -667,9 +648,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit +
|
||||
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + "
|
||||
// vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output +
|
||||
// " output=" + output);
|
||||
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) +
|
||||
// " vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output
|
||||
// + " output=" + output);
|
||||
// }
|
||||
if (cmp != 0) {
|
||||
break;
|
||||
|
@ -781,8 +762,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
}
|
||||
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
|
||||
// currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
|
||||
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
|
||||
// " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
|
||||
// targetBeforeCurrentLength);
|
||||
// }
|
||||
|
||||
|
@ -818,7 +799,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
if (next() != null) {
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" return NOT_FOUND term=" + brToString(term));
|
||||
// System.out.println(" return NOT_FOUND term=" +
|
||||
// ToStringUtils.bytesRefToString(term));
|
||||
// }
|
||||
return SeekStatus.NOT_FOUND;
|
||||
} else {
|
||||
|
@ -829,7 +811,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
}
|
||||
} else {
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" return " + result + " term=" + brToString(term));
|
||||
// System.out.println(" return " + result + " term=" +
|
||||
// ToStringUtils.bytesRefToString(term));
|
||||
// }
|
||||
return result;
|
||||
}
|
||||
|
@ -1029,9 +1012,10 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
assert !eof;
|
||||
// if (DEBUG) {
|
||||
// System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" + brToString(term) + "
|
||||
// termExists?=" + termExists + " field=" + fr.fieldInfo.name + " termBlockOrd=" +
|
||||
// currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix);
|
||||
// System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" +
|
||||
// ToStringUtils.bytesRefToString(term) + " termExists?=" + termExists + " field=" +
|
||||
// fr.fieldInfo.name + " termBlockOrd=" + currentFrame.state.termBlockOrd +
|
||||
// " validIndexPrefix=" + validIndexPrefix);
|
||||
// printSeekState(System.out);
|
||||
// }
|
||||
|
||||
|
@ -1095,8 +1079,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
// try to scan to the right floor frame:
|
||||
currentFrame.loadBlock();
|
||||
} else {
|
||||
// if (DEBUG) System.out.println(" return term=" + brToString(term) + " currentFrame.ord="
|
||||
// + currentFrame.ord);
|
||||
// if (DEBUG) System.out.println(" return term=" + ToStringUtils.bytesRefToString(term) +
|
||||
// " currentFrame.ord=" + currentFrame.ord);
|
||||
return term.get();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -317,8 +317,8 @@ final class SegmentTermsEnumFrame {
|
|||
}
|
||||
|
||||
public void nextLeaf() {
|
||||
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + "
|
||||
// entCount=" + entCount);
|
||||
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt +
|
||||
// " entCount=" + entCount);
|
||||
assert nextEnt != -1 && nextEnt < entCount
|
||||
: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
|
||||
nextEnt++;
|
||||
|
@ -410,8 +410,8 @@ final class SegmentTermsEnumFrame {
|
|||
newFP = fpOrig + (code >>> 1);
|
||||
hasTerms = (code & 1) != 0;
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" label=" + toHex(nextFloorLabel) + " fp=" + newFP + "
|
||||
// hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
|
||||
// System.out.println(" label=" + toHex(nextFloorLabel) + " fp=" + newFP +
|
||||
// " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
|
||||
// }
|
||||
|
||||
isLastInFloor = numFollowFloorBlocks == 1;
|
||||
|
@ -566,28 +566,14 @@ final class SegmentTermsEnumFrame {
|
|||
private long subCode;
|
||||
CompressionAlgorithm compressionAlg = CompressionAlgorithm.NO_COMPRESSION;
|
||||
|
||||
// for debugging
|
||||
/*
|
||||
@SuppressWarnings("unused")
|
||||
static String brToString(BytesRef b) {
|
||||
try {
|
||||
return b.utf8ToString() + " " + b;
|
||||
} catch (Throwable t) {
|
||||
// If BytesRef isn't actually UTF8, or it's eg a
|
||||
// prefix of UTF8 that ends mid-unicode-char, we
|
||||
// fallback to hex:
|
||||
return b.toString();
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
// Target's prefix matches this block's prefix; we
|
||||
// scan the entries check if the suffix matches.
|
||||
public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException {
|
||||
|
||||
// if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + "
|
||||
// nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" +
|
||||
// brToString(term));
|
||||
// if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix +
|
||||
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
|
||||
// ToStringUtils.bytesRefToString(target) +
|
||||
// " term=" + ToStringUtils.bytesRefToString(term));
|
||||
|
||||
assert nextEnt != -1;
|
||||
|
||||
|
@ -617,7 +603,7 @@ final class SegmentTermsEnumFrame {
|
|||
// suffixBytesRef.offset = suffixesReader.getPosition();
|
||||
// suffixBytesRef.length = suffix;
|
||||
// System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix="
|
||||
// + brToString(suffixBytesRef));
|
||||
// + ToStringUtils.bytesRefToString(suffixBytesRef));
|
||||
// }
|
||||
|
||||
startBytePos = suffixesReader.getPosition();
|
||||
|
@ -682,8 +668,9 @@ final class SegmentTermsEnumFrame {
|
|||
public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws IOException {
|
||||
|
||||
// if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix +
|
||||
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" +
|
||||
// brToString(target));
|
||||
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
|
||||
// ToStringUtils.bytesRefToString(target) +
|
||||
// " term=" + ToStringUtils.bytesRefToString(term));
|
||||
|
||||
assert nextEnt != -1;
|
||||
|
||||
|
@ -711,7 +698,8 @@ final class SegmentTermsEnumFrame {
|
|||
// suffixBytesRef.offset = suffixesReader.getPosition();
|
||||
// suffixBytesRef.length = suffix;
|
||||
// System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " +
|
||||
// (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef));
|
||||
// (nextEnt-1) + " (of " + entCount + ") suffix=" +
|
||||
// ToStringUtils.bytesRefToString(suffixBytesRef));
|
||||
// }
|
||||
|
||||
final int termLen = prefix + suffix;
|
||||
|
@ -743,8 +731,8 @@ final class SegmentTermsEnumFrame {
|
|||
// return NOT_FOUND:
|
||||
fillTerm();
|
||||
|
||||
// if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly + "
|
||||
// ste.termExists=" + ste.termExists);
|
||||
// if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly +
|
||||
// " ste.termExists=" + ste.termExists);
|
||||
|
||||
if (!exactOnly && !ste.termExists) {
|
||||
// System.out.println(" now pushFrame");
|
||||
|
|
|
@ -46,6 +46,7 @@ import org.apache.lucene.util.FixedBitSet;
|
|||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.IntsRefBuilder;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
import org.apache.lucene.util.compress.LZ4;
|
||||
import org.apache.lucene.util.compress.LowercaseAsciiCompression;
|
||||
import org.apache.lucene.util.fst.ByteSequenceOutputs;
|
||||
|
@ -349,7 +350,7 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
|
|||
}
|
||||
|
||||
// if (DEBUG) System.out.println("write field=" + fieldInfo.name + " term=" +
|
||||
// brToString(term));
|
||||
// ToStringUtils.bytesRefToString(term));
|
||||
termsWriter.write(term, termsEnum, norms);
|
||||
}
|
||||
|
||||
|
@ -388,33 +389,10 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
|
|||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "TERM: " + brToString(termBytes);
|
||||
return "TERM: " + ToStringUtils.bytesRefToString(termBytes);
|
||||
}
|
||||
}
|
||||
|
||||
// for debugging
|
||||
@SuppressWarnings("unused")
|
||||
static String brToString(BytesRef b) {
|
||||
if (b == null) {
|
||||
return "(null)";
|
||||
} else {
|
||||
try {
|
||||
return b.utf8ToString() + " " + b;
|
||||
} catch (Throwable t) {
|
||||
// If BytesRef isn't actually UTF8, or it's eg a
|
||||
// prefix of UTF8 that ends mid-unicode-char, we
|
||||
// fallback to hex:
|
||||
return b.toString();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// for debugging
|
||||
@SuppressWarnings("unused")
|
||||
static String brToString(byte[] b) {
|
||||
return brToString(new BytesRef(b));
|
||||
}
|
||||
|
||||
private static final class PendingBlock extends PendingEntry {
|
||||
public final BytesRef prefix;
|
||||
public final long fp;
|
||||
|
@ -442,7 +420,7 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
|
|||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "BLOCK: prefix=" + brToString(prefix);
|
||||
return "BLOCK: prefix=" + ToStringUtils.bytesRefToString(prefix);
|
||||
}
|
||||
|
||||
public void compileIndex(
|
||||
|
@ -600,8 +578,8 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
|
|||
// if (DEBUG2) {
|
||||
// BytesRef br = new BytesRef(lastTerm.bytes());
|
||||
// br.length = prefixLength;
|
||||
// System.out.println("writeBlocks: seg=" + segment + " prefix=" + brToString(br) + " count="
|
||||
// + count);
|
||||
// System.out.println("writeBlocks: seg=" + segment + " prefix=" +
|
||||
// ToStringUtils.bytesRefToString(br) + " count=" + count);
|
||||
// }
|
||||
|
||||
// Root block better write all remaining pending entries:
|
||||
|
@ -754,9 +732,10 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
|
|||
prefix.length = prefixLength;
|
||||
|
||||
// if (DEBUG2) System.out.println(" writeBlock field=" + fieldInfo.name + " prefix=" +
|
||||
// brToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor + " isLastInFloor=" + (end ==
|
||||
// pending.size()) + " floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end +
|
||||
// " hasTerms=" + hasTerms + " hasSubBlocks=" + hasSubBlocks);
|
||||
// ToStringUtils.bytesRefToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor +
|
||||
// " isLastInFloor=" + (end == pending.size()) + " floorLeadLabel=" + floorLeadLabel +
|
||||
// " start=" + start + " end=" + end + " hasTerms=" + hasTerms + " hasSubBlocks=" +
|
||||
// hasSubBlocks);
|
||||
|
||||
// Write block header:
|
||||
int numEntries = end - start;
|
||||
|
@ -769,7 +748,9 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
|
|||
|
||||
/*
|
||||
if (DEBUG) {
|
||||
System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + brToString(prefix) + " entCount=" + (end-start+1) + " startFP=" + startFP + (isFloor ? (" floorLeadLabel=" + Integer.toHexString(floorLeadLabel)) : ""));
|
||||
System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" +
|
||||
pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + ToStringUtils.bytesRefToString(prefix) +
|
||||
" entCount=" + (end-start+1) + " startFP=" + startFP + (isFloor ? (" floorLeadLabel=" + Integer.toHexString(floorLeadLabel)) : ""));
|
||||
}
|
||||
*/
|
||||
|
||||
|
@ -804,7 +785,8 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
|
|||
// BytesRef suffixBytes = new BytesRef(suffix);
|
||||
// System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
|
||||
// suffixBytes.length = suffix;
|
||||
// System.out.println(" write term suffix=" + brToString(suffixBytes));
|
||||
// System.out.println(" write term suffix=" +
|
||||
// ToStringUtils.bytesRefToString(suffixBytes));
|
||||
// }
|
||||
|
||||
// For leaf block we write suffix straight
|
||||
|
@ -837,7 +819,8 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
|
|||
// BytesRef suffixBytes = new BytesRef(suffix);
|
||||
// System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
|
||||
// suffixBytes.length = suffix;
|
||||
// System.out.println(" write term suffix=" + brToString(suffixBytes));
|
||||
// System.out.println(" write term suffix=" +
|
||||
// ToStringUtils.bytesRefToString(suffixBytes));
|
||||
// }
|
||||
|
||||
// For non-leaf block we borrow 1 bit to record
|
||||
|
@ -879,8 +862,9 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
|
|||
// BytesRef suffixBytes = new BytesRef(suffix);
|
||||
// System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
|
||||
// suffixBytes.length = suffix;
|
||||
// System.out.println(" write sub-block suffix=" + brToString(suffixBytes) + "
|
||||
// subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
|
||||
// System.out.println(" write sub-block suffix=" +
|
||||
// ToStringUtils.bytesRefToString(suffixBytes) +
|
||||
// " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
|
||||
// }
|
||||
|
||||
assert floorLeadLabel == -1
|
||||
|
@ -998,7 +982,8 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
|
|||
if (DEBUG) {
|
||||
int[] tmp = new int[lastTerm.length];
|
||||
System.arraycopy(prefixStarts, 0, tmp, 0, tmp.length);
|
||||
System.out.println("BTTW: write term=" + brToString(text) + " prefixStarts=" + Arrays.toString(tmp) + " pending.size()=" + pending.size());
|
||||
System.out.println("BTTW: write term=" + ToStringUtils.bytesRefToString(text) + " prefixStarts=" + Arrays.toString(tmp) +
|
||||
" pending.size()=" + pending.size());
|
||||
}
|
||||
*/
|
||||
|
||||
|
@ -1051,8 +1036,8 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
|
|||
// we are closing:
|
||||
int prefixTopSize = pending.size() - prefixStarts[i];
|
||||
if (prefixTopSize >= minItemsInBlock) {
|
||||
// if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + "
|
||||
// minItemsInBlock=" + minItemsInBlock);
|
||||
// if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize +
|
||||
// " minItemsInBlock=" + minItemsInBlock);
|
||||
writeBlocks(i + 1, prefixTopSize);
|
||||
prefixStarts[i] -= prefixTopSize - 1;
|
||||
}
|
||||
|
|
|
@ -230,24 +230,6 @@ public final class OrdsBlockTreeTermsReader extends FieldsProducer {
|
|||
return fields.size();
|
||||
}
|
||||
|
||||
// for debugging
|
||||
String brToString(BytesRef b) {
|
||||
if (b == null) {
|
||||
return "null";
|
||||
} else {
|
||||
try {
|
||||
return b.utf8ToString() + " " + b;
|
||||
} catch (
|
||||
@SuppressWarnings("unused")
|
||||
Throwable t) {
|
||||
// If BytesRef isn't actually UTF8, or it's eg a
|
||||
// prefix of UTF8 that ends mid-unicode-char, we
|
||||
// fallback to hex:
|
||||
return b.toString();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void checkIntegrity() throws IOException {
|
||||
// term dictionary
|
||||
|
|
|
@ -43,6 +43,7 @@ import org.apache.lucene.util.FixedBitSet;
|
|||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.IntsRefBuilder;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
import org.apache.lucene.util.fst.BytesRefFSTEnum;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.FSTCompiler;
|
||||
|
@ -288,29 +289,10 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
|
|||
|
||||
@Override
|
||||
public String toString() {
|
||||
return brToString(termBytes);
|
||||
return ToStringUtils.bytesRefToString(termBytes);
|
||||
}
|
||||
}
|
||||
|
||||
// for debugging
|
||||
@SuppressWarnings("unused")
|
||||
static String brToString(BytesRef b) {
|
||||
try {
|
||||
return b.utf8ToString() + " " + b;
|
||||
} catch (Throwable t) {
|
||||
// If BytesRef isn't actually UTF8, or it's eg a
|
||||
// prefix of UTF8 that ends mid-unicode-char, we
|
||||
// fallback to hex:
|
||||
return b.toString();
|
||||
}
|
||||
}
|
||||
|
||||
// for debugging
|
||||
@SuppressWarnings("unused")
|
||||
static String brToString(byte[] b) {
|
||||
return brToString(new BytesRef(b));
|
||||
}
|
||||
|
||||
private static final class SubIndex {
|
||||
public final FST<Output> index;
|
||||
public final long termOrdStart;
|
||||
|
@ -353,7 +335,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
|
|||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "BLOCK: " + brToString(prefix);
|
||||
return "BLOCK: " + ToStringUtils.bytesRefToString(prefix);
|
||||
}
|
||||
|
||||
public void compileIndex(
|
||||
|
@ -457,9 +439,9 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
|
|||
Output newOutput =
|
||||
FST_OUTPUTS.newOutput(
|
||||
output.bytes, termOrdOffset + output.startOrd, output.endOrd - termOrdOffset);
|
||||
// System.out.println(" append sub=" + indexEnt.input + " output=" + indexEnt.output + "
|
||||
// termOrdOffset=" + termOrdOffset + " blockTermCount=" + blockTermCount + " newOutput=" +
|
||||
// newOutput + " endOrd=" + (termOrdOffset+Long.MAX_VALUE-output.endOrd));
|
||||
// System.out.println(" append sub=" + indexEnt.input + " output=" + indexEnt.output +
|
||||
// " termOrdOffset=" + termOrdOffset + " blockTermCount=" + blockTermCount + " newOutput="
|
||||
// + newOutput + " endOrd=" + (termOrdOffset+Long.MAX_VALUE-output.endOrd));
|
||||
fstCompiler.add(Util.toIntsRef(indexEnt.input, scratchIntsRef), newOutput);
|
||||
}
|
||||
}
|
||||
|
@ -642,8 +624,8 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
|
|||
|
||||
long startFP = out.getFilePointer();
|
||||
|
||||
// if (DEBUG) System.out.println(" writeBlock fp=" + startFP + " isFloor=" + isFloor + "
|
||||
// floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end + " hasTerms=" +
|
||||
// if (DEBUG) System.out.println(" writeBlock fp=" + startFP + " isFloor=" + isFloor +
|
||||
// " floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end + " hasTerms=" +
|
||||
// hasTerms + " hasSubBlocks=" + hasSubBlocks);
|
||||
|
||||
boolean hasFloorLeadLabel = isFloor && floorLeadLabel != -1;
|
||||
|
@ -662,11 +644,11 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
|
|||
out.writeVInt(code);
|
||||
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + "
|
||||
// pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" +
|
||||
// brToString(prefix) + " entCount=" + length + " startFP=" + startFP + (isFloor ? ("
|
||||
// floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") + " isLastInFloor=" +
|
||||
// isLastInFloor);
|
||||
// System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment +
|
||||
// " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" +
|
||||
// ToStringUtils.bytesRefToString(prefix) + " entCount=" + length + " startFP=" + startFP +
|
||||
// (isFloor ? (" floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") +
|
||||
// " isLastInFloor=" + isLastInFloor);
|
||||
// }
|
||||
|
||||
final List<SubIndex> subIndices;
|
||||
|
@ -784,7 +766,8 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
|
|||
BytesRef suffixBytes = new BytesRef(suffix);
|
||||
System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
|
||||
suffixBytes.length = suffix;
|
||||
System.out.println(" write sub-block suffix=" + brToString(suffixBytes) + " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
|
||||
System.out.println(" write sub-block suffix=" + ToStringUtils.bytesRefToString(suffixBytes) +
|
||||
" subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
|
||||
}
|
||||
*/
|
||||
|
||||
|
@ -842,7 +825,8 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
|
|||
if (DEBUG) {
|
||||
int[] tmp = new int[lastTerm.length];
|
||||
System.arraycopy(prefixStarts, 0, tmp, 0, tmp.length);
|
||||
System.out.println("BTTW: write term=" + brToString(text) + " prefixStarts=" + Arrays.toString(tmp) + " pending.size()=" + pending.size());
|
||||
System.out.println("BTTW: write term=" + ToStringUtils.bytesRefToString(text) + " prefixStarts=" +
|
||||
Arrays.toString(tmp) + " pending.size()=" + pending.size());
|
||||
}
|
||||
*/
|
||||
|
||||
|
@ -885,8 +869,8 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
|
|||
// we are closing:
|
||||
int prefixTopSize = pending.size() - prefixStarts[i];
|
||||
if (prefixTopSize >= minItemsInBlock) {
|
||||
// if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + "
|
||||
// minItemsInBlock=" + minItemsInBlock);
|
||||
// if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize +
|
||||
// " minItemsInBlock=" + minItemsInBlock);
|
||||
writeBlocks(i + 1, prefixTopSize);
|
||||
prefixStarts[i] -= prefixTopSize - 1;
|
||||
}
|
||||
|
|
|
@ -61,7 +61,7 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
|
|||
throws IOException {
|
||||
// if (DEBUG) {
|
||||
// System.out.println("\nintEnum.init seg=" + segment + " commonSuffix=" +
|
||||
// brToString(compiled.commonSuffixRef));
|
||||
// ToStringUtils.bytesRefToString(compiled.commonSuffixRef));
|
||||
// }
|
||||
this.fr = fr;
|
||||
this.byteRunnable = compiled.getByteRunnable();
|
||||
|
@ -283,13 +283,15 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
|
|||
currentFrame.loadNextFloorBlock();
|
||||
continue;
|
||||
} else {
|
||||
// if (DEBUG) System.out.println(" return term=" + brToString(term));
|
||||
// if (DEBUG) System.out.println(" return term=" +
|
||||
// ToStringUtils.bytesRefToString(term));
|
||||
return;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
} else if (cmp == 0) {
|
||||
// if (DEBUG) System.out.println(" return term=" + brToString(term));
|
||||
// if (DEBUG) System.out.println(" return term=" +
|
||||
// ToStringUtils.bytesRefToString(term));
|
||||
return;
|
||||
} else {
|
||||
// Fallback to prior entry: the semantics of
|
||||
|
@ -327,10 +329,10 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
|
|||
|
||||
// if (DEBUG) {
|
||||
// System.out.println("\nintEnum.next seg=" + segment);
|
||||
// System.out.println(" frame ord=" + currentFrame.ord + " prefix=" + brToString(new
|
||||
// BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + "
|
||||
// lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " trans=" +
|
||||
// (currentFrame.transitions.length == 0 ? "n/a" :
|
||||
// System.out.println(" frame ord=" + currentFrame.ord + " prefix=" +
|
||||
// ToStringUtils.bytesRefToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) +
|
||||
// " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor +
|
||||
// " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
|
||||
// currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" +
|
||||
// currentFrame.outputPrefix);
|
||||
// }
|
||||
|
@ -343,9 +345,10 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
|
|||
// if (DEBUG) System.out.println(" next-floor-block");
|
||||
currentFrame.loadNextFloorBlock();
|
||||
// if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" +
|
||||
// brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" +
|
||||
// currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" +
|
||||
// currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
|
||||
// ToStringUtils.bytesRefToString(new BytesRef(term.bytes, term.offset,
|
||||
// currentFrame.prefix)) +
|
||||
// " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor +
|
||||
// " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
|
||||
// currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" +
|
||||
// currentFrame.outputPrefix);
|
||||
} else {
|
||||
|
@ -357,9 +360,10 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
|
|||
currentFrame = stack[currentFrame.ord - 1];
|
||||
assert currentFrame.lastSubFP == lastFP;
|
||||
// if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" +
|
||||
// brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" +
|
||||
// currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" +
|
||||
// currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
|
||||
// ToStringUtils.bytesRefToString(new BytesRef(term.bytes, term.offset,
|
||||
// currentFrame.prefix)) +
|
||||
// " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor +
|
||||
// " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
|
||||
// currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" +
|
||||
// currentFrame.outputPrefix);
|
||||
}
|
||||
|
@ -373,7 +377,7 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
|
|||
// suffixRef.length = currentFrame.suffix;
|
||||
// System.out.println(" " + (isSubBlock ? "sub-block" : "term") + " " +
|
||||
// currentFrame.nextEnt + " (of " + currentFrame.entCount + ") suffix=" +
|
||||
// brToString(suffixRef));
|
||||
// ToStringUtils.bytesRefToString(suffixRef));
|
||||
// }
|
||||
|
||||
if (currentFrame.suffix != 0) {
|
||||
|
@ -480,15 +484,16 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
|
|||
copyTerm();
|
||||
currentFrame = pushFrame(state);
|
||||
// if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" +
|
||||
// brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" +
|
||||
// currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" +
|
||||
// ToStringUtils.bytesRefToString(new BytesRef(term.bytes, term.offset,
|
||||
// currentFrame.prefix)) +
|
||||
// " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" +
|
||||
// currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
|
||||
// currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" +
|
||||
// currentFrame.outputPrefix);
|
||||
} else if (byteRunnable.isAccept(state)) {
|
||||
copyTerm();
|
||||
// if (DEBUG) System.out.println(" term match to state=" + state + "; return term=" +
|
||||
// brToString(term));
|
||||
// ToStringUtils.bytesRefToString(term));
|
||||
assert savedStartTerm == null || term.compareTo(savedStartTerm) > 0
|
||||
: "saveStartTerm=" + savedStartTerm.utf8ToString() + " term=" + term.utf8ToString();
|
||||
return term;
|
||||
|
|
|
@ -32,6 +32,7 @@ import org.apache.lucene.util.BytesRefBuilder;
|
|||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.IntsRefBuilder;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.Util;
|
||||
|
||||
|
@ -174,11 +175,11 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
throws IOException {
|
||||
final OrdsSegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord);
|
||||
f.arc = arc;
|
||||
// System.out.println("pushFrame termOrd= " + termOrd + " fpOrig=" + f.fpOrig + " fp=" + fp + "
|
||||
// nextEnt=" + f.nextEnt);
|
||||
// System.out.println("pushFrame termOrd= " + termOrd + " fpOrig=" + f.fpOrig + " fp=" + fp +
|
||||
// " nextEnt=" + f.nextEnt);
|
||||
if (f.fpOrig == fp && f.nextEnt != -1) {
|
||||
// if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp + "
|
||||
// isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
|
||||
// if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp +
|
||||
// " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
|
||||
// f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" +
|
||||
// term.length + " vs prefix=" + f.prefix);
|
||||
if (f.prefix > targetBeforeCurrentLength) {
|
||||
|
@ -204,7 +205,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
// final int sav = term.length;
|
||||
// term.length = length;
|
||||
// System.out.println(" push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" +
|
||||
// f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term));
|
||||
// f.hasTerms + " isFloor=" + f.isFloor + " pref=" + ToStringUtils.bytesRefToString(term));
|
||||
// term.length = sav;
|
||||
// }
|
||||
}
|
||||
|
@ -224,19 +225,6 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
return true;
|
||||
}
|
||||
|
||||
// for debugging
|
||||
@SuppressWarnings("unused")
|
||||
static String brToString(BytesRef b) {
|
||||
try {
|
||||
return b.utf8ToString() + " " + b;
|
||||
} catch (Throwable t) {
|
||||
// If BytesRef isn't actually UTF8, or it's eg a
|
||||
// prefix of UTF8 that ends mid-unicode-char, we
|
||||
// fallback to hex:
|
||||
return b.toString();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean seekExact(final BytesRef target) throws IOException {
|
||||
|
||||
|
@ -250,7 +238,9 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
/*
|
||||
if (DEBUG) {
|
||||
System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" + fr.fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix);
|
||||
System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" + fr.fieldInfo.name + ":" +
|
||||
ToStringUtils.bytesRefToString(target) + " current=" + ToStringUtils.bytesRefToString(term) + " (exists?=" + termExists +
|
||||
") validIndexPrefix=" + validIndexPrefix);
|
||||
printSeekState(System.out);
|
||||
}
|
||||
*/
|
||||
|
@ -411,8 +401,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
positioned = true;
|
||||
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
|
||||
// currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
|
||||
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
|
||||
// " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
|
||||
// targetBeforeCurrentLength);
|
||||
// }
|
||||
|
||||
|
@ -443,7 +433,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
term.setByteAt(targetUpto, (byte) targetLabel);
|
||||
term.setLength(1 + targetUpto);
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
|
||||
// System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
|
||||
// }
|
||||
return false;
|
||||
}
|
||||
|
@ -459,7 +449,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
} else {
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" got " + result + "; return NOT_FOUND term=" +
|
||||
// brToString(term));
|
||||
// ToStringUtils.bytesRefToString(term));
|
||||
// }
|
||||
return false;
|
||||
}
|
||||
|
@ -502,7 +492,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
termExists = false;
|
||||
term.setLength(targetUpto);
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
|
||||
// System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
|
||||
// }
|
||||
return false;
|
||||
}
|
||||
|
@ -537,8 +527,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
// if (DEBUG) {
|
||||
// System.out.println("\nBTTR.seekCeil seg=" + segment + " target=" + fieldInfo.name + ":" +
|
||||
// target.utf8ToString() + " " + target + " current=" + brToString(term) + " (exists?=" +
|
||||
// termExists + ") validIndexPrefix= " + validIndexPrefix);
|
||||
// target.utf8ToString() + " " + target + " current=" + ToStringUtils.bytesRefToString(term) +
|
||||
// " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix);
|
||||
// printSeekState();
|
||||
// }
|
||||
|
||||
|
@ -581,9 +571,9 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit +
|
||||
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + "
|
||||
// vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output +
|
||||
// " output=" + output);
|
||||
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) +
|
||||
// " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output
|
||||
// + " output=" + output);
|
||||
// }
|
||||
if (cmp != 0) {
|
||||
break;
|
||||
|
@ -697,8 +687,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
positioned = true;
|
||||
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
|
||||
// currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
|
||||
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
|
||||
// " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
|
||||
// targetBeforeCurrentLength);
|
||||
// }
|
||||
|
||||
|
@ -733,7 +723,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
if (next() != null) {
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" return NOT_FOUND term=" + brToString(term) + " " + term);
|
||||
// System.out.println(" return NOT_FOUND term=" +
|
||||
// ToStringUtils.bytesRefToString(term));
|
||||
// }
|
||||
return SeekStatus.NOT_FOUND;
|
||||
} else {
|
||||
|
@ -744,7 +735,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
}
|
||||
} else {
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" return " + result + " term=" + brToString(term) + " " + term);
|
||||
// System.out.println(" return " + result + " term=" +
|
||||
// ToStringUtils.bytesRefToString(term));
|
||||
// }
|
||||
return result;
|
||||
}
|
||||
|
@ -829,7 +821,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
+ " prefixLen="
|
||||
+ f.prefix
|
||||
+ " prefix="
|
||||
+ brToString(prefix)
|
||||
+ ToStringUtils.bytesRefToString(prefix)
|
||||
+ (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
|
||||
+ " hasTerms="
|
||||
+ f.hasTerms
|
||||
|
@ -859,7 +851,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
+ " prefixLen="
|
||||
+ f.prefix
|
||||
+ " prefix="
|
||||
+ brToString(prefix)
|
||||
+ ToStringUtils.bytesRefToString(prefix)
|
||||
+ " nextEnt="
|
||||
+ f.nextEnt
|
||||
+ (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
|
||||
|
@ -951,8 +943,9 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
assert !eof;
|
||||
// if (DEBUG) {
|
||||
// System.out.println("\nBTTR.next seg=" + segment + " term=" + brToString(term) + "
|
||||
// termExists?=" + termExists + " field=" + fieldInfo.name + " termBlockOrd=" +
|
||||
// System.out.println("\nBTTR.next seg=" + segment + " term=" +
|
||||
// ToStringUtils.bytesRefToString(term) +
|
||||
// " termExists?=" + termExists + " field=" + fieldInfo.name + " termBlockOrd=" +
|
||||
// currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix);
|
||||
// printSeekState();
|
||||
// }
|
||||
|
@ -1019,8 +1012,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
// currentFrame.hasTerms = true;
|
||||
currentFrame.loadBlock();
|
||||
} else {
|
||||
// if (DEBUG) System.out.println(" return term=" + term.utf8ToString() + " " + term + "
|
||||
// currentFrame.ord=" + currentFrame.ord);
|
||||
// if (DEBUG) System.out.println(" return term=" + term.utf8ToString() + " " + term +
|
||||
// " currentFrame.ord=" + currentFrame.ord);
|
||||
positioned = true;
|
||||
return term.get();
|
||||
}
|
||||
|
@ -1235,8 +1228,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
int low = 0;
|
||||
int high = arc.numArcs() - 1;
|
||||
int mid = 0;
|
||||
// System.out.println("bsearch: numArcs=" + arc.numArcs + " target=" + targetOutput + "
|
||||
// output=" + output);
|
||||
// System.out.println("bsearch: numArcs=" + arc.numArcs + " target=" + targetOutput +
|
||||
// " output=" + output);
|
||||
boolean found = false;
|
||||
while (low <= high) {
|
||||
mid = (low + high) >>> 1;
|
||||
|
|
|
@ -119,8 +119,8 @@ final class OrdsSegmentTermsEnumFrame {
|
|||
numFollowFloorBlocks = floorDataReader.readVInt();
|
||||
nextFloorLabel = floorDataReader.readByte() & 0xff;
|
||||
nextFloorTermOrd = termOrdOrig + floorDataReader.readVLong();
|
||||
// System.out.println(" setFloorData ord=" + ord + " nextFloorTermOrd=" + nextFloorTermOrd + "
|
||||
// shift=" + (nextFloorTermOrd-termOrdOrig));
|
||||
// System.out.println(" setFloorData ord=" + ord + " nextFloorTermOrd=" + nextFloorTermOrd +
|
||||
// " shift=" + (nextFloorTermOrd-termOrdOrig));
|
||||
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" setFloorData fpOrig=" + fpOrig + " bytes=" + new
|
||||
|
@ -289,8 +289,8 @@ final class OrdsSegmentTermsEnumFrame {
|
|||
|
||||
// Decodes next entry; returns true if it's a sub-block
|
||||
public boolean nextLeaf() {
|
||||
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + "
|
||||
// entCount=" + entCount);
|
||||
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt +
|
||||
// " entCount=" + entCount);
|
||||
assert nextEnt != -1 && nextEnt < entCount
|
||||
: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp + " termOrd=" + termOrd;
|
||||
nextEnt++;
|
||||
|
@ -306,8 +306,8 @@ final class OrdsSegmentTermsEnumFrame {
|
|||
}
|
||||
|
||||
public boolean nextNonLeaf() {
|
||||
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + "
|
||||
// entCount=" + entCount);
|
||||
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt +
|
||||
// " entCount=" + entCount);
|
||||
assert nextEnt != -1 && nextEnt < entCount
|
||||
: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
|
||||
nextEnt++;
|
||||
|
@ -374,8 +374,8 @@ final class OrdsSegmentTermsEnumFrame {
|
|||
newFP = fpOrig + (code >>> 1);
|
||||
hasTerms = (code & 1) != 0;
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" label=" + ((char) nextFloorLabel) + " fp=" + newFP + "
|
||||
// hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
|
||||
// System.out.println(" label=" + ((char) nextFloorLabel) + " fp=" + newFP +
|
||||
// " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
|
||||
// }
|
||||
|
||||
isLastInFloor = numFollowFloorBlocks == 1;
|
||||
|
@ -440,8 +440,8 @@ final class OrdsSegmentTermsEnumFrame {
|
|||
newFP = fpOrig + (code >>> 1);
|
||||
hasTerms = (code & 1) != 0;
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" label=" + ((char) nextFloorLabel) + " fp=" + newFP + "
|
||||
// hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
|
||||
// System.out.println(" label=" + ((char) nextFloorLabel) + " fp=" + newFP +
|
||||
// " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
|
||||
// }
|
||||
|
||||
isLastInFloor = numFollowFloorBlocks == 1;
|
||||
|
@ -495,8 +495,8 @@ final class OrdsSegmentTermsEnumFrame {
|
|||
boolean absolute = metaDataUpto == 0;
|
||||
assert limit > 0 : "limit=" + limit + " isLeafBlock=" + isLeafBlock + " nextEnt=" + nextEnt;
|
||||
|
||||
// if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + ste.fr.parent.segment + "
|
||||
// mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd + " limit=" + limit);
|
||||
// if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + ste.fr.parent.segment +
|
||||
// " mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd + " limit=" + limit);
|
||||
|
||||
// TODO: better API would be "jump straight to term=N"???
|
||||
while (metaDataUpto < limit) {
|
||||
|
@ -593,10 +593,10 @@ final class OrdsSegmentTermsEnumFrame {
|
|||
// scan the entries check if the suffix matches.
|
||||
public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException {
|
||||
|
||||
// if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + "
|
||||
// nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
|
||||
// OrdsSegmentTermsEnum.brToString(target) + " term=" +
|
||||
// OrdsSegmentTermsEnum.brToString(ste.term));
|
||||
// if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix +
|
||||
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
|
||||
// ToStringUtils.bytesRefToString(target) + " term=" +
|
||||
// ToStringUtils.bytesRefToString(ste.term));
|
||||
|
||||
assert nextEnt != -1;
|
||||
|
||||
|
@ -627,7 +627,7 @@ final class OrdsSegmentTermsEnumFrame {
|
|||
// suffixBytesRef.offset = suffixesReader.getPosition();
|
||||
// suffixBytesRef.length = suffix;
|
||||
// System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix="
|
||||
// + OrdsSegmentTermsEnum.brToString(suffixBytesRef));
|
||||
// + ToStringUtils.bytesRefToString(suffixBytesRef));
|
||||
// }
|
||||
|
||||
final int termLen = prefix + suffix;
|
||||
|
@ -714,8 +714,8 @@ final class OrdsSegmentTermsEnumFrame {
|
|||
|
||||
// if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix +
|
||||
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
|
||||
// OrdsSegmentTermsEnum.brToString(target) + " term=" +
|
||||
// OrdsSegmentTermsEnum.brToString(ste.term));
|
||||
// ToStringUtils.bytesRefToString(target) + " term=" +
|
||||
// ToStringUtils.bytesRefToString(ste.term));
|
||||
|
||||
assert nextEnt != -1;
|
||||
|
||||
|
@ -743,7 +743,8 @@ final class OrdsSegmentTermsEnumFrame {
|
|||
// suffixBytesRef.offset = suffixesReader.getPosition();
|
||||
// suffixBytesRef.length = suffix;
|
||||
// System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " +
|
||||
// (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef));
|
||||
// (nextEnt-1) + " (of " + entCount + ") suffix=" +
|
||||
// ToStringUtils.bytesRefToString(suffixBytesRef));
|
||||
// }
|
||||
|
||||
ste.termExists = (code & 1) == 0;
|
||||
|
|
|
@ -210,7 +210,7 @@ public final class FieldReader extends Terms {
|
|||
@Override
|
||||
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
|
||||
// if (DEBUG) System.out.println(" FieldReader.intersect startTerm=" +
|
||||
// BlockTreeTermsWriter.brToString(startTerm));
|
||||
// ToStringUtils.bytesRefToString(startTerm));
|
||||
// System.out.println("intersect: " + compiled.type + " a=" + compiled.automaton);
|
||||
// TODO: we could push "it's a range" or "it's a prefix" down into IntersectTermsEnum?
|
||||
// can we optimize knowing that...?
|
||||
|
|
|
@ -549,19 +549,6 @@ final class IntersectTermsEnum extends BaseTermsEnum {
|
|||
}
|
||||
}
|
||||
|
||||
// for debugging
|
||||
@SuppressWarnings("unused")
|
||||
static String brToString(BytesRef b) {
|
||||
try {
|
||||
return b.utf8ToString() + " " + b;
|
||||
} catch (Throwable t) {
|
||||
// If BytesRef isn't actually UTF8, or it's eg a
|
||||
// prefix of UTF8 that ends mid-unicode-char, we
|
||||
// fallback to hex:
|
||||
return b.toString();
|
||||
}
|
||||
}
|
||||
|
||||
private void copyTerm() {
|
||||
final int len = currentFrame.prefix + currentFrame.suffix;
|
||||
if (term.bytes.length < len) {
|
||||
|
|
|
@ -307,24 +307,6 @@ public final class Lucene90BlockTreeTermsReader extends FieldsProducer {
|
|||
return fieldMap.size();
|
||||
}
|
||||
|
||||
// for debugging
|
||||
String brToString(BytesRef b) {
|
||||
if (b == null) {
|
||||
return "null";
|
||||
} else {
|
||||
try {
|
||||
return b.utf8ToString() + " " + b;
|
||||
} catch (
|
||||
@SuppressWarnings("unused")
|
||||
Throwable t) {
|
||||
// If BytesRef isn't actually UTF8, or it's eg a
|
||||
// prefix of UTF8 that ends mid-unicode-char, we
|
||||
// fallback to hex:
|
||||
return b.toString();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void checkIntegrity() throws IOException {
|
||||
// terms index
|
||||
|
|
|
@ -47,6 +47,7 @@ import org.apache.lucene.util.FixedBitSet;
|
|||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.IntsRefBuilder;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
import org.apache.lucene.util.compress.LZ4;
|
||||
import org.apache.lucene.util.compress.LowercaseAsciiCompression;
|
||||
import org.apache.lucene.util.fst.ByteSequenceOutputs;
|
||||
|
@ -394,7 +395,7 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
|
|||
}
|
||||
|
||||
// if (DEBUG) System.out.println("write field=" + fieldInfo.name + " term=" +
|
||||
// brToString(term));
|
||||
// ToStringUtils.bytesRefToString(term));
|
||||
termsWriter.write(term, termsEnum, norms);
|
||||
}
|
||||
|
||||
|
@ -433,33 +434,10 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
|
|||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "TERM: " + brToString(termBytes);
|
||||
return "TERM: " + ToStringUtils.bytesRefToString(termBytes);
|
||||
}
|
||||
}
|
||||
|
||||
// for debugging
|
||||
@SuppressWarnings("unused")
|
||||
static String brToString(BytesRef b) {
|
||||
if (b == null) {
|
||||
return "(null)";
|
||||
} else {
|
||||
try {
|
||||
return b.utf8ToString() + " " + b;
|
||||
} catch (Throwable t) {
|
||||
// If BytesRef isn't actually UTF8, or it's eg a
|
||||
// prefix of UTF8 that ends mid-unicode-char, we
|
||||
// fallback to hex:
|
||||
return b.toString();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// for debugging
|
||||
@SuppressWarnings("unused")
|
||||
static String brToString(byte[] b) {
|
||||
return brToString(new BytesRef(b));
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes long value to variable length byte[], in MSB order. Use {@link
|
||||
* FieldReader#readMSBVLong} to decode.
|
||||
|
@ -506,7 +484,7 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
|
|||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "BLOCK: prefix=" + brToString(prefix);
|
||||
return "BLOCK: prefix=" + ToStringUtils.bytesRefToString(prefix);
|
||||
}
|
||||
|
||||
public void compileIndex(
|
||||
|
@ -689,8 +667,8 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
|
|||
// if (DEBUG2) {
|
||||
// BytesRef br = new BytesRef(lastTerm.bytes());
|
||||
// br.length = prefixLength;
|
||||
// System.out.println("writeBlocks: seg=" + segment + " prefix=" + brToString(br) + " count="
|
||||
// + count);
|
||||
// System.out.println("writeBlocks: seg=" + segment + " prefix=" +
|
||||
// ToStringUtils.bytesRefToString(br) + " count=" + count);
|
||||
// }
|
||||
|
||||
// Root block better write all remaining pending entries:
|
||||
|
@ -843,9 +821,10 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
|
|||
prefix.length = prefixLength;
|
||||
|
||||
// if (DEBUG2) System.out.println(" writeBlock field=" + fieldInfo.name + " prefix=" +
|
||||
// brToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor + " isLastInFloor=" + (end ==
|
||||
// pending.size()) + " floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end +
|
||||
// " hasTerms=" + hasTerms + " hasSubBlocks=" + hasSubBlocks);
|
||||
// ToStringUtils.bytesRefToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor +
|
||||
// " isLastInFloor=" + (end == pending.size()) + " floorLeadLabel=" + floorLeadLabel +
|
||||
// " start=" + start + " end=" + end + " hasTerms=" + hasTerms + " hasSubBlocks=" +
|
||||
// hasSubBlocks);
|
||||
|
||||
// Write block header:
|
||||
int numEntries = end - start;
|
||||
|
@ -858,7 +837,9 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
|
|||
|
||||
/*
|
||||
if (DEBUG) {
|
||||
System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + brToString(prefix) + " entCount=" + (end-start+1) + " startFP=" + startFP + (isFloor ? (" floorLeadLabel=" + Integer.toHexString(floorLeadLabel)) : ""));
|
||||
System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" +
|
||||
pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + ToStringUtils.bytesRefToString(prefix) +
|
||||
" entCount=" + (end-start+1) + " startFP=" + startFP + (isFloor ? (" floorLeadLabel=" + Integer.toHexString(floorLeadLabel)) : ""));
|
||||
}
|
||||
*/
|
||||
|
||||
|
@ -893,7 +874,8 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
|
|||
// BytesRef suffixBytes = new BytesRef(suffix);
|
||||
// System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
|
||||
// suffixBytes.length = suffix;
|
||||
// System.out.println(" write term suffix=" + brToString(suffixBytes));
|
||||
// System.out.println(" write term suffix=" +
|
||||
// ToStringUtils.bytesRefToString(suffixBytes));
|
||||
// }
|
||||
|
||||
// For leaf block we write suffix straight
|
||||
|
@ -926,7 +908,8 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
|
|||
// BytesRef suffixBytes = new BytesRef(suffix);
|
||||
// System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
|
||||
// suffixBytes.length = suffix;
|
||||
// System.out.println(" write term suffix=" + brToString(suffixBytes));
|
||||
// System.out.println(" write term suffix=" +
|
||||
// ToStringUtils.bytesRefToString(suffixBytes));
|
||||
// }
|
||||
|
||||
// For non-leaf block we borrow 1 bit to record
|
||||
|
@ -968,8 +951,9 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
|
|||
// BytesRef suffixBytes = new BytesRef(suffix);
|
||||
// System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
|
||||
// suffixBytes.length = suffix;
|
||||
// System.out.println(" write sub-block suffix=" + brToString(suffixBytes) + "
|
||||
// subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
|
||||
// System.out.println(" write sub-block suffix=" +
|
||||
// ToStringUtils.bytesRefToString(suffixBytes) + " subFP=" + block.fp + " subCode=" +
|
||||
// (startFP-block.fp) + " floor=" + block.isFloor);
|
||||
// }
|
||||
|
||||
assert floorLeadLabel == -1
|
||||
|
@ -1090,7 +1074,8 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
|
|||
if (DEBUG) {
|
||||
int[] tmp = new int[lastTerm.length];
|
||||
System.arraycopy(prefixStarts, 0, tmp, 0, tmp.length);
|
||||
System.out.println("BTTW: write term=" + brToString(text) + " prefixStarts=" + Arrays.toString(tmp) + " pending.size()=" + pending.size());
|
||||
System.out.println("BTTW: write term=" + ToStringUtils.bytesRefToString(text) + " prefixStarts=" + Arrays.toString(tmp) +
|
||||
" pending.size()=" + pending.size());
|
||||
}
|
||||
*/
|
||||
|
||||
|
@ -1143,8 +1128,8 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
|
|||
// we are closing:
|
||||
int prefixTopSize = pending.size() - prefixStarts[i];
|
||||
if (prefixTopSize >= minItemsInBlock) {
|
||||
// if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + "
|
||||
// minItemsInBlock=" + minItemsInBlock);
|
||||
// if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize +
|
||||
// " minItemsInBlock=" + minItemsInBlock);
|
||||
writeBlocks(i + 1, prefixTopSize);
|
||||
prefixStarts[i] -= prefixTopSize - 1;
|
||||
}
|
||||
|
|
|
@ -263,8 +263,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
final SegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord);
|
||||
f.arc = arc;
|
||||
if (f.fpOrig == fp && f.nextEnt != -1) {
|
||||
// if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp + "
|
||||
// isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
|
||||
// if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp +
|
||||
// " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
|
||||
// f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" +
|
||||
// term.length + " vs prefix=" + f.prefix);
|
||||
// if (f.prefix > targetBeforeCurrentLength) {
|
||||
|
@ -286,7 +286,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
// final int sav = term.length;
|
||||
// term.length = length;
|
||||
// System.out.println(" push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" +
|
||||
// f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term));
|
||||
// f.hasTerms + " isFloor=" + f.isFloor + " pref=" + ToStringUtils.bytesRefToString(term));
|
||||
// term.length = sav;
|
||||
// }
|
||||
}
|
||||
|
@ -306,27 +306,6 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
// for debugging
|
||||
@SuppressWarnings("unused")
|
||||
static String brToString(BytesRef b) {
|
||||
try {
|
||||
return b.utf8ToString() + " " + b;
|
||||
} catch (Throwable t) {
|
||||
// If BytesRef isn't actually UTF8, or it's eg a
|
||||
// prefix of UTF8 that ends mid-unicode-char, we
|
||||
// fallback to hex:
|
||||
return b.toString();
|
||||
}
|
||||
}
|
||||
|
||||
// for debugging
|
||||
@SuppressWarnings("unused")
|
||||
static String brToString(BytesRefBuilder b) {
|
||||
return brToString(b.get());
|
||||
}
|
||||
*/
|
||||
|
||||
@Override
|
||||
public boolean seekExact(BytesRef target) throws IOException {
|
||||
|
||||
|
@ -344,8 +323,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
// if (DEBUG) {
|
||||
// System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" +
|
||||
// fr.fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?="
|
||||
// + termExists + ") validIndexPrefix=" + validIndexPrefix);
|
||||
// fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) + " current=" +
|
||||
// ToStringUtils.bytesRefToString(term) +
|
||||
// " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix);
|
||||
// printSeekState(System.out);
|
||||
// }
|
||||
|
||||
|
@ -499,8 +479,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
}
|
||||
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
|
||||
// currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
|
||||
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
|
||||
// " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
|
||||
// targetBeforeCurrentLength);
|
||||
// }
|
||||
|
||||
|
@ -531,7 +511,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
term.setByteAt(targetUpto, (byte) targetLabel);
|
||||
term.setLength(1 + targetUpto);
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
|
||||
// System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
|
||||
// }
|
||||
return false;
|
||||
}
|
||||
|
@ -547,7 +527,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
} else {
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" got " + result + "; return NOT_FOUND term=" +
|
||||
// brToString(term));
|
||||
// ToStringUtils.bytesRefToString(term));
|
||||
// }
|
||||
return false;
|
||||
}
|
||||
|
@ -586,7 +566,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
termExists = false;
|
||||
term.setLength(targetUpto);
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
|
||||
// System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
|
||||
// }
|
||||
return false;
|
||||
}
|
||||
|
@ -622,8 +602,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
// if (DEBUG) {
|
||||
// System.out.println("\nBTTR.seekCeil seg=" + fr.parent.segment + " target=" +
|
||||
// fr.fieldInfo.name + ":" + brToString(target) + " " + target + " current=" + brToString(term)
|
||||
// + " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix);
|
||||
// fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) + " current=" +
|
||||
// ToStringUtils.bytesRefToString(term) + " (exists?=" + termExists +
|
||||
// ") validIndexPrefix= " + validIndexPrefix);
|
||||
// printSeekState(System.out);
|
||||
// }
|
||||
|
||||
|
@ -663,9 +644,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit +
|
||||
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + "
|
||||
// vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output +
|
||||
// " output=" + output);
|
||||
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) +
|
||||
// " vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output
|
||||
// + " output=" + output);
|
||||
// }
|
||||
if (cmp != 0) {
|
||||
break;
|
||||
|
@ -771,8 +752,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
}
|
||||
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
|
||||
// currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
|
||||
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
|
||||
// " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
|
||||
// targetBeforeCurrentLength);
|
||||
// }
|
||||
|
||||
|
@ -808,7 +789,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
if (next() != null) {
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" return NOT_FOUND term=" + brToString(term));
|
||||
// System.out.println(" return NOT_FOUND term=" +
|
||||
// ToStringUtils.bytesRefToString(term));
|
||||
// }
|
||||
return SeekStatus.NOT_FOUND;
|
||||
} else {
|
||||
|
@ -819,7 +801,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
}
|
||||
} else {
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" return " + result + " term=" + brToString(term));
|
||||
// System.out.println(" return " + result + " term=" +
|
||||
// ToStringUtils.bytesRefToString(term));
|
||||
// }
|
||||
return result;
|
||||
}
|
||||
|
@ -1015,9 +998,10 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
assert !eof;
|
||||
// if (DEBUG) {
|
||||
// System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" + brToString(term) + "
|
||||
// termExists?=" + termExists + " field=" + fr.fieldInfo.name + " termBlockOrd=" +
|
||||
// currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix);
|
||||
// System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" +
|
||||
// ToStringUtils.bytesRefToString(term) + " termExists?=" + termExists + " field=" +
|
||||
// fr.fieldInfo.name + " termBlockOrd=" + currentFrame.state.termBlockOrd +
|
||||
// " validIndexPrefix=" + validIndexPrefix);
|
||||
// printSeekState(System.out);
|
||||
// }
|
||||
|
||||
|
@ -1081,8 +1065,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
// try to scan to the right floor frame:
|
||||
currentFrame.loadBlock();
|
||||
} else {
|
||||
// if (DEBUG) System.out.println(" return term=" + brToString(term) + " currentFrame.ord="
|
||||
// + currentFrame.ord);
|
||||
// if (DEBUG) System.out.println(" return term=" + ToStringUtils.bytesRefToString(term) +
|
||||
// " currentFrame.ord=" + currentFrame.ord);
|
||||
return term.get();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -295,8 +295,8 @@ final class SegmentTermsEnumFrame {
|
|||
}
|
||||
|
||||
public void nextLeaf() {
|
||||
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + "
|
||||
// entCount=" + entCount);
|
||||
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt +
|
||||
// " entCount=" + entCount);
|
||||
assert nextEnt != -1 && nextEnt < entCount
|
||||
: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
|
||||
nextEnt++;
|
||||
|
@ -388,8 +388,8 @@ final class SegmentTermsEnumFrame {
|
|||
newFP = fpOrig + (code >>> 1);
|
||||
hasTerms = (code & 1) != 0;
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" label=" + toHex(nextFloorLabel) + " fp=" + newFP + "
|
||||
// hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
|
||||
// System.out.println(" label=" + toHex(nextFloorLabel) + " fp=" + newFP +
|
||||
// " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
|
||||
// }
|
||||
|
||||
isLastInFloor = numFollowFloorBlocks == 1;
|
||||
|
@ -531,28 +531,14 @@ final class SegmentTermsEnumFrame {
|
|||
private long subCode;
|
||||
CompressionAlgorithm compressionAlg = CompressionAlgorithm.NO_COMPRESSION;
|
||||
|
||||
// for debugging
|
||||
/*
|
||||
@SuppressWarnings("unused")
|
||||
static String brToString(BytesRef b) {
|
||||
try {
|
||||
return b.utf8ToString() + " " + b;
|
||||
} catch (Throwable t) {
|
||||
// If BytesRef isn't actually UTF8, or it's eg a
|
||||
// prefix of UTF8 that ends mid-unicode-char, we
|
||||
// fallback to hex:
|
||||
return b.toString();
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
// Target's prefix matches this block's prefix; we
|
||||
// scan the entries to check whether the suffix matches.
|
||||
public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException {
|
||||
|
||||
// if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + "
|
||||
// nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" +
|
||||
// brToString(term));
|
||||
// if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix +
|
||||
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
|
||||
// ToStringUtils.bytesRefToString(target) +
|
||||
// " term=" + ToStringUtils.bytesRefToString(term));
|
||||
|
||||
assert nextEnt != -1;
|
||||
|
||||
|
@ -582,7 +568,7 @@ final class SegmentTermsEnumFrame {
|
|||
// suffixBytesRef.offset = suffixesReader.getPosition();
|
||||
// suffixBytesRef.length = suffix;
|
||||
// System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix="
|
||||
// + brToString(suffixBytesRef));
|
||||
// + ToStringUtils.bytesRefToString(suffixBytesRef));
|
||||
// }
|
||||
|
||||
startBytePos = suffixesReader.getPosition();
|
||||
|
@ -647,8 +633,9 @@ final class SegmentTermsEnumFrame {
|
|||
public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws IOException {
|
||||
|
||||
// if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix +
|
||||
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" +
|
||||
// brToString(target));
|
||||
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
|
||||
// ToStringUtils.bytesRefToString(target) +
|
||||
// " term=" + ToStringUtils.bytesRefToString(term));
|
||||
|
||||
assert nextEnt != -1;
|
||||
|
||||
|
@ -676,7 +663,8 @@ final class SegmentTermsEnumFrame {
|
|||
// suffixBytesRef.offset = suffixesReader.getPosition();
|
||||
// suffixBytesRef.length = suffix;
|
||||
// System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " +
|
||||
// (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef));
|
||||
// (nextEnt-1) + " (of " + entCount + ") suffix=" +
|
||||
// ToStringUtils.bytesRefToString(suffixBytesRef));
|
||||
// }
|
||||
|
||||
final int termLen = prefix + suffix;
|
||||
|
@ -708,8 +696,8 @@ final class SegmentTermsEnumFrame {
|
|||
// return NOT_FOUND:
|
||||
fillTerm();
|
||||
|
||||
// if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly + "
|
||||
// ste.termExists=" + ste.termExists);
|
||||
// if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly +
|
||||
// " ste.termExists=" + ste.termExists);
|
||||
|
||||
if (!exactOnly && !ste.termExists) {
|
||||
// System.out.println(" now pushFrame");
|
||||
|
|
|
@ -130,17 +130,20 @@ public final class BytesRef implements Comparable<BytesRef>, Cloneable {
|
|||
return false;
|
||||
}
|
||||
|
||||
/** Interprets stored bytes as UTF8 bytes, returning the resulting string */
|
||||
/**
|
||||
* Interprets stored bytes as UTF-8 bytes, returning the resulting string. May throw an {@link
|
||||
* AssertionError} or a {@link RuntimeException} if the data is not well-formed UTF-8.
|
||||
*/
|
||||
public String utf8ToString() {
|
||||
final char[] ref = new char[length];
|
||||
final int len = UnicodeUtil.UTF8toUTF16(bytes, offset, length, ref);
|
||||
return new String(ref, 0, len);
|
||||
}
|
||||
|
||||
/** Returns hex encoded bytes, eg [0x6c 0x75 0x63 0x65 0x6e 0x65] */
|
||||
/** Returns hex encoded bytes, e.g. "[6c 75 63 65 6e 65]" */
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
StringBuilder sb = new StringBuilder(2 + 3 * length);
|
||||
sb.append('[');
|
||||
final int end = offset + length;
|
||||
for (int i = offset; i < end; i++) {
|
||||
|
|
|
@ -32,6 +32,10 @@ public final class ToStringUtils {
|
|||
|
||||
private static final char[] HEX = "0123456789abcdef".toCharArray();
|
||||
|
||||
/**
|
||||
* Unlike {@link Long#toHexString(long)} returns a String with a "0x" prefix and all the leading
|
||||
* zeros.
|
||||
*/
|
||||
public static String longHex(long x) {
|
||||
char[] asHex = new char[16];
|
||||
for (int i = 16; --i >= 0; x >>>= 4) {
|
||||
|
@ -39,4 +43,31 @@ public final class ToStringUtils {
|
|||
}
|
||||
return "0x" + new String(asHex);
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds a String with both textual representation of the {@link BytesRef} data and the bytes hex
|
||||
* values. For example: {@code "hello [68 65 6c 6c 6f]"}. If the content is not a valid UTF-8
|
||||
* sequence, only the bytes hex values are returned, as per {@link BytesRef#toString()}.
|
||||
*/
|
||||
@SuppressWarnings("unused")
|
||||
public static String bytesRefToString(BytesRef b) {
|
||||
if (b == null) {
|
||||
return "null";
|
||||
}
|
||||
try {
|
||||
return b.utf8ToString() + " " + b;
|
||||
} catch (AssertionError | RuntimeException t) {
|
||||
// If BytesRef isn't actually UTF-8, or it's e.g. a prefix of UTF-8
|
||||
// that ends mid-unicode-char, we fall back to hex:
|
||||
return b.toString();
|
||||
}
|
||||
}
|
||||
|
||||
public static String bytesRefToString(BytesRefBuilder b) {
|
||||
return bytesRefToString(b.get());
|
||||
}
|
||||
|
||||
public static String bytesRefToString(byte[] b) {
|
||||
return bytesRefToString(new BytesRef(b));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.lucene.util.ArrayUtil;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.PairOutputs.Pair;
|
||||
import org.apache.lucene.util.fst.Util;
|
||||
|
@ -175,8 +176,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
|||
final IDVersionSegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord);
|
||||
f.arc = arc;
|
||||
if (f.fpOrig == fp && f.nextEnt != -1) {
|
||||
// if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp + "
|
||||
// isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
|
||||
// if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp +
|
||||
// " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
|
||||
// f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" +
|
||||
// term.length + " vs prefix=" + f.prefix);
|
||||
if (f.prefix > targetBeforeCurrentLength) {
|
||||
|
@ -197,7 +198,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
|||
// final int sav = term.length;
|
||||
// term.length = length;
|
||||
// System.out.println(" push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" +
|
||||
// f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term));
|
||||
// f.hasTerms + " isFloor=" + f.isFloor + " pref=" + ToStringUtils.bytesRefToString(term));
|
||||
// term.length = sav;
|
||||
// }
|
||||
}
|
||||
|
@ -222,19 +223,6 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
|||
return seekExact(target, 0);
|
||||
}
|
||||
|
||||
// for debugging
|
||||
@SuppressWarnings("unused")
|
||||
static String brToString(BytesRef b) {
|
||||
try {
|
||||
return b.utf8ToString() + " " + b;
|
||||
} catch (Throwable t) {
|
||||
// If BytesRef isn't actually UTF8, or it's eg a
|
||||
// prefix of UTF8 that ends mid-unicode-char, we
|
||||
// fallback to hex:
|
||||
return b.toString();
|
||||
}
|
||||
}
|
||||
|
||||
/** Get the version of the currently seek'd term; only valid if we are positioned. */
|
||||
public long getVersion() {
|
||||
return ((IDVersionTermState) currentFrame.state).idVersion;
|
||||
|
@ -258,8 +246,9 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
// if (DEBUG) {
|
||||
// System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" +
|
||||
// fr.fieldInfo.name + ":" + brToString(target) + " minIDVersion=" + minIDVersion + " current="
|
||||
// + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix);
|
||||
// fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) + " minIDVersion=" +
|
||||
// minIDVersion + " current=" + ToStringUtils.bytesRefToString(term) + " (exists?=" +
|
||||
// termExists + ") validIndexPrefix=" + validIndexPrefix);
|
||||
// printSeekState(System.out);
|
||||
// }
|
||||
|
||||
|
@ -460,8 +449,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
|||
}
|
||||
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
|
||||
// currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
|
||||
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
|
||||
// " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
|
||||
// targetBeforeCurrentLength + " termExists=" + termExists);
|
||||
// }
|
||||
|
||||
|
@ -492,7 +481,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
|||
term.setByteAt(targetUpto, (byte) targetLabel);
|
||||
term.setLength(1 + targetUpto);
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
|
||||
// System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
|
||||
// }
|
||||
return false;
|
||||
}
|
||||
|
@ -520,10 +509,11 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
|||
// termExists = false;
|
||||
// }
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" FAST version NOT_FOUND term=" + brToString(term) + "
|
||||
// targetUpto=" + targetUpto + " currentFrame.maxIDVersion=" + currentFrame.maxIDVersion +
|
||||
// " validIndexPrefix=" + validIndexPrefix + " startFrameFP=" + startFrameFP + " vs " +
|
||||
// currentFrame.fp + " termExists=" + termExists);
|
||||
// System.out.println(" FAST version NOT_FOUND term=" +
|
||||
// ToStringUtils.bytesRefToString(term) + " targetUpto=" + targetUpto +
|
||||
// " currentFrame.maxIDVersion=" + currentFrame.maxIDVersion + " validIndexPrefix=" +
|
||||
// validIndexPrefix + " startFrameFP=" + startFrameFP + " vs " + currentFrame.fp +
|
||||
// " termExists=" + termExists);
|
||||
// }
|
||||
return false;
|
||||
}
|
||||
|
@ -553,7 +543,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
|||
} else {
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" got " + result + "; return NOT_FOUND term=" +
|
||||
// brToString(term));
|
||||
// ToStringUtils.bytesRefToString(term));
|
||||
// }
|
||||
return false;
|
||||
}
|
||||
|
@ -604,7 +594,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
|||
termExists = false;
|
||||
term.setLength(targetUpto);
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
|
||||
// System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
|
||||
// }
|
||||
return false;
|
||||
}
|
||||
|
@ -656,8 +646,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
// if (DEBUG) {
|
||||
// System.out.println("\nBTTR.seekCeil seg=" + segment + " target=" + fieldInfo.name + ":" +
|
||||
// target.utf8ToString() + " " + target + " current=" + brToString(term) + " (exists?=" +
|
||||
// termExists + ") validIndexPrefix= " + validIndexPrefix);
|
||||
// target.utf8ToString() + " " + target + " current=" + ToStringUtils.bytesRefToString(term) +
|
||||
// " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix);
|
||||
// printSeekState();
|
||||
// }
|
||||
|
||||
|
@ -700,9 +690,9 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
|||
cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit +
|
||||
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + "
|
||||
// vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output +
|
||||
// " output=" + output);
|
||||
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) +
|
||||
// " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output
|
||||
// + " output=" + output);
|
||||
// }
|
||||
if (cmp != 0) {
|
||||
break;
|
||||
|
@ -814,8 +804,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
|||
}
|
||||
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
|
||||
// currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
|
||||
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
|
||||
// " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
|
||||
// targetBeforeCurrentLength);
|
||||
// }
|
||||
|
||||
|
@ -850,7 +840,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
if (next() != null) {
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" return NOT_FOUND term=" + brToString(term) + " " + term);
|
||||
// System.out.println(" return NOT_FOUND term=" +
|
||||
// ToStringUtils.bytesRefToString(term));
|
||||
// }
|
||||
return SeekStatus.NOT_FOUND;
|
||||
} else {
|
||||
|
@ -861,7 +852,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
|||
}
|
||||
} else {
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" return " + result + " term=" + brToString(term) + " " + term);
|
||||
// System.out.println(" return " + result + " term=" +
|
||||
// ToStringUtils.bytesRefToString(term));
|
||||
// }
|
||||
return result;
|
||||
}
|
||||
|
@ -946,7 +938,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
|||
+ " prefixLen="
|
||||
+ f.prefix
|
||||
+ " prefix="
|
||||
+ brToString(prefix)
|
||||
+ ToStringUtils.bytesRefToString(prefix)
|
||||
+ (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
|
||||
+ " hasTerms="
|
||||
+ f.hasTerms
|
||||
|
@ -974,7 +966,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
|||
+ " prefixLen="
|
||||
+ f.prefix
|
||||
+ " prefix="
|
||||
+ brToString(prefix)
|
||||
+ ToStringUtils.bytesRefToString(prefix)
|
||||
+ " nextEnt="
|
||||
+ f.nextEnt
|
||||
+ (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
|
||||
|
@ -1063,9 +1055,10 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
assert !eof;
|
||||
// if (DEBUG) {
|
||||
// System.out.println("\nBTTR.next seg=" + segment + " term=" + brToString(term) + "
|
||||
// termExists?=" + termExists + " field=" + fieldInfo.name + " termBlockOrd=" +
|
||||
// currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix);
|
||||
// System.out.println("\nBTTR.next seg=" + segment + " term=" +
|
||||
// ToStringUtils.bytesRefToString(term) + " termExists?=" + termExists +
|
||||
// " field=" + fieldInfo.name + " termBlockOrd=" + currentFrame.state.termBlockOrd +
|
||||
// " validIndexPrefix=" + validIndexPrefix);
|
||||
// printSeekState();
|
||||
// }
|
||||
|
||||
|
@ -1129,8 +1122,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
|||
// currentFrame.hasTerms = true;
|
||||
currentFrame.loadBlock();
|
||||
} else {
|
||||
// if (DEBUG) System.out.println(" return term=" + term.utf8ToString() + " " + term + "
|
||||
// currentFrame.ord=" + currentFrame.ord);
|
||||
// if (DEBUG) System.out.println(" return term=" + term.utf8ToString() + " " + term +
|
||||
// " currentFrame.ord=" + currentFrame.ord);
|
||||
return term.get();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -257,8 +257,8 @@ final class IDVersionSegmentTermsEnumFrame {
|
|||
|
||||
// Decodes next entry; returns true if it's a sub-block
|
||||
public boolean nextLeaf() {
|
||||
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + "
|
||||
// entCount=" + entCount);
|
||||
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt +
|
||||
// " entCount=" + entCount);
|
||||
assert nextEnt != -1 && nextEnt < entCount
|
||||
: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
|
||||
nextEnt++;
|
||||
|
@ -273,8 +273,8 @@ final class IDVersionSegmentTermsEnumFrame {
|
|||
}
|
||||
|
||||
public boolean nextNonLeaf() {
|
||||
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + "
|
||||
// entCount=" + entCount);
|
||||
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt +
|
||||
// " entCount=" + entCount);
|
||||
assert nextEnt != -1 && nextEnt < entCount
|
||||
: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
|
||||
nextEnt++;
|
||||
|
@ -338,8 +338,8 @@ final class IDVersionSegmentTermsEnumFrame {
|
|||
newFP = fpOrig + (code >>> 1);
|
||||
hasTerms = (code & 1) != 0;
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" label=" + ((char) nextFloorLabel) + " fp=" + newFP + "
|
||||
// hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
|
||||
// System.out.println(" label=" + ((char) nextFloorLabel) + " fp=" + newFP +
|
||||
// " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
|
||||
// }
|
||||
|
||||
isLastInFloor = numFollowFloorBlocks == 1;
|
||||
|
@ -379,8 +379,8 @@ final class IDVersionSegmentTermsEnumFrame {
|
|||
|
||||
public void decodeMetaData() throws IOException {
|
||||
|
||||
// if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + ste.fr.parent.segment + "
|
||||
// mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd);
|
||||
// if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + ste.fr.parent.segment +
|
||||
// " mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd);
|
||||
|
||||
assert nextEnt >= 0;
|
||||
|
||||
|
@ -473,10 +473,10 @@ final class IDVersionSegmentTermsEnumFrame {
|
|||
// scan the entries to check whether the suffix matches.
|
||||
public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException {
|
||||
|
||||
// if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + "
|
||||
// nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
|
||||
// IDVersionSegmentTermsEnum.brToString(target) + " term=" +
|
||||
// IDVersionSegmentTermsEnum.brToString(ste.term));
|
||||
// if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix +
|
||||
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
|
||||
// ToStringUtils.bytesRefToString(target) +
|
||||
// " term=" + ToStringUtils.bytesRefToString(ste.term));
|
||||
|
||||
assert nextEnt != -1;
|
||||
|
||||
|
@ -506,7 +506,7 @@ final class IDVersionSegmentTermsEnumFrame {
|
|||
// suffixBytesRef.offset = suffixesReader.getPosition();
|
||||
// suffixBytesRef.length = suffix;
|
||||
// System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix="
|
||||
// + IDVersionSegmentTermsEnum.brToString(suffixBytesRef));
|
||||
// + ToStringUtils.bytesRefToString(suffixBytesRef));
|
||||
// }
|
||||
|
||||
final int termLen = prefix + suffix;
|
||||
|
@ -606,8 +606,8 @@ final class IDVersionSegmentTermsEnumFrame {
|
|||
|
||||
// if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix +
|
||||
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
|
||||
// IDVersionSegmentTermsEnum.brToString(target) + " term=" +
|
||||
// IDVersionSegmentTermsEnum.brToString(ste.term));
|
||||
// ToStringUtils.bytesRefToString(target) + " term=" +
|
||||
// ToStringUtils.bytesRefToString(ste.term));
|
||||
|
||||
assert nextEnt != -1;
|
||||
|
||||
|
@ -635,7 +635,8 @@ final class IDVersionSegmentTermsEnumFrame {
|
|||
// suffixBytesRef.offset = suffixesReader.getPosition();
|
||||
// suffixBytesRef.length = suffix;
|
||||
// System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " +
|
||||
// (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef));
|
||||
// (nextEnt-1) + " (of " + entCount + ") suffix=" +
|
||||
// ToStringUtils.bytesRefToString(suffixBytesRef));
|
||||
// }
|
||||
|
||||
ste.termExists = (code & 1) == 0;
|
||||
|
|
|
@ -237,24 +237,6 @@ public final class VersionBlockTreeTermsReader extends FieldsProducer {
|
|||
return fields.size();
|
||||
}
|
||||
|
||||
// for debugging
|
||||
String brToString(BytesRef b) {
|
||||
if (b == null) {
|
||||
return "null";
|
||||
} else {
|
||||
try {
|
||||
return b.utf8ToString() + " " + b;
|
||||
} catch (
|
||||
@SuppressWarnings("unused")
|
||||
Throwable t) {
|
||||
// If BytesRef isn't actually UTF8, or it's eg a
|
||||
// prefix of UTF8 that ends mid-unicode-char, we
|
||||
// fallback to hex:
|
||||
return b.toString();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void checkIntegrity() throws IOException {
|
||||
// term dictionary
|
||||
|
|
|
@ -42,6 +42,7 @@ import org.apache.lucene.util.FixedBitSet;
|
|||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.IntsRefBuilder;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
import org.apache.lucene.util.fst.ByteSequenceOutputs;
|
||||
import org.apache.lucene.util.fst.BytesRefFSTEnum;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
|
@ -292,29 +293,10 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
|
|||
|
||||
@Override
|
||||
public String toString() {
|
||||
return brToString(termBytes);
|
||||
return ToStringUtils.bytesRefToString(termBytes);
|
||||
}
|
||||
}
|
||||
|
||||
// for debugging
|
||||
@SuppressWarnings("unused")
|
||||
static String brToString(BytesRef b) {
|
||||
try {
|
||||
return b.utf8ToString() + " " + b;
|
||||
} catch (Throwable t) {
|
||||
// If BytesRef isn't actually UTF8, or it's eg a
|
||||
// prefix of UTF8 that ends mid-unicode-char, we
|
||||
// fallback to hex:
|
||||
return b.toString();
|
||||
}
|
||||
}
|
||||
|
||||
// for debugging
|
||||
@SuppressWarnings("unused")
|
||||
static String brToString(byte[] b) {
|
||||
return brToString(new BytesRef(b));
|
||||
}
|
||||
|
||||
private static final class PendingBlock extends PendingEntry {
|
||||
public final BytesRef prefix;
|
||||
public final long fp;
|
||||
|
@ -347,7 +329,7 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
|
|||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "BLOCK: " + brToString(prefix);
|
||||
return "BLOCK: " + ToStringUtils.bytesRefToString(prefix);
|
||||
}
|
||||
|
||||
public void compileIndex(
|
||||
|
@ -610,8 +592,8 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
|
|||
|
||||
long startFP = out.getFilePointer();
|
||||
|
||||
// if (DEBUG) System.out.println(" writeBlock fp=" + startFP + " isFloor=" + isFloor + "
|
||||
// floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end + " hasTerms=" +
|
||||
// if (DEBUG) System.out.println(" writeBlock fp=" + startFP + " isFloor=" + isFloor +
|
||||
// " floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end + " hasTerms=" +
|
||||
// hasTerms + " hasSubBlocks=" + hasSubBlocks);
|
||||
|
||||
boolean hasFloorLeadLabel = isFloor && floorLeadLabel != -1;
|
||||
|
@ -630,11 +612,11 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
|
|||
out.writeVInt(code);
|
||||
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + "
|
||||
// pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" +
|
||||
// brToString(prefix) + " entCount=" + length + " startFP=" + startFP + (isFloor ? ("
|
||||
// floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") + " isLastInFloor=" +
|
||||
// isLastInFloor);
|
||||
// System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment +
|
||||
// " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" +
|
||||
// ToStringUtils.bytesRefToString(prefix) + " entCount=" + length + " startFP=" + startFP +
|
||||
// (isFloor ? (" floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") +
|
||||
// " isLastInFloor=" + isLastInFloor);
|
||||
// }
|
||||
|
||||
// 1st pass: pack term suffix bytes into byte[] blob
|
||||
|
@ -737,7 +719,8 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
|
|||
BytesRef suffixBytes = new BytesRef(suffix);
|
||||
System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
|
||||
suffixBytes.length = suffix;
|
||||
System.out.println(" write sub-block suffix=" + brToString(suffixBytes) + " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
|
||||
System.out.println(" write sub-block suffix=" + ToStringUtils.bytesRefToString(suffixBytes) +
|
||||
" subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
|
||||
}
|
||||
*/
|
||||
|
||||
|
@ -824,8 +807,8 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
|
|||
// we are closing:
|
||||
int prefixTopSize = pending.size() - prefixStarts[i];
|
||||
if (prefixTopSize >= minItemsInBlock) {
|
||||
// if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + "
|
||||
// minItemsInBlock=" + minItemsInBlock);
|
||||
// if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize +
|
||||
// " minItemsInBlock=" + minItemsInBlock);
|
||||
writeBlocks(i + 1, prefixTopSize);
|
||||
prefixStarts[i] -= prefixTopSize - 1;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue