Move `brToString(BytesRef)` to `ToStringUtils` (#13068)

This commit is contained in:
Dmitry Cherniachenko 2024-02-15 18:16:44 +01:00 committed by GitHub
parent b16d7117a3
commit 9206bdeb06
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
24 changed files with 341 additions and 530 deletions

View File

@ -205,7 +205,8 @@ Bug Fixes
Other
---------------------
(No changes)
* GITHUB#13068: Replace numerous `brToString(BytesRef)` copies with a `ToStringUtils` method (Dmitry Cherniachenko)
======================== Lucene 9.10.0 =======================

View File

@ -191,7 +191,7 @@ public final class FieldReader extends Terms {
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
// if (DEBUG) System.out.println(" FieldReader.intersect startTerm=" +
// BlockTreeTermsWriter.brToString(startTerm));
// ToStringUtils.bytesRefToString(startTerm));
// System.out.println("intersect: " + compiled.type + " a=" + compiled.automaton);
// TODO: we could push "it's a range" or "it's a prefix" down into IntersectTermsEnum?
// can we optimize knowing that...?

View File

@ -543,19 +543,6 @@ final class IntersectTermsEnum extends BaseTermsEnum {
}
}
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
private void copyTerm() {
final int len = currentFrame.prefix + currentFrame.suffix;
if (term.bytes.length < len) {

View File

@ -354,24 +354,6 @@ public final class Lucene40BlockTreeTermsReader extends FieldsProducer {
return fieldMap.size();
}
// for debugging
String brToString(BytesRef b) {
if (b == null) {
return "null";
} else {
try {
return b.utf8ToString() + " " + b;
} catch (
@SuppressWarnings("unused")
Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
}
@Override
public void checkIntegrity() throws IOException {
// terms index

View File

@ -256,8 +256,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
final SegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord);
f.arc = arc;
if (f.fpOrig == fp && f.nextEnt != -1) {
// if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp + "
// isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
// if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp +
// " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
// f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" +
// term.length + " vs prefix=" + f.prefix);
// if (f.prefix > targetBeforeCurrentLength) {
@ -279,7 +279,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// final int sav = term.length;
// term.length = length;
// System.out.println(" push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" +
// f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term));
// f.hasTerms + " isFloor=" + f.isFloor + " pref=" + ToStringUtils.bytesRefToString(term));
// term.length = sav;
// }
}
@ -299,27 +299,6 @@ final class SegmentTermsEnum extends BaseTermsEnum {
return true;
}
/*
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRefBuilder b) {
return brToString(b.get());
}
*/
@Override
public boolean seekExact(BytesRef target) throws IOException {
@ -337,8 +316,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// if (DEBUG) {
// System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" +
// fr.fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?="
// + termExists + ") validIndexPrefix=" + validIndexPrefix);
// fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) + " current=" +
// ToStringUtils.bytesRefToString(term) + " (exists?=" + termExists +
// ") validIndexPrefix=" + validIndexPrefix);
// printSeekState(System.out);
// }
@ -496,8 +476,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
}
// if (DEBUG) {
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
// currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
// " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// targetBeforeCurrentLength);
// }
@ -528,7 +508,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
term.setByteAt(targetUpto, (byte) targetLabel);
term.setLength(1 + targetUpto);
// if (DEBUG) {
// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
// System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
// }
return false;
}
@ -544,7 +524,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
} else {
// if (DEBUG) {
// System.out.println(" got " + result + "; return NOT_FOUND term=" +
// brToString(term));
// ToStringUtils.bytesRefToString(term));
// }
return false;
}
@ -587,7 +567,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
termExists = false;
term.setLength(targetUpto);
// if (DEBUG) {
// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
// System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
// }
return false;
}
@ -623,7 +603,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// if (DEBUG) {
// System.out.println("\nBTTR.seekCeil seg=" + fr.parent.segment + " target=" +
// fr.fieldInfo.name + ":" + brToString(target) + " " + target + " current=" + brToString(term)
// fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) +
// " current=" + ToStringUtils.bytesRefToString(term)
// + " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix);
// printSeekState(System.out);
// }
@ -667,9 +648,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
// if (DEBUG) {
// System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit +
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + "
// vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output +
// " output=" + output);
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) +
// " vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output
// + " output=" + output);
// }
if (cmp != 0) {
break;
@ -781,8 +762,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
}
// if (DEBUG) {
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
// currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
// " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// targetBeforeCurrentLength);
// }
@ -818,7 +799,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
if (next() != null) {
// if (DEBUG) {
// System.out.println(" return NOT_FOUND term=" + brToString(term));
// System.out.println(" return NOT_FOUND term=" +
// ToStringUtils.bytesRefToString(term));
// }
return SeekStatus.NOT_FOUND;
} else {
@ -829,7 +811,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
}
} else {
// if (DEBUG) {
// System.out.println(" return " + result + " term=" + brToString(term));
// System.out.println(" return " + result + " term=" +
// ToStringUtils.bytesRefToString(term));
// }
return result;
}
@ -1029,9 +1012,10 @@ final class SegmentTermsEnum extends BaseTermsEnum {
assert !eof;
// if (DEBUG) {
// System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" + brToString(term) + "
// termExists?=" + termExists + " field=" + fr.fieldInfo.name + " termBlockOrd=" +
// currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix);
// System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" +
// ToStringUtils.bytesRefToString(term) + " termExists?=" + termExists + " field=" +
// fr.fieldInfo.name + " termBlockOrd=" + currentFrame.state.termBlockOrd +
// " validIndexPrefix=" + validIndexPrefix);
// printSeekState(System.out);
// }
@ -1095,8 +1079,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// try to scan to the right floor frame:
currentFrame.loadBlock();
} else {
// if (DEBUG) System.out.println(" return term=" + brToString(term) + " currentFrame.ord="
// + currentFrame.ord);
// if (DEBUG) System.out.println(" return term=" + ToStringUtils.bytesRefToString(term) +
// " currentFrame.ord=" + currentFrame.ord);
return term.get();
}
}

View File

@ -317,8 +317,8 @@ final class SegmentTermsEnumFrame {
}
public void nextLeaf() {
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + "
// entCount=" + entCount);
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt +
// " entCount=" + entCount);
assert nextEnt != -1 && nextEnt < entCount
: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
nextEnt++;
@ -410,8 +410,8 @@ final class SegmentTermsEnumFrame {
newFP = fpOrig + (code >>> 1);
hasTerms = (code & 1) != 0;
// if (DEBUG) {
// System.out.println(" label=" + toHex(nextFloorLabel) + " fp=" + newFP + "
// hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
// System.out.println(" label=" + toHex(nextFloorLabel) + " fp=" + newFP +
// " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
// }
isLastInFloor = numFollowFloorBlocks == 1;
@ -566,28 +566,14 @@ final class SegmentTermsEnumFrame {
private long subCode;
CompressionAlgorithm compressionAlg = CompressionAlgorithm.NO_COMPRESSION;
// for debugging
/*
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
*/
// Target's prefix matches this block's prefix; we
// scan the entries to check whether the suffix matches.
public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException {
// if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + "
// nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" +
// brToString(term));
// if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix +
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
// ToStringUtils.bytesRefToString(target) +
// " term=" + ToStringUtils.bytesRefToString(term));
assert nextEnt != -1;
@ -617,7 +603,7 @@ final class SegmentTermsEnumFrame {
// suffixBytesRef.offset = suffixesReader.getPosition();
// suffixBytesRef.length = suffix;
// System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix="
// + brToString(suffixBytesRef));
// + ToStringUtils.bytesRefToString(suffixBytesRef));
// }
startBytePos = suffixesReader.getPosition();
@ -682,8 +668,9 @@ final class SegmentTermsEnumFrame {
public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws IOException {
// if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix +
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" +
// brToString(target));
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
// ToStringUtils.bytesRefToString(target) +
// " term=" + ToStringUtils.bytesRefToString(term));
assert nextEnt != -1;
@ -711,7 +698,8 @@ final class SegmentTermsEnumFrame {
// suffixBytesRef.offset = suffixesReader.getPosition();
// suffixBytesRef.length = suffix;
// System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " +
// (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef));
// (nextEnt-1) + " (of " + entCount + ") suffix=" +
// ToStringUtils.bytesRefToString(suffixBytesRef));
// }
final int termLen = prefix + suffix;
@ -743,8 +731,8 @@ final class SegmentTermsEnumFrame {
// return NOT_FOUND:
fillTerm();
// if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly + "
// ste.termExists=" + ste.termExists);
// if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly +
// " ste.termExists=" + ste.termExists);
if (!exactOnly && !ste.termExists) {
// System.out.println(" now pushFrame");

View File

@ -46,6 +46,7 @@ import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.compress.LZ4;
import org.apache.lucene.util.compress.LowercaseAsciiCompression;
import org.apache.lucene.util.fst.ByteSequenceOutputs;
@ -349,7 +350,7 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
}
// if (DEBUG) System.out.println("write field=" + fieldInfo.name + " term=" +
// brToString(term));
// ToStringUtils.bytesRefToString(term));
termsWriter.write(term, termsEnum, norms);
}
@ -388,33 +389,10 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
@Override
public String toString() {
return "TERM: " + brToString(termBytes);
return "TERM: " + ToStringUtils.bytesRefToString(termBytes);
}
}
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
if (b == null) {
return "(null)";
} else {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
}
// for debugging
@SuppressWarnings("unused")
static String brToString(byte[] b) {
return brToString(new BytesRef(b));
}
private static final class PendingBlock extends PendingEntry {
public final BytesRef prefix;
public final long fp;
@ -442,7 +420,7 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
@Override
public String toString() {
return "BLOCK: prefix=" + brToString(prefix);
return "BLOCK: prefix=" + ToStringUtils.bytesRefToString(prefix);
}
public void compileIndex(
@ -600,8 +578,8 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
// if (DEBUG2) {
// BytesRef br = new BytesRef(lastTerm.bytes());
// br.length = prefixLength;
// System.out.println("writeBlocks: seg=" + segment + " prefix=" + brToString(br) + " count="
// + count);
// System.out.println("writeBlocks: seg=" + segment + " prefix=" +
// ToStringUtils.bytesRefToString(br) + " count=" + count);
// }
// Root block better write all remaining pending entries:
@ -754,9 +732,10 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
prefix.length = prefixLength;
// if (DEBUG2) System.out.println(" writeBlock field=" + fieldInfo.name + " prefix=" +
// brToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor + " isLastInFloor=" + (end ==
// pending.size()) + " floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end +
// " hasTerms=" + hasTerms + " hasSubBlocks=" + hasSubBlocks);
// ToStringUtils.bytesRefToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor +
// " isLastInFloor=" + (end == pending.size()) + " floorLeadLabel=" + floorLeadLabel +
// " start=" + start + " end=" + end + " hasTerms=" + hasTerms + " hasSubBlocks=" +
// hasSubBlocks);
// Write block header:
int numEntries = end - start;
@ -769,7 +748,9 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
/*
if (DEBUG) {
System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + brToString(prefix) + " entCount=" + (end-start+1) + " startFP=" + startFP + (isFloor ? (" floorLeadLabel=" + Integer.toHexString(floorLeadLabel)) : ""));
System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" +
pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + ToStringUtils.bytesRefToString(prefix) +
" entCount=" + (end-start+1) + " startFP=" + startFP + (isFloor ? (" floorLeadLabel=" + Integer.toHexString(floorLeadLabel)) : ""));
}
*/
@ -804,7 +785,8 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
// BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix;
// System.out.println(" write term suffix=" + brToString(suffixBytes));
// System.out.println(" write term suffix=" +
// ToStringUtils.bytesRefToString(suffixBytes));
// }
// For leaf block we write suffix straight
@ -837,7 +819,8 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
// BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix;
// System.out.println(" write term suffix=" + brToString(suffixBytes));
// System.out.println(" write term suffix=" +
// ToStringUtils.bytesRefToString(suffixBytes));
// }
// For non-leaf block we borrow 1 bit to record
@ -879,8 +862,9 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
// BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix;
// System.out.println(" write sub-block suffix=" + brToString(suffixBytes) + "
// subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
// System.out.println(" write sub-block suffix=" +
// ToStringUtils.bytesRefToString(suffixBytes) +
// " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
// }
assert floorLeadLabel == -1
@ -998,7 +982,8 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
if (DEBUG) {
int[] tmp = new int[lastTerm.length];
System.arraycopy(prefixStarts, 0, tmp, 0, tmp.length);
System.out.println("BTTW: write term=" + brToString(text) + " prefixStarts=" + Arrays.toString(tmp) + " pending.size()=" + pending.size());
System.out.println("BTTW: write term=" + ToStringUtils.bytesRefToString(text) + " prefixStarts=" + Arrays.toString(tmp) +
" pending.size()=" + pending.size());
}
*/
@ -1051,8 +1036,8 @@ public final class Lucene40BlockTreeTermsWriter extends FieldsConsumer {
// we are closing:
int prefixTopSize = pending.size() - prefixStarts[i];
if (prefixTopSize >= minItemsInBlock) {
// if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + "
// minItemsInBlock=" + minItemsInBlock);
// if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize +
// " minItemsInBlock=" + minItemsInBlock);
writeBlocks(i + 1, prefixTopSize);
prefixStarts[i] -= prefixTopSize - 1;
}

View File

@ -230,24 +230,6 @@ public final class OrdsBlockTreeTermsReader extends FieldsProducer {
return fields.size();
}
// for debugging
String brToString(BytesRef b) {
if (b == null) {
return "null";
} else {
try {
return b.utf8ToString() + " " + b;
} catch (
@SuppressWarnings("unused")
Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
}
@Override
public void checkIntegrity() throws IOException {
// term dictionary

View File

@ -43,6 +43,7 @@ import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler;
@ -288,29 +289,10 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
@Override
public String toString() {
return brToString(termBytes);
return ToStringUtils.bytesRefToString(termBytes);
}
}
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
// for debugging
@SuppressWarnings("unused")
static String brToString(byte[] b) {
return brToString(new BytesRef(b));
}
private static final class SubIndex {
public final FST<Output> index;
public final long termOrdStart;
@ -353,7 +335,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
@Override
public String toString() {
return "BLOCK: " + brToString(prefix);
return "BLOCK: " + ToStringUtils.bytesRefToString(prefix);
}
public void compileIndex(
@ -457,9 +439,9 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
Output newOutput =
FST_OUTPUTS.newOutput(
output.bytes, termOrdOffset + output.startOrd, output.endOrd - termOrdOffset);
// System.out.println(" append sub=" + indexEnt.input + " output=" + indexEnt.output + "
// termOrdOffset=" + termOrdOffset + " blockTermCount=" + blockTermCount + " newOutput=" +
// newOutput + " endOrd=" + (termOrdOffset+Long.MAX_VALUE-output.endOrd));
// System.out.println(" append sub=" + indexEnt.input + " output=" + indexEnt.output +
// " termOrdOffset=" + termOrdOffset + " blockTermCount=" + blockTermCount + " newOutput="
// + newOutput + " endOrd=" + (termOrdOffset+Long.MAX_VALUE-output.endOrd));
fstCompiler.add(Util.toIntsRef(indexEnt.input, scratchIntsRef), newOutput);
}
}
@ -642,8 +624,8 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
long startFP = out.getFilePointer();
// if (DEBUG) System.out.println(" writeBlock fp=" + startFP + " isFloor=" + isFloor + "
// floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end + " hasTerms=" +
// if (DEBUG) System.out.println(" writeBlock fp=" + startFP + " isFloor=" + isFloor +
// " floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end + " hasTerms=" +
// hasTerms + " hasSubBlocks=" + hasSubBlocks);
boolean hasFloorLeadLabel = isFloor && floorLeadLabel != -1;
@ -662,11 +644,11 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
out.writeVInt(code);
// if (DEBUG) {
// System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + "
// pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" +
// brToString(prefix) + " entCount=" + length + " startFP=" + startFP + (isFloor ? ("
// floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") + " isLastInFloor=" +
// isLastInFloor);
// System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment +
// " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" +
// ToStringUtils.bytesRefToString(prefix) + " entCount=" + length + " startFP=" + startFP +
// (isFloor ? (" floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") +
// " isLastInFloor=" + isLastInFloor);
// }
final List<SubIndex> subIndices;
@ -784,7 +766,8 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
BytesRef suffixBytes = new BytesRef(suffix);
System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
suffixBytes.length = suffix;
System.out.println(" write sub-block suffix=" + brToString(suffixBytes) + " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
System.out.println(" write sub-block suffix=" + ToStringUtils.bytesRefToString(suffixBytes) +
" subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
}
*/
@ -842,7 +825,8 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
if (DEBUG) {
int[] tmp = new int[lastTerm.length];
System.arraycopy(prefixStarts, 0, tmp, 0, tmp.length);
System.out.println("BTTW: write term=" + brToString(text) + " prefixStarts=" + Arrays.toString(tmp) + " pending.size()=" + pending.size());
System.out.println("BTTW: write term=" + ToStringUtils.bytesRefToString(text) + " prefixStarts=" +
Arrays.toString(tmp) + " pending.size()=" + pending.size());
}
*/
@ -885,8 +869,8 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
// we are closing:
int prefixTopSize = pending.size() - prefixStarts[i];
if (prefixTopSize >= minItemsInBlock) {
// if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + "
// minItemsInBlock=" + minItemsInBlock);
// if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize +
// " minItemsInBlock=" + minItemsInBlock);
writeBlocks(i + 1, prefixTopSize);
prefixStarts[i] -= prefixTopSize - 1;
}

View File

@ -61,7 +61,7 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
throws IOException {
// if (DEBUG) {
// System.out.println("\nintEnum.init seg=" + segment + " commonSuffix=" +
// brToString(compiled.commonSuffixRef));
// ToStringUtils.bytesRefToString(compiled.commonSuffixRef));
// }
this.fr = fr;
this.byteRunnable = compiled.getByteRunnable();
@ -283,13 +283,15 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
currentFrame.loadNextFloorBlock();
continue;
} else {
// if (DEBUG) System.out.println(" return term=" + brToString(term));
// if (DEBUG) System.out.println(" return term=" +
// ToStringUtils.bytesRefToString(term));
return;
}
}
continue;
} else if (cmp == 0) {
// if (DEBUG) System.out.println(" return term=" + brToString(term));
// if (DEBUG) System.out.println(" return term=" +
// ToStringUtils.bytesRefToString(term));
return;
} else {
// Fallback to prior entry: the semantics of
@ -327,10 +329,10 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
// if (DEBUG) {
// System.out.println("\nintEnum.next seg=" + segment);
// System.out.println(" frame ord=" + currentFrame.ord + " prefix=" + brToString(new
// BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + "
// lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " trans=" +
// (currentFrame.transitions.length == 0 ? "n/a" :
// System.out.println(" frame ord=" + currentFrame.ord + " prefix=" +
// ToStringUtils.bytesRefToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) +
// " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor +
// " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
// currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" +
// currentFrame.outputPrefix);
// }
@ -343,9 +345,10 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
// if (DEBUG) System.out.println(" next-floor-block");
currentFrame.loadNextFloorBlock();
// if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" +
// brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" +
// currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" +
// currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
// ToStringUtils.bytesRefToString(new BytesRef(term.bytes, term.offset,
// currentFrame.prefix)) +
// " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor +
// " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
// currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" +
// currentFrame.outputPrefix);
} else {
@ -357,9 +360,10 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
currentFrame = stack[currentFrame.ord - 1];
assert currentFrame.lastSubFP == lastFP;
// if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" +
// brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" +
// currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" +
// currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
// ToStringUtils.bytesRefToString(new BytesRef(term.bytes, term.offset,
// currentFrame.prefix)) +
// " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor +
// " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
// currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" +
// currentFrame.outputPrefix);
}
@ -373,7 +377,7 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
// suffixRef.length = currentFrame.suffix;
// System.out.println(" " + (isSubBlock ? "sub-block" : "term") + " " +
// currentFrame.nextEnt + " (of " + currentFrame.entCount + ") suffix=" +
// brToString(suffixRef));
// ToStringUtils.bytesRefToString(suffixRef));
// }
if (currentFrame.suffix != 0) {
@ -480,15 +484,16 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
copyTerm();
currentFrame = pushFrame(state);
// if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" +
// brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" +
// currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" +
// ToStringUtils.bytesRefToString(new BytesRef(term.bytes, term.offset,
// currentFrame.prefix)) +
// " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" +
// currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
// currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" +
// currentFrame.outputPrefix);
} else if (byteRunnable.isAccept(state)) {
copyTerm();
// if (DEBUG) System.out.println(" term match to state=" + state + "; return term=" +
// brToString(term));
// ToStringUtils.bytesRefToString(term));
assert savedStartTerm == null || term.compareTo(savedStartTerm) > 0
: "saveStartTerm=" + savedStartTerm.utf8ToString() + " term=" + term.utf8ToString();
return term;

View File

@ -32,6 +32,7 @@ import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Util;
@ -174,11 +175,11 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
throws IOException {
final OrdsSegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord);
f.arc = arc;
// System.out.println("pushFrame termOrd= " + termOrd + " fpOrig=" + f.fpOrig + " fp=" + fp + "
// nextEnt=" + f.nextEnt);
// System.out.println("pushFrame termOrd= " + termOrd + " fpOrig=" + f.fpOrig + " fp=" + fp +
// " nextEnt=" + f.nextEnt);
if (f.fpOrig == fp && f.nextEnt != -1) {
// if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp + "
// isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
// if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp +
// " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
// f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" +
// term.length + " vs prefix=" + f.prefix);
if (f.prefix > targetBeforeCurrentLength) {
@ -204,7 +205,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
// final int sav = term.length;
// term.length = length;
// System.out.println(" push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" +
// f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term));
// f.hasTerms + " isFloor=" + f.isFloor + " pref=" + ToStringUtils.bytesRefToString(term));
// term.length = sav;
// }
}
@ -224,19 +225,6 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
return true;
}
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
@Override
public boolean seekExact(final BytesRef target) throws IOException {
@ -250,7 +238,9 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
/*
if (DEBUG) {
System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" + fr.fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix);
System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" + fr.fieldInfo.name + ":" +
ToStringUtils.bytesRefToString(target) + " current=" + ToStringUtils.bytesRefToString(term) + " (exists?=" + termExists +
") validIndexPrefix=" + validIndexPrefix);
printSeekState(System.out);
}
*/
@ -411,8 +401,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
positioned = true;
// if (DEBUG) {
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
// currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
// " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// targetBeforeCurrentLength);
// }
@ -443,7 +433,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
term.setByteAt(targetUpto, (byte) targetLabel);
term.setLength(1 + targetUpto);
// if (DEBUG) {
// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
// System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
// }
return false;
}
@ -459,7 +449,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
} else {
// if (DEBUG) {
// System.out.println(" got " + result + "; return NOT_FOUND term=" +
// brToString(term));
// ToStringUtils.bytesRefToString(term));
// }
return false;
}
@ -502,7 +492,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
termExists = false;
term.setLength(targetUpto);
// if (DEBUG) {
// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
// System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
// }
return false;
}
@ -537,8 +527,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
// if (DEBUG) {
// System.out.println("\nBTTR.seekCeil seg=" + segment + " target=" + fieldInfo.name + ":" +
// target.utf8ToString() + " " + target + " current=" + brToString(term) + " (exists?=" +
// termExists + ") validIndexPrefix= " + validIndexPrefix);
// target.utf8ToString() + " " + target + " current=" + ToStringUtils.bytesRefToString(term) +
// " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix);
// printSeekState();
// }
@ -581,9 +571,9 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
// if (DEBUG) {
// System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit +
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + "
// vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output +
// " output=" + output);
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) +
// " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output
// + " output=" + output);
// }
if (cmp != 0) {
break;
@ -697,8 +687,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
positioned = true;
// if (DEBUG) {
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
// currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
// " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// targetBeforeCurrentLength);
// }
@ -733,7 +723,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
if (next() != null) {
// if (DEBUG) {
// System.out.println(" return NOT_FOUND term=" + brToString(term) + " " + term);
// System.out.println(" return NOT_FOUND term=" +
// ToStringUtils.bytesRefToString(term));
// }
return SeekStatus.NOT_FOUND;
} else {
@ -744,7 +735,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
}
} else {
// if (DEBUG) {
// System.out.println(" return " + result + " term=" + brToString(term) + " " + term);
// System.out.println(" return " + result + " term=" +
// ToStringUtils.bytesRefToString(term));
// }
return result;
}
@ -829,7 +821,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
+ " prefixLen="
+ f.prefix
+ " prefix="
+ brToString(prefix)
+ ToStringUtils.bytesRefToString(prefix)
+ (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
+ " hasTerms="
+ f.hasTerms
@ -859,7 +851,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
+ " prefixLen="
+ f.prefix
+ " prefix="
+ brToString(prefix)
+ ToStringUtils.bytesRefToString(prefix)
+ " nextEnt="
+ f.nextEnt
+ (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
@ -951,8 +943,9 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
assert !eof;
// if (DEBUG) {
// System.out.println("\nBTTR.next seg=" + segment + " term=" + brToString(term) + "
// termExists?=" + termExists + " field=" + fieldInfo.name + " termBlockOrd=" +
// System.out.println("\nBTTR.next seg=" + segment + " term=" +
// ToStringUtils.bytesRefToString(term) +
// " termExists?=" + termExists + " field=" + fieldInfo.name + " termBlockOrd=" +
// currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix);
// printSeekState();
// }
@ -1019,8 +1012,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
// currentFrame.hasTerms = true;
currentFrame.loadBlock();
} else {
// if (DEBUG) System.out.println(" return term=" + term.utf8ToString() + " " + term + "
// currentFrame.ord=" + currentFrame.ord);
// if (DEBUG) System.out.println(" return term=" + term.utf8ToString() + " " + term +
// " currentFrame.ord=" + currentFrame.ord);
positioned = true;
return term.get();
}
@ -1235,8 +1228,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
int low = 0;
int high = arc.numArcs() - 1;
int mid = 0;
// System.out.println("bsearch: numArcs=" + arc.numArcs + " target=" + targetOutput + "
// output=" + output);
// System.out.println("bsearch: numArcs=" + arc.numArcs + " target=" + targetOutput +
// " output=" + output);
boolean found = false;
while (low <= high) {
mid = (low + high) >>> 1;

View File

@ -119,8 +119,8 @@ final class OrdsSegmentTermsEnumFrame {
numFollowFloorBlocks = floorDataReader.readVInt();
nextFloorLabel = floorDataReader.readByte() & 0xff;
nextFloorTermOrd = termOrdOrig + floorDataReader.readVLong();
// System.out.println(" setFloorData ord=" + ord + " nextFloorTermOrd=" + nextFloorTermOrd + "
// shift=" + (nextFloorTermOrd-termOrdOrig));
// System.out.println(" setFloorData ord=" + ord + " nextFloorTermOrd=" + nextFloorTermOrd +
// " shift=" + (nextFloorTermOrd-termOrdOrig));
// if (DEBUG) {
// System.out.println(" setFloorData fpOrig=" + fpOrig + " bytes=" + new
@ -289,8 +289,8 @@ final class OrdsSegmentTermsEnumFrame {
// Decodes next entry; returns true if it's a sub-block
public boolean nextLeaf() {
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + "
// entCount=" + entCount);
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt +
// " entCount=" + entCount);
assert nextEnt != -1 && nextEnt < entCount
: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp + " termOrd=" + termOrd;
nextEnt++;
@ -306,8 +306,8 @@ final class OrdsSegmentTermsEnumFrame {
}
public boolean nextNonLeaf() {
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + "
// entCount=" + entCount);
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt +
// " entCount=" + entCount);
assert nextEnt != -1 && nextEnt < entCount
: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
nextEnt++;
@ -374,8 +374,8 @@ final class OrdsSegmentTermsEnumFrame {
newFP = fpOrig + (code >>> 1);
hasTerms = (code & 1) != 0;
// if (DEBUG) {
// System.out.println(" label=" + ((char) nextFloorLabel) + " fp=" + newFP + "
// hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
// System.out.println(" label=" + ((char) nextFloorLabel) + " fp=" + newFP +
// " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
// }
isLastInFloor = numFollowFloorBlocks == 1;
@ -440,8 +440,8 @@ final class OrdsSegmentTermsEnumFrame {
newFP = fpOrig + (code >>> 1);
hasTerms = (code & 1) != 0;
// if (DEBUG) {
// System.out.println(" label=" + ((char) nextFloorLabel) + " fp=" + newFP + "
// hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
// System.out.println(" label=" + ((char) nextFloorLabel) + " fp=" + newFP +
// " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
// }
isLastInFloor = numFollowFloorBlocks == 1;
@ -495,8 +495,8 @@ final class OrdsSegmentTermsEnumFrame {
boolean absolute = metaDataUpto == 0;
assert limit > 0 : "limit=" + limit + " isLeafBlock=" + isLeafBlock + " nextEnt=" + nextEnt;
// if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + ste.fr.parent.segment + "
// mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd + " limit=" + limit);
// if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + ste.fr.parent.segment +
// " mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd + " limit=" + limit);
// TODO: better API would be "jump straight to term=N"???
while (metaDataUpto < limit) {
@ -593,10 +593,10 @@ final class OrdsSegmentTermsEnumFrame {
// scan the entries to check if the suffix matches.
public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException {
// if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + "
// nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
// OrdsSegmentTermsEnum.brToString(target) + " term=" +
// OrdsSegmentTermsEnum.brToString(ste.term));
// if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix +
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
// ToStringUtils.bytesRefToString(target) + " term=" +
// ToStringUtils.bytesRefToString(ste.term));
assert nextEnt != -1;
@ -627,7 +627,7 @@ final class OrdsSegmentTermsEnumFrame {
// suffixBytesRef.offset = suffixesReader.getPosition();
// suffixBytesRef.length = suffix;
// System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix="
// + OrdsSegmentTermsEnum.brToString(suffixBytesRef));
// + ToStringUtils.bytesRefToString(suffixBytesRef));
// }
final int termLen = prefix + suffix;
@ -714,8 +714,8 @@ final class OrdsSegmentTermsEnumFrame {
// if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix +
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
// OrdsSegmentTermsEnum.brToString(target) + " term=" +
// OrdsSegmentTermsEnum.brToString(ste.term));
// ToStringUtils.bytesRefToString(target) + " term=" +
// ToStringUtils.bytesRefToString(ste.term));
assert nextEnt != -1;
@ -743,7 +743,8 @@ final class OrdsSegmentTermsEnumFrame {
// suffixBytesRef.offset = suffixesReader.getPosition();
// suffixBytesRef.length = suffix;
// System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " +
// (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef));
// (nextEnt-1) + " (of " + entCount + ") suffix=" +
// ToStringUtils.bytesRefToString(suffixBytesRef));
// }
ste.termExists = (code & 1) == 0;

View File

@ -210,7 +210,7 @@ public final class FieldReader extends Terms {
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
// if (DEBUG) System.out.println(" FieldReader.intersect startTerm=" +
// BlockTreeTermsWriter.brToString(startTerm));
// ToStringUtils.bytesRefToString(startTerm));
// System.out.println("intersect: " + compiled.type + " a=" + compiled.automaton);
// TODO: we could push "it's a range" or "it's a prefix" down into IntersectTermsEnum?
// can we optimize knowing that...?

View File

@ -549,19 +549,6 @@ final class IntersectTermsEnum extends BaseTermsEnum {
}
}
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
private void copyTerm() {
final int len = currentFrame.prefix + currentFrame.suffix;
if (term.bytes.length < len) {

View File

@ -307,24 +307,6 @@ public final class Lucene90BlockTreeTermsReader extends FieldsProducer {
return fieldMap.size();
}
// for debugging
String brToString(BytesRef b) {
if (b == null) {
return "null";
} else {
try {
return b.utf8ToString() + " " + b;
} catch (
@SuppressWarnings("unused")
Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
}
@Override
public void checkIntegrity() throws IOException {
// terms index

View File

@ -47,6 +47,7 @@ import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.compress.LZ4;
import org.apache.lucene.util.compress.LowercaseAsciiCompression;
import org.apache.lucene.util.fst.ByteSequenceOutputs;
@ -394,7 +395,7 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
}
// if (DEBUG) System.out.println("write field=" + fieldInfo.name + " term=" +
// brToString(term));
// ToStringUtils.bytesRefToString(term));
termsWriter.write(term, termsEnum, norms);
}
@ -433,33 +434,10 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
@Override
public String toString() {
return "TERM: " + brToString(termBytes);
return "TERM: " + ToStringUtils.bytesRefToString(termBytes);
}
}
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
if (b == null) {
return "(null)";
} else {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
}
// for debugging
@SuppressWarnings("unused")
static String brToString(byte[] b) {
return brToString(new BytesRef(b));
}
/**
* Encodes long value to variable length byte[], in MSB order. Use {@link
* FieldReader#readMSBVLong} to decode.
@ -506,7 +484,7 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
@Override
public String toString() {
return "BLOCK: prefix=" + brToString(prefix);
return "BLOCK: prefix=" + ToStringUtils.bytesRefToString(prefix);
}
public void compileIndex(
@ -689,8 +667,8 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
// if (DEBUG2) {
// BytesRef br = new BytesRef(lastTerm.bytes());
// br.length = prefixLength;
// System.out.println("writeBlocks: seg=" + segment + " prefix=" + brToString(br) + " count="
// + count);
// System.out.println("writeBlocks: seg=" + segment + " prefix=" +
// ToStringUtils.bytesRefToString(br) + " count=" + count);
// }
// Root block better write all remaining pending entries:
@ -843,9 +821,10 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
prefix.length = prefixLength;
// if (DEBUG2) System.out.println(" writeBlock field=" + fieldInfo.name + " prefix=" +
// brToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor + " isLastInFloor=" + (end ==
// pending.size()) + " floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end +
// " hasTerms=" + hasTerms + " hasSubBlocks=" + hasSubBlocks);
// ToStringUtils.bytesRefToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor +
// " isLastInFloor=" + (end == pending.size()) + " floorLeadLabel=" + floorLeadLabel +
// " start=" + start + " end=" + end + " hasTerms=" + hasTerms + " hasSubBlocks=" +
// hasSubBlocks);
// Write block header:
int numEntries = end - start;
@ -858,7 +837,9 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
/*
if (DEBUG) {
System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + brToString(prefix) + " entCount=" + (end-start+1) + " startFP=" + startFP + (isFloor ? (" floorLeadLabel=" + Integer.toHexString(floorLeadLabel)) : ""));
System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" +
pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + ToStringUtils.bytesRefToString(prefix) +
" entCount=" + (end-start+1) + " startFP=" + startFP + (isFloor ? (" floorLeadLabel=" + Integer.toHexString(floorLeadLabel)) : ""));
}
*/
@ -893,7 +874,8 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
// BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix;
// System.out.println(" write term suffix=" + brToString(suffixBytes));
// System.out.println(" write term suffix=" +
// ToStringUtils.bytesRefToString(suffixBytes));
// }
// For leaf block we write suffix straight
@ -926,7 +908,8 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
// BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix;
// System.out.println(" write term suffix=" + brToString(suffixBytes));
// System.out.println(" write term suffix=" +
// ToStringUtils.bytesRefToString(suffixBytes));
// }
// For non-leaf block we borrow 1 bit to record
@ -968,8 +951,9 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
// BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix;
// System.out.println(" write sub-block suffix=" + brToString(suffixBytes) + "
// subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
// System.out.println(" write sub-block suffix=" +
// ToStringUtils.bytesRefToString(suffixBytes) + " subFP=" + block.fp + " subCode=" +
// (startFP-block.fp) + " floor=" + block.isFloor);
// }
assert floorLeadLabel == -1
@ -1090,7 +1074,8 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
if (DEBUG) {
int[] tmp = new int[lastTerm.length];
System.arraycopy(prefixStarts, 0, tmp, 0, tmp.length);
System.out.println("BTTW: write term=" + brToString(text) + " prefixStarts=" + Arrays.toString(tmp) + " pending.size()=" + pending.size());
System.out.println("BTTW: write term=" + ToStringUtils.bytesRefToString(text) + " prefixStarts=" + Arrays.toString(tmp) +
" pending.size()=" + pending.size());
}
*/
@ -1143,8 +1128,8 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
// we are closing:
int prefixTopSize = pending.size() - prefixStarts[i];
if (prefixTopSize >= minItemsInBlock) {
// if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + "
// minItemsInBlock=" + minItemsInBlock);
// if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize +
// " minItemsInBlock=" + minItemsInBlock);
writeBlocks(i + 1, prefixTopSize);
prefixStarts[i] -= prefixTopSize - 1;
}

View File

@ -263,8 +263,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
final SegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord);
f.arc = arc;
if (f.fpOrig == fp && f.nextEnt != -1) {
// if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp + "
// isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
// if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp +
// " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
// f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" +
// term.length + " vs prefix=" + f.prefix);
// if (f.prefix > targetBeforeCurrentLength) {
@ -286,7 +286,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// final int sav = term.length;
// term.length = length;
// System.out.println(" push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" +
// f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term));
// f.hasTerms + " isFloor=" + f.isFloor + " pref=" + ToStringUtils.bytesRefToString(term));
// term.length = sav;
// }
}
@ -306,27 +306,6 @@ final class SegmentTermsEnum extends BaseTermsEnum {
return true;
}
/*
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRefBuilder b) {
return brToString(b.get());
}
*/
@Override
public boolean seekExact(BytesRef target) throws IOException {
@ -344,8 +323,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// if (DEBUG) {
// System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" +
// fr.fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?="
// + termExists + ") validIndexPrefix=" + validIndexPrefix);
// fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) + " current=" +
// ToStringUtils.bytesRefToString(term) +
// " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix);
// printSeekState(System.out);
// }
@ -499,8 +479,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
}
// if (DEBUG) {
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
// currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
// " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// targetBeforeCurrentLength);
// }
@ -531,7 +511,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
term.setByteAt(targetUpto, (byte) targetLabel);
term.setLength(1 + targetUpto);
// if (DEBUG) {
// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
// System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
// }
return false;
}
@ -547,7 +527,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
} else {
// if (DEBUG) {
// System.out.println(" got " + result + "; return NOT_FOUND term=" +
// brToString(term));
// ToStringUtils.bytesRefToString(term));
// }
return false;
}
@ -586,7 +566,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
termExists = false;
term.setLength(targetUpto);
// if (DEBUG) {
// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
// System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
// }
return false;
}
@ -622,8 +602,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// if (DEBUG) {
// System.out.println("\nBTTR.seekCeil seg=" + fr.parent.segment + " target=" +
// fr.fieldInfo.name + ":" + brToString(target) + " " + target + " current=" + brToString(term)
// + " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix);
// fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) + " current=" +
// ToStringUtils.bytesRefToString(term) + " (exists?=" + termExists +
// ") validIndexPrefix= " + validIndexPrefix);
// printSeekState(System.out);
// }
@ -663,9 +644,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
// if (DEBUG) {
// System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit +
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + "
// vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output +
// " output=" + output);
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) +
// " vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output
// + " output=" + output);
// }
if (cmp != 0) {
break;
@ -771,8 +752,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
}
// if (DEBUG) {
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
// currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
// " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// targetBeforeCurrentLength);
// }
@ -808,7 +789,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
if (next() != null) {
// if (DEBUG) {
// System.out.println(" return NOT_FOUND term=" + brToString(term));
// System.out.println(" return NOT_FOUND term=" +
// ToStringUtils.bytesRefToString(term));
// }
return SeekStatus.NOT_FOUND;
} else {
@ -819,7 +801,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
}
} else {
// if (DEBUG) {
// System.out.println(" return " + result + " term=" + brToString(term));
// System.out.println(" return " + result + " term=" +
// ToStringUtils.bytesRefToString(term));
// }
return result;
}
@ -1015,9 +998,10 @@ final class SegmentTermsEnum extends BaseTermsEnum {
assert !eof;
// if (DEBUG) {
// System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" + brToString(term) + "
// termExists?=" + termExists + " field=" + fr.fieldInfo.name + " termBlockOrd=" +
// currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix);
// System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" +
// ToStringUtils.bytesRefToString(term) + " termExists?=" + termExists + " field=" +
// fr.fieldInfo.name + " termBlockOrd=" + currentFrame.state.termBlockOrd +
// " validIndexPrefix=" + validIndexPrefix);
// printSeekState(System.out);
// }
@ -1081,8 +1065,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// try to scan to the right floor frame:
currentFrame.loadBlock();
} else {
// if (DEBUG) System.out.println(" return term=" + brToString(term) + " currentFrame.ord="
// + currentFrame.ord);
// if (DEBUG) System.out.println(" return term=" + ToStringUtils.bytesRefToString(term) +
// " currentFrame.ord=" + currentFrame.ord);
return term.get();
}
}

View File

@ -295,8 +295,8 @@ final class SegmentTermsEnumFrame {
}
public void nextLeaf() {
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + "
// entCount=" + entCount);
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt +
// " entCount=" + entCount);
assert nextEnt != -1 && nextEnt < entCount
: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
nextEnt++;
@ -388,8 +388,8 @@ final class SegmentTermsEnumFrame {
newFP = fpOrig + (code >>> 1);
hasTerms = (code & 1) != 0;
// if (DEBUG) {
// System.out.println(" label=" + toHex(nextFloorLabel) + " fp=" + newFP + "
// hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
// System.out.println(" label=" + toHex(nextFloorLabel) + " fp=" + newFP +
// " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
// }
isLastInFloor = numFollowFloorBlocks == 1;
@ -531,28 +531,14 @@ final class SegmentTermsEnumFrame {
private long subCode;
CompressionAlgorithm compressionAlg = CompressionAlgorithm.NO_COMPRESSION;
// for debugging
/*
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
*/
// Target's prefix matches this block's prefix; we
// scan the entries to check if the suffix matches.
public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException {
// if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + "
// nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" +
// brToString(term));
// if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix +
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
// ToStringUtils.bytesRefToString(target) +
// " term=" + ToStringUtils.bytesRefToString(term));
assert nextEnt != -1;
@ -582,7 +568,7 @@ final class SegmentTermsEnumFrame {
// suffixBytesRef.offset = suffixesReader.getPosition();
// suffixBytesRef.length = suffix;
// System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix="
// + brToString(suffixBytesRef));
// + ToStringUtils.bytesRefToString(suffixBytesRef));
// }
startBytePos = suffixesReader.getPosition();
@ -647,8 +633,9 @@ final class SegmentTermsEnumFrame {
public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws IOException {
// if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix +
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" +
// brToString(target));
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
// ToStringUtils.bytesRefToString(target) +
// " term=" + ToStringUtils.bytesRefToString(term));
assert nextEnt != -1;
@ -676,7 +663,8 @@ final class SegmentTermsEnumFrame {
// suffixBytesRef.offset = suffixesReader.getPosition();
// suffixBytesRef.length = suffix;
// System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " +
// (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef));
// (nextEnt-1) + " (of " + entCount + ") suffix=" +
// ToStringUtils.bytesRefToString(suffixBytesRef));
// }
final int termLen = prefix + suffix;
@ -708,8 +696,8 @@ final class SegmentTermsEnumFrame {
// return NOT_FOUND:
fillTerm();
// if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly + "
// ste.termExists=" + ste.termExists);
// if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly +
// " ste.termExists=" + ste.termExists);
if (!exactOnly && !ste.termExists) {
// System.out.println(" now pushFrame");

View File

@ -130,17 +130,20 @@ public final class BytesRef implements Comparable<BytesRef>, Cloneable {
return false;
}
/** Interprets stored bytes as UTF8 bytes, returning the resulting string */
/**
 * Decodes the referenced bytes as UTF-8 and returns the resulting string. An {@link
 * AssertionError} or a {@link RuntimeException} may be thrown when the bytes are not well-formed
 * UTF-8.
 */
public String utf8ToString() {
  // One char slot per byte is reserved as the decode target; the decoder reports how many
  // chars it actually produced.
  final char[] chars = new char[length];
  final int charCount = UnicodeUtil.UTF8toUTF16(bytes, offset, length, chars);
  return new String(chars, 0, charCount);
}
/** Returns hex encoded bytes, eg [0x6c 0x75 0x63 0x65 0x6e 0x65] */
/** Returns hex encoded bytes, e.g. "[6c 75 63 65 6e 65]" */
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
StringBuilder sb = new StringBuilder(2 + 3 * length);
sb.append('[');
final int end = offset + length;
for (int i = offset; i < end; i++) {

View File

@ -32,6 +32,10 @@ public final class ToStringUtils {
private static final char[] HEX = "0123456789abcdef".toCharArray();
/**
* Unlike {@link Long#toHexString(long)} returns a String with a "0x" prefix and all the leading
* zeros.
*/
public static String longHex(long x) {
char[] asHex = new char[16];
for (int i = 16; --i >= 0; x >>>= 4) {
@ -39,4 +43,31 @@ public final class ToStringUtils {
}
return "0x" + new String(asHex);
}
/**
 * Renders a {@link BytesRef} for debugging as its decoded text followed by its hex dump, for
 * example {@code "hello [68 65 6c 6c 6f]"}. When the bytes cannot be decoded as UTF-8, only the
 * hex dump produced by {@link BytesRef#toString()} is returned. A {@code null} reference is
 * rendered as {@code "null"}.
 */
@SuppressWarnings("unused")
public static String bytesRefToString(BytesRef b) {
  if (b != null) {
    try {
      final String text = b.utf8ToString();
      return text + " " + b;
    } catch (AssertionError | RuntimeException e) {
      // Not valid UTF-8 (e.g. a prefix cut off in the middle of a multi-byte
      // character): show the hex form only.
      return b.toString();
    }
  }
  return "null";
}
/**
 * Renders the current content of a {@link BytesRefBuilder} in the same format as {@link
 * #bytesRefToString(BytesRef)}.
 *
 * @param b the builder to render; may be {@code null}
 * @return the rendered content, or {@code "null"} when {@code b} is {@code null}
 */
public static String bytesRefToString(BytesRefBuilder b) {
  // Route a null builder to the BytesRef overload's null handling instead of dereferencing it.
  return bytesRefToString(b == null ? null : b.get());
}
/**
 * Renders a raw byte array in the same format as {@link #bytesRefToString(BytesRef)} by wrapping
 * it in a {@link BytesRef}.
 *
 * @param b the bytes to render; may be {@code null}
 * @return the rendered content, or {@code "null"} when {@code b} is {@code null}
 */
public static String bytesRefToString(byte[] b) {
  // Only wrap a real array; a null array is routed to the BytesRef overload's null handling.
  return bytesRefToString(b == null ? null : new BytesRef(b));
}
}

View File

@ -30,6 +30,7 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PairOutputs.Pair;
import org.apache.lucene.util.fst.Util;
@ -175,8 +176,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
final IDVersionSegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord);
f.arc = arc;
if (f.fpOrig == fp && f.nextEnt != -1) {
// if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp + "
// isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
// if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp +
// " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
// f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" +
// term.length + " vs prefix=" + f.prefix);
if (f.prefix > targetBeforeCurrentLength) {
@ -197,7 +198,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
// final int sav = term.length;
// term.length = length;
// System.out.println(" push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" +
// f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term));
// f.hasTerms + " isFloor=" + f.isFloor + " pref=" + ToStringUtils.bytesRefToString(term));
// term.length = sav;
// }
}
@ -222,19 +223,6 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
return seekExact(target, 0);
}
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
/** Get the version of the currently seek'd term; only valid if we are positioned. */
public long getVersion() {
return ((IDVersionTermState) currentFrame.state).idVersion;
@ -258,8 +246,9 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
// if (DEBUG) {
// System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" +
// fr.fieldInfo.name + ":" + brToString(target) + " minIDVersion=" + minIDVersion + " current="
// + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix);
// fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) + " minIDVersion=" +
// minIDVersion + " current=" + ToStringUtils.bytesRefToString(term) + " (exists?=" +
// termExists + ") validIndexPrefix=" + validIndexPrefix);
// printSeekState(System.out);
// }
@ -460,8 +449,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
}
// if (DEBUG) {
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
// currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
// " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// targetBeforeCurrentLength + " termExists=" + termExists);
// }
@ -492,7 +481,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
term.setByteAt(targetUpto, (byte) targetLabel);
term.setLength(1 + targetUpto);
// if (DEBUG) {
// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
// System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
// }
return false;
}
@ -520,10 +509,11 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
// termExists = false;
// }
// if (DEBUG) {
// System.out.println(" FAST version NOT_FOUND term=" + brToString(term) + "
// targetUpto=" + targetUpto + " currentFrame.maxIDVersion=" + currentFrame.maxIDVersion +
// " validIndexPrefix=" + validIndexPrefix + " startFrameFP=" + startFrameFP + " vs " +
// currentFrame.fp + " termExists=" + termExists);
// System.out.println(" FAST version NOT_FOUND term=" +
// ToStringUtils.bytesRefToString(term) + " targetUpto=" + targetUpto +
// " currentFrame.maxIDVersion=" + currentFrame.maxIDVersion + " validIndexPrefix=" +
// validIndexPrefix + " startFrameFP=" + startFrameFP + " vs " + currentFrame.fp +
// " termExists=" + termExists);
// }
return false;
}
@ -553,7 +543,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
} else {
// if (DEBUG) {
// System.out.println(" got " + result + "; return NOT_FOUND term=" +
// brToString(term));
// ToStringUtils.bytesRefToString(term));
// }
return false;
}
@ -604,7 +594,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
termExists = false;
term.setLength(targetUpto);
// if (DEBUG) {
// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
// System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
// }
return false;
}
@ -656,8 +646,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
// if (DEBUG) {
// System.out.println("\nBTTR.seekCeil seg=" + segment + " target=" + fieldInfo.name + ":" +
// target.utf8ToString() + " " + target + " current=" + brToString(term) + " (exists?=" +
// termExists + ") validIndexPrefix= " + validIndexPrefix);
// target.utf8ToString() + " " + target + " current=" + ToStringUtils.bytesRefToString(term) +
// " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix);
// printSeekState();
// }
@ -700,9 +690,9 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
// if (DEBUG) {
// System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit +
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + "
// vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output +
// " output=" + output);
// ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) +
// " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output
// + " output=" + output);
// }
if (cmp != 0) {
break;
@ -814,8 +804,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
}
// if (DEBUG) {
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + "
// currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output +
// " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
// targetBeforeCurrentLength);
// }
@ -850,7 +840,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
if (next() != null) {
// if (DEBUG) {
// System.out.println(" return NOT_FOUND term=" + brToString(term) + " " + term);
// System.out.println(" return NOT_FOUND term=" +
// ToStringUtils.bytesRefToString(term));
// }
return SeekStatus.NOT_FOUND;
} else {
@ -861,7 +852,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
}
} else {
// if (DEBUG) {
// System.out.println(" return " + result + " term=" + brToString(term) + " " + term);
// System.out.println(" return " + result + " term=" +
// ToStringUtils.bytesRefToString(term));
// }
return result;
}
@ -946,7 +938,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
+ " prefixLen="
+ f.prefix
+ " prefix="
+ brToString(prefix)
+ ToStringUtils.bytesRefToString(prefix)
+ (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
+ " hasTerms="
+ f.hasTerms
@ -974,7 +966,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
+ " prefixLen="
+ f.prefix
+ " prefix="
+ brToString(prefix)
+ ToStringUtils.bytesRefToString(prefix)
+ " nextEnt="
+ f.nextEnt
+ (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
@ -1063,9 +1055,10 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
assert !eof;
// if (DEBUG) {
// System.out.println("\nBTTR.next seg=" + segment + " term=" + brToString(term) + "
// termExists?=" + termExists + " field=" + fieldInfo.name + " termBlockOrd=" +
// currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix);
// System.out.println("\nBTTR.next seg=" + segment + " term=" +
// ToStringUtils.bytesRefToString(term) + " termExists?=" + termExists +
// " field=" + fieldInfo.name + " termBlockOrd=" + currentFrame.state.termBlockOrd +
// " validIndexPrefix=" + validIndexPrefix);
// printSeekState();
// }
@ -1129,8 +1122,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
// currentFrame.hasTerms = true;
currentFrame.loadBlock();
} else {
// if (DEBUG) System.out.println(" return term=" + term.utf8ToString() + " " + term + "
// currentFrame.ord=" + currentFrame.ord);
// if (DEBUG) System.out.println(" return term=" + term.utf8ToString() + " " + term +
// " currentFrame.ord=" + currentFrame.ord);
return term.get();
}
}

View File

@ -257,8 +257,8 @@ final class IDVersionSegmentTermsEnumFrame {
// Decodes next entry; returns true if it's a sub-block
public boolean nextLeaf() {
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + "
// entCount=" + entCount);
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt +
// " entCount=" + entCount);
assert nextEnt != -1 && nextEnt < entCount
: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
nextEnt++;
@ -273,8 +273,8 @@ final class IDVersionSegmentTermsEnumFrame {
}
public boolean nextNonLeaf() {
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + "
// entCount=" + entCount);
// if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt +
// " entCount=" + entCount);
assert nextEnt != -1 && nextEnt < entCount
: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
nextEnt++;
@ -338,8 +338,8 @@ final class IDVersionSegmentTermsEnumFrame {
newFP = fpOrig + (code >>> 1);
hasTerms = (code & 1) != 0;
// if (DEBUG) {
// System.out.println(" label=" + ((char) nextFloorLabel) + " fp=" + newFP + "
// hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
// System.out.println(" label=" + ((char) nextFloorLabel) + " fp=" + newFP +
// " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
// }
isLastInFloor = numFollowFloorBlocks == 1;
@ -379,8 +379,8 @@ final class IDVersionSegmentTermsEnumFrame {
public void decodeMetaData() throws IOException {
// if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + ste.fr.parent.segment + "
// mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd);
// if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + ste.fr.parent.segment +
// " mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd);
assert nextEnt >= 0;
@ -473,10 +473,10 @@ final class IDVersionSegmentTermsEnumFrame {
// scan the entries check if the suffix matches.
public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException {
// if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + "
// nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
// IDVersionSegmentTermsEnum.brToString(target) + " term=" +
// IDVersionSegmentTermsEnum.brToString(ste.term));
// if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix +
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
// ToStringUtils.bytesRefToString(target) +
// " term=" + ToStringUtils.bytesRefToString(ste.term));
assert nextEnt != -1;
@ -506,7 +506,7 @@ final class IDVersionSegmentTermsEnumFrame {
// suffixBytesRef.offset = suffixesReader.getPosition();
// suffixBytesRef.length = suffix;
// System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix="
// + IDVersionSegmentTermsEnum.brToString(suffixBytesRef));
// + ToStringUtils.bytesRefToString(suffixBytesRef));
// }
final int termLen = prefix + suffix;
@ -606,8 +606,8 @@ final class IDVersionSegmentTermsEnumFrame {
// if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix +
// " nextEnt=" + nextEnt + " (of " + entCount + ") target=" +
// IDVersionSegmentTermsEnum.brToString(target) + " term=" +
// IDVersionSegmentTermsEnum.brToString(ste.term));
// ToStringUtils.bytesRefToString(target) + " term=" +
// ToStringUtils.bytesRefToString(ste.term));
assert nextEnt != -1;
@ -635,7 +635,8 @@ final class IDVersionSegmentTermsEnumFrame {
// suffixBytesRef.offset = suffixesReader.getPosition();
// suffixBytesRef.length = suffix;
// System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " +
// (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef));
// (nextEnt-1) + " (of " + entCount + ") suffix=" +
// ToStringUtils.bytesRefToString(suffixBytesRef));
// }
ste.termExists = (code & 1) == 0;

View File

@ -237,24 +237,6 @@ public final class VersionBlockTreeTermsReader extends FieldsProducer {
return fields.size();
}
// for debugging
String brToString(BytesRef b) {
if (b == null) {
return "null";
} else {
try {
return b.utf8ToString() + " " + b;
} catch (
@SuppressWarnings("unused")
Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
}
@Override
public void checkIntegrity() throws IOException {
// term dictionary

View File

@ -42,6 +42,7 @@ import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST;
@ -292,29 +293,10 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
@Override
public String toString() {
return brToString(termBytes);
return ToStringUtils.bytesRefToString(termBytes);
}
}
// for debugging
@SuppressWarnings("unused")
static String brToString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
} catch (Throwable t) {
// If BytesRef isn't actually UTF8, or it's eg a
// prefix of UTF8 that ends mid-unicode-char, we
// fallback to hex:
return b.toString();
}
}
// for debugging
@SuppressWarnings("unused")
static String brToString(byte[] b) {
return brToString(new BytesRef(b));
}
private static final class PendingBlock extends PendingEntry {
public final BytesRef prefix;
public final long fp;
@ -347,7 +329,7 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
@Override
public String toString() {
return "BLOCK: " + brToString(prefix);
return "BLOCK: " + ToStringUtils.bytesRefToString(prefix);
}
public void compileIndex(
@ -610,8 +592,8 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
long startFP = out.getFilePointer();
// if (DEBUG) System.out.println(" writeBlock fp=" + startFP + " isFloor=" + isFloor + "
// floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end + " hasTerms=" +
// if (DEBUG) System.out.println(" writeBlock fp=" + startFP + " isFloor=" + isFloor +
// " floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end + " hasTerms=" +
// hasTerms + " hasSubBlocks=" + hasSubBlocks);
boolean hasFloorLeadLabel = isFloor && floorLeadLabel != -1;
@ -630,11 +612,11 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
out.writeVInt(code);
// if (DEBUG) {
// System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + "
// pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" +
// brToString(prefix) + " entCount=" + length + " startFP=" + startFP + (isFloor ? ("
// floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") + " isLastInFloor=" +
// isLastInFloor);
// System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment +
// " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" +
// ToStringUtils.bytesRefToString(prefix) + " entCount=" + length + " startFP=" + startFP +
// (isFloor ? (" floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") +
// " isLastInFloor=" + isLastInFloor);
// }
// 1st pass: pack term suffix bytes into byte[] blob
@ -737,7 +719,8 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
BytesRef suffixBytes = new BytesRef(suffix);
System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
suffixBytes.length = suffix;
System.out.println(" write sub-block suffix=" + brToString(suffixBytes) + " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
System.out.println(" write sub-block suffix=" + ToStringUtils.bytesRefToString(suffixBytes) +
" subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
}
*/
@ -824,8 +807,8 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
// we are closing:
int prefixTopSize = pending.size() - prefixStarts[i];
if (prefixTopSize >= minItemsInBlock) {
// if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + "
// minItemsInBlock=" + minItemsInBlock);
// if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize +
// " minItemsInBlock=" + minItemsInBlock);
writeBlocks(i + 1, prefixTopSize);
prefixStarts[i] -= prefixTopSize - 1;
}