mirror of https://github.com/apache/lucene.git
minor FST fixes
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1237809 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
76fc82a199
commit
9169b24c84
|
@ -231,7 +231,7 @@ public final class FST<T> {
|
||||||
b.append(" hasOutput");
|
b.append(" hasOutput");
|
||||||
}
|
}
|
||||||
if (flag(BIT_ARC_HAS_FINAL_OUTPUT)) {
|
if (flag(BIT_ARC_HAS_FINAL_OUTPUT)) {
|
||||||
b.append(" hasOutput");
|
b.append(" hasFinalOutput");
|
||||||
}
|
}
|
||||||
if (bytesPerArc != 0) {
|
if (bytesPerArc != 0) {
|
||||||
b.append(" arcArray(idx=" + arcIdx + " of " + numArcs + ")");
|
b.append(" arcArray(idx=" + arcIdx + " of " + numArcs + ")");
|
||||||
|
@ -1447,6 +1447,7 @@ public final class FST<T> {
|
||||||
// Find top nodes with highest number of incoming arcs:
|
// Find top nodes with highest number of incoming arcs:
|
||||||
NodeQueue q = new NodeQueue(topN);
|
NodeQueue q = new NodeQueue(topN);
|
||||||
|
|
||||||
|
// TODO: we could use more RAM efficient selection algo here...
|
||||||
NodeAndInCount bottom = null;
|
NodeAndInCount bottom = null;
|
||||||
for(int node=0;node<inCounts.length;node++) {
|
for(int node=0;node<inCounts.length;node++) {
|
||||||
if (inCounts[node] >= minInCountDeref) {
|
if (inCounts[node] >= minInCountDeref) {
|
||||||
|
@ -1515,6 +1516,8 @@ public final class FST<T> {
|
||||||
|
|
||||||
int addressError = 0;
|
int addressError = 0;
|
||||||
|
|
||||||
|
//int totWasted = 0;
|
||||||
|
|
||||||
// Since we re-reverse the bytes, we now write the
|
// Since we re-reverse the bytes, we now write the
|
||||||
// nodes backwards, so that BIT_TARGET_NEXT is
|
// nodes backwards, so that BIT_TARGET_NEXT is
|
||||||
// unchanged:
|
// unchanged:
|
||||||
|
@ -1554,10 +1557,11 @@ public final class FST<T> {
|
||||||
writer.writeByte(ARCS_AS_FIXED_ARRAY);
|
writer.writeByte(ARCS_AS_FIXED_ARRAY);
|
||||||
writer.writeVInt(arc.numArcs);
|
writer.writeVInt(arc.numArcs);
|
||||||
writer.writeVInt(bytesPerArc);
|
writer.writeVInt(bytesPerArc);
|
||||||
|
//System.out.println("node " + node + ": " + arc.numArcs + " arcs");
|
||||||
}
|
}
|
||||||
|
|
||||||
int maxBytesPerArc = 0;
|
int maxBytesPerArc = 0;
|
||||||
|
//int wasted = 0;
|
||||||
while(true) { // iterate over all arcs for this node
|
while(true) { // iterate over all arcs for this node
|
||||||
|
|
||||||
//System.out.println(" arc label=" + arc.label + " target=" + arc.target + " pos=" + writer.posWrite);
|
//System.out.println(" arc label=" + arc.label + " target=" + arc.target + " pos=" + writer.posWrite);
|
||||||
|
@ -1680,6 +1684,7 @@ public final class FST<T> {
|
||||||
// incoming FST did... but in this case we
|
// incoming FST did... but in this case we
|
||||||
// will retry (below) so it's OK to overwrite
|
// will retry (below) so it's OK to overwrite
|
||||||
// bytes:
|
// bytes:
|
||||||
|
//wasted += bytesPerArc - arcBytes;
|
||||||
writer.setPosWrite(arcStartPos + bytesPerArc);
|
writer.setPosWrite(arcStartPos + bytesPerArc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1693,6 +1698,8 @@ public final class FST<T> {
|
||||||
if (useArcArray) {
|
if (useArcArray) {
|
||||||
if (maxBytesPerArc == bytesPerArc || (retry && maxBytesPerArc <= bytesPerArc)) {
|
if (maxBytesPerArc == bytesPerArc || (retry && maxBytesPerArc <= bytesPerArc)) {
|
||||||
// converged
|
// converged
|
||||||
|
//System.out.println(" bba=" + bytesPerArc + " wasted=" + wasted);
|
||||||
|
//totWasted += wasted;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -1719,6 +1726,7 @@ public final class FST<T> {
|
||||||
// other nodes because we only produce acyclic FSTs
|
// other nodes because we only produce acyclic FSTs
|
||||||
// w/ nodes only pointing "forwards":
|
// w/ nodes only pointing "forwards":
|
||||||
assert !negDelta;
|
assert !negDelta;
|
||||||
|
//System.out.println("TOT wasted=" + totWasted);
|
||||||
// Converged!
|
// Converged!
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -1730,7 +1738,7 @@ public final class FST<T> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fst.startNode = newNodeAddress[startNode];
|
fst.startNode = newNodeAddress[startNode];
|
||||||
//System.out.println("new startNode=" + startNode);
|
//System.out.println("new startNode=" + fst.startNode + " old startNode=" + startNode);
|
||||||
|
|
||||||
if (emptyOutput != null) {
|
if (emptyOutput != null) {
|
||||||
fst.setEmptyOutput(emptyOutput);
|
fst.setEmptyOutput(emptyOutput);
|
||||||
|
|
|
@ -530,7 +530,7 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
if (VERBOSE) {
|
if (VERBOSE) {
|
||||||
System.out.println("TEST: now rewrite");
|
System.out.println("TEST: now rewrite");
|
||||||
}
|
}
|
||||||
final FST<T> packed =fst.pack(_TestUtil.nextInt(random, 1, 10), _TestUtil.nextInt(random, 0, 10000000));
|
final FST<T> packed = fst.pack(_TestUtil.nextInt(random, 1, 10), _TestUtil.nextInt(random, 0, 10000000));
|
||||||
if (VERBOSE) {
|
if (VERBOSE) {
|
||||||
System.out.println("TEST: now verify packed FST");
|
System.out.println("TEST: now verify packed FST");
|
||||||
}
|
}
|
||||||
|
@ -1308,13 +1308,13 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
System.out.println("Pack...");
|
System.out.println("Pack...");
|
||||||
fst = fst.pack(4, 100000000);
|
fst = fst.pack(4, 100000000);
|
||||||
System.out.println("New size " + fst.sizeInBytes() + " bytes");
|
System.out.println("New size " + fst.sizeInBytes() + " bytes");
|
||||||
} else {
|
|
||||||
Directory dir = FSDirectory.open(new File(dirOut));
|
|
||||||
IndexOutput out = dir.createOutput("fst.bin", IOContext.DEFAULT);
|
|
||||||
fst.save(out);
|
|
||||||
out.close();
|
|
||||||
System.out.println("Saved FST to fst.bin.");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Directory dir = FSDirectory.open(new File(dirOut));
|
||||||
|
IndexOutput out = dir.createOutput("fst.bin", IOContext.DEFAULT);
|
||||||
|
fst.save(out);
|
||||||
|
out.close();
|
||||||
|
System.out.println("Saved FST to fst.bin.");
|
||||||
|
|
||||||
if (!verify) {
|
if (!verify) {
|
||||||
return;
|
return;
|
||||||
|
|
Loading…
Reference in New Issue