mirror of https://github.com/apache/lucene.git
LUCENE-4593: first step towards FST storage abstraction
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1420014 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ea9bffece7
commit
17f8b6cf36
|
@ -275,7 +275,7 @@ public final class FST<T> {
|
||||||
inCounts = null;
|
inCounts = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
writer = new BytesWriter();
|
writer = new DefaultBytesWriter();
|
||||||
|
|
||||||
emptyOutput = null;
|
emptyOutput = null;
|
||||||
packed = false;
|
packed = false;
|
||||||
|
@ -360,8 +360,8 @@ public final class FST<T> {
|
||||||
if (this.startNode != -1) {
|
if (this.startNode != -1) {
|
||||||
throw new IllegalStateException("already finished");
|
throw new IllegalStateException("already finished");
|
||||||
}
|
}
|
||||||
byte[] finalBytes = new byte[writer.posWrite];
|
byte[] finalBytes = new byte[writer.getPosition()];
|
||||||
System.arraycopy(bytes, 0, finalBytes, 0, writer.posWrite);
|
System.arraycopy(bytes, 0, finalBytes, 0, writer.getPosition());
|
||||||
bytes = finalBytes;
|
bytes = finalBytes;
|
||||||
this.startNode = startNode;
|
this.startNode = startNode;
|
||||||
|
|
||||||
|
@ -415,23 +415,23 @@ public final class FST<T> {
|
||||||
|
|
||||||
// TODO: this is messy -- replace with sillyBytesWriter; maybe make
|
// TODO: this is messy -- replace with sillyBytesWriter; maybe make
|
||||||
// bytes private
|
// bytes private
|
||||||
final int posSave = writer.posWrite;
|
final int posSave = writer.getPosition();
|
||||||
outputs.writeFinalOutput(emptyOutput, writer);
|
outputs.writeFinalOutput(emptyOutput, writer);
|
||||||
emptyOutputBytes = new byte[writer.posWrite-posSave];
|
emptyOutputBytes = new byte[writer.getPosition()-posSave];
|
||||||
|
|
||||||
if (!packed) {
|
if (!packed) {
|
||||||
// reverse
|
// reverse
|
||||||
final int stopAt = (writer.posWrite - posSave)/2;
|
final int stopAt = (writer.getPosition() - posSave)/2;
|
||||||
int upto = 0;
|
int upto = 0;
|
||||||
while(upto < stopAt) {
|
while(upto < stopAt) {
|
||||||
final byte b = bytes[posSave + upto];
|
final byte b = bytes[posSave + upto];
|
||||||
bytes[posSave+upto] = bytes[writer.posWrite-upto-1];
|
bytes[posSave+upto] = bytes[writer.getPosition()-upto-1];
|
||||||
bytes[writer.posWrite-upto-1] = b;
|
bytes[writer.getPosition()-upto-1] = b;
|
||||||
upto++;
|
upto++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
System.arraycopy(bytes, posSave, emptyOutputBytes, 0, writer.posWrite-posSave);
|
System.arraycopy(bytes, posSave, emptyOutputBytes, 0, writer.getPosition()-posSave);
|
||||||
writer.posWrite = posSave;
|
writer.setPosition(posSave);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void save(DataOutput out) throws IOException {
|
public void save(DataOutput out) throws IOException {
|
||||||
|
@ -562,7 +562,7 @@ public final class FST<T> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int startAddress = writer.posWrite;
|
int startAddress = writer.getPosition();
|
||||||
//System.out.println(" startAddr=" + startAddress);
|
//System.out.println(" startAddr=" + startAddress);
|
||||||
|
|
||||||
final boolean doFixedArray = shouldExpand(nodeIn);
|
final boolean doFixedArray = shouldExpand(nodeIn);
|
||||||
|
@ -578,7 +578,7 @@ public final class FST<T> {
|
||||||
// of bytes per arc (int) here:
|
// of bytes per arc (int) here:
|
||||||
// TODO: we could make this a vInt instead
|
// TODO: we could make this a vInt instead
|
||||||
writer.writeInt(0);
|
writer.writeInt(0);
|
||||||
fixedArrayStart = writer.posWrite;
|
fixedArrayStart = writer.getPosition();
|
||||||
//System.out.println(" do fixed arcs array arcsStart=" + fixedArrayStart);
|
//System.out.println(" do fixed arcs array arcsStart=" + fixedArrayStart);
|
||||||
} else {
|
} else {
|
||||||
fixedArrayStart = 0;
|
fixedArrayStart = 0;
|
||||||
|
@ -588,7 +588,7 @@ public final class FST<T> {
|
||||||
|
|
||||||
final int lastArc = nodeIn.numArcs-1;
|
final int lastArc = nodeIn.numArcs-1;
|
||||||
|
|
||||||
int lastArcStart = writer.posWrite;
|
int lastArcStart = writer.getPosition();
|
||||||
int maxBytesPerArc = 0;
|
int maxBytesPerArc = 0;
|
||||||
for(int arcIdx=0;arcIdx<nodeIn.numArcs;arcIdx++) {
|
for(int arcIdx=0;arcIdx<nodeIn.numArcs;arcIdx++) {
|
||||||
final Builder.Arc<T> arc = nodeIn.arcs[arcIdx];
|
final Builder.Arc<T> arc = nodeIn.arcs[arcIdx];
|
||||||
|
@ -653,8 +653,8 @@ public final class FST<T> {
|
||||||
// but record how many bytes each one took, and max
|
// but record how many bytes each one took, and max
|
||||||
// byte size:
|
// byte size:
|
||||||
if (doFixedArray) {
|
if (doFixedArray) {
|
||||||
bytesPerArc[arcIdx] = writer.posWrite - lastArcStart;
|
bytesPerArc[arcIdx] = writer.getPosition() - lastArcStart;
|
||||||
lastArcStart = writer.posWrite;
|
lastArcStart = writer.getPosition();
|
||||||
maxBytesPerArc = Math.max(maxBytesPerArc, bytesPerArc[arcIdx]);
|
maxBytesPerArc = Math.max(maxBytesPerArc, bytesPerArc[arcIdx]);
|
||||||
//System.out.println(" bytes=" + bytesPerArc[arcIdx]);
|
//System.out.println(" bytes=" + bytesPerArc[arcIdx]);
|
||||||
}
|
}
|
||||||
|
@ -681,9 +681,9 @@ public final class FST<T> {
|
||||||
bytes[fixedArrayStart-1] = (byte) maxBytesPerArc;
|
bytes[fixedArrayStart-1] = (byte) maxBytesPerArc;
|
||||||
|
|
||||||
// expand the arcs in place, backwards
|
// expand the arcs in place, backwards
|
||||||
int srcPos = writer.posWrite;
|
int srcPos = writer.getPosition();
|
||||||
int destPos = fixedArrayStart + nodeIn.numArcs*maxBytesPerArc;
|
int destPos = fixedArrayStart + nodeIn.numArcs*maxBytesPerArc;
|
||||||
writer.posWrite = destPos;
|
writer.setPosition(destPos);
|
||||||
for(int arcIdx=nodeIn.numArcs-1;arcIdx>=0;arcIdx--) {
|
for(int arcIdx=nodeIn.numArcs-1;arcIdx>=0;arcIdx--) {
|
||||||
//System.out.println(" repack arcIdx=" + arcIdx + " srcPos=" + srcPos + " destPos=" + destPos);
|
//System.out.println(" repack arcIdx=" + arcIdx + " srcPos=" + srcPos + " destPos=" + destPos);
|
||||||
destPos -= maxBytesPerArc;
|
destPos -= maxBytesPerArc;
|
||||||
|
@ -698,7 +698,7 @@ public final class FST<T> {
|
||||||
// reverse bytes in-place; we do this so that the
|
// reverse bytes in-place; we do this so that the
|
||||||
// "BIT_TARGET_NEXT" opto can work, ie, it reads the
|
// "BIT_TARGET_NEXT" opto can work, ie, it reads the
|
||||||
// node just before the current one
|
// node just before the current one
|
||||||
final int endAddress = writer.posWrite - 1;
|
final int endAddress = writer.getPosition() - 1;
|
||||||
|
|
||||||
int left = startAddress;
|
int left = startAddress;
|
||||||
int right = endAddress;
|
int right = endAddress;
|
||||||
|
@ -908,17 +908,18 @@ public final class FST<T> {
|
||||||
|
|
||||||
if (arc.label == END_LABEL) {
|
if (arc.label == END_LABEL) {
|
||||||
//System.out.println(" nextArc fake " + arc.nextArc);
|
//System.out.println(" nextArc fake " + arc.nextArc);
|
||||||
in.pos = getNodeAddress(arc.nextArc);
|
int pos = in.pos = getNodeAddress(arc.nextArc);
|
||||||
final byte b = bytes[in.pos];
|
final byte b = in.readByte();
|
||||||
if (b == ARCS_AS_FIXED_ARRAY) {
|
if (b == ARCS_AS_FIXED_ARRAY) {
|
||||||
//System.out.println(" nextArc fake array");
|
//System.out.println(" nextArc fake array");
|
||||||
in.skip(1);
|
|
||||||
in.readVInt();
|
in.readVInt();
|
||||||
if (packed) {
|
if (packed) {
|
||||||
in.readVInt();
|
in.readVInt();
|
||||||
} else {
|
} else {
|
||||||
in.readInt();
|
in.readInt();
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
in.pos = pos;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (arc.bytesPerArc != 0) {
|
if (arc.bytesPerArc != 0) {
|
||||||
|
@ -1184,11 +1185,16 @@ public final class FST<T> {
|
||||||
node.numArcs >= FIXED_ARRAY_NUM_ARCS_DEEP);
|
node.numArcs >= FIXED_ARRAY_NUM_ARCS_DEEP);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static abstract class BytesWriter extends DataOutput {
|
||||||
|
public abstract void setPosition(int posWrite);
|
||||||
|
public abstract int getPosition();
|
||||||
|
}
|
||||||
|
|
||||||
// Non-static: writes to FST's byte[]
|
// Non-static: writes to FST's byte[]
|
||||||
class BytesWriter extends DataOutput {
|
class DefaultBytesWriter extends BytesWriter {
|
||||||
int posWrite;
|
int posWrite;
|
||||||
|
|
||||||
public BytesWriter() {
|
public DefaultBytesWriter() {
|
||||||
// pad: ensure no node gets address 0 which is reserved to mean
|
// pad: ensure no node gets address 0 which is reserved to mean
|
||||||
// the stop state w/ no arcs
|
// the stop state w/ no arcs
|
||||||
posWrite = 1;
|
posWrite = 1;
|
||||||
|
@ -1205,7 +1211,13 @@ public final class FST<T> {
|
||||||
bytes[posWrite++] = b;
|
bytes[posWrite++] = b;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setPosWrite(int posWrite) {
|
@Override
|
||||||
|
public int getPosition() {
|
||||||
|
return posWrite;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setPosition(int posWrite) {
|
||||||
this.posWrite = posWrite;
|
this.posWrite = posWrite;
|
||||||
if (bytes.length < posWrite) {
|
if (bytes.length < posWrite) {
|
||||||
assert bytes.length < Integer.MAX_VALUE: "FST too large (> 2.1 GB)";
|
assert bytes.length < Integer.MAX_VALUE: "FST too large (> 2.1 GB)";
|
||||||
|
@ -1436,7 +1448,7 @@ public final class FST<T> {
|
||||||
this.nodeRefToAddress = nodeRefToAddress;
|
this.nodeRefToAddress = nodeRefToAddress;
|
||||||
this.outputs = outputs;
|
this.outputs = outputs;
|
||||||
NO_OUTPUT = outputs.getNoOutput();
|
NO_OUTPUT = outputs.getNoOutput();
|
||||||
writer = new BytesWriter();
|
writer = new DefaultBytesWriter();
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Expert: creates an FST by packing this one. This
|
/** Expert: creates an FST by packing this one. This
|
||||||
|
@ -1529,7 +1541,7 @@ public final class FST<T> {
|
||||||
// for assert:
|
// for assert:
|
||||||
boolean negDelta = false;
|
boolean negDelta = false;
|
||||||
|
|
||||||
writer.posWrite = 0;
|
writer.setPosition(0);
|
||||||
// Skip 0 byte since 0 is reserved target:
|
// Skip 0 byte since 0 is reserved target:
|
||||||
writer.writeByte((byte) 0);
|
writer.writeByte((byte) 0);
|
||||||
|
|
||||||
|
@ -1550,7 +1562,7 @@ public final class FST<T> {
|
||||||
// unchanged:
|
// unchanged:
|
||||||
for(int node=nodeCount;node>=1;node--) {
|
for(int node=nodeCount;node>=1;node--) {
|
||||||
fst.nodeCount++;
|
fst.nodeCount++;
|
||||||
final int address = writer.posWrite;
|
final int address = writer.getPosition();
|
||||||
//System.out.println(" node: " + node + " address=" + address);
|
//System.out.println(" node: " + node + " address=" + address);
|
||||||
if (address != newNodeAddress.get(node)) {
|
if (address != newNodeAddress.get(node)) {
|
||||||
addressError = address - (int) newNodeAddress.get(node);
|
addressError = address - (int) newNodeAddress.get(node);
|
||||||
|
@ -1592,7 +1604,7 @@ public final class FST<T> {
|
||||||
while(true) { // iterate over all arcs for this node
|
while(true) { // iterate over all arcs for this node
|
||||||
|
|
||||||
//System.out.println(" arc label=" + arc.label + " target=" + arc.target + " pos=" + writer.posWrite);
|
//System.out.println(" arc label=" + arc.label + " target=" + arc.target + " pos=" + writer.posWrite);
|
||||||
final int arcStartPos = writer.posWrite;
|
final int arcStartPos = writer.getPosition();
|
||||||
nodeArcCount++;
|
nodeArcCount++;
|
||||||
|
|
||||||
byte flags = 0;
|
byte flags = 0;
|
||||||
|
@ -1639,7 +1651,7 @@ public final class FST<T> {
|
||||||
absPtr = topNodeMap.size() + (int) newNodeAddress.get(arc.target) + addressError;
|
absPtr = topNodeMap.size() + (int) newNodeAddress.get(arc.target) + addressError;
|
||||||
}
|
}
|
||||||
|
|
||||||
int delta = (int) newNodeAddress.get(arc.target) + addressError - writer.posWrite - 2;
|
int delta = (int) newNodeAddress.get(arc.target) + addressError - writer.getPosition() - 2;
|
||||||
if (delta < 0) {
|
if (delta < 0) {
|
||||||
//System.out.println("neg: " + delta);
|
//System.out.println("neg: " + delta);
|
||||||
anyNegDelta = true;
|
anyNegDelta = true;
|
||||||
|
@ -1669,7 +1681,7 @@ public final class FST<T> {
|
||||||
|
|
||||||
if (doWriteTarget) {
|
if (doWriteTarget) {
|
||||||
|
|
||||||
int delta = (int) newNodeAddress.get(arc.target) + addressError - writer.posWrite;
|
int delta = (int) newNodeAddress.get(arc.target) + addressError - writer.getPosition();
|
||||||
if (delta < 0) {
|
if (delta < 0) {
|
||||||
anyNegDelta = true;
|
anyNegDelta = true;
|
||||||
//System.out.println("neg: " + delta);
|
//System.out.println("neg: " + delta);
|
||||||
|
@ -1702,7 +1714,7 @@ public final class FST<T> {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (useArcArray) {
|
if (useArcArray) {
|
||||||
final int arcBytes = writer.posWrite - arcStartPos;
|
final int arcBytes = writer.getPosition() - arcStartPos;
|
||||||
//System.out.println(" " + arcBytes + " bytes");
|
//System.out.println(" " + arcBytes + " bytes");
|
||||||
maxBytesPerArc = Math.max(maxBytesPerArc, arcBytes);
|
maxBytesPerArc = Math.max(maxBytesPerArc, arcBytes);
|
||||||
// NOTE: this may in fact go "backwards", if
|
// NOTE: this may in fact go "backwards", if
|
||||||
|
@ -1712,7 +1724,7 @@ public final class FST<T> {
|
||||||
// will retry (below) so it's OK to overwrite
|
// will retry (below) so it's OK to overwrite
|
||||||
// bytes:
|
// bytes:
|
||||||
//wasted += bytesPerArc - arcBytes;
|
//wasted += bytesPerArc - arcBytes;
|
||||||
writer.setPosWrite(arcStartPos + bytesPerArc);
|
writer.setPosition(arcStartPos + bytesPerArc);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (arc.isLast()) {
|
if (arc.isLast()) {
|
||||||
|
@ -1737,7 +1749,7 @@ public final class FST<T> {
|
||||||
|
|
||||||
// Retry:
|
// Retry:
|
||||||
bytesPerArc = maxBytesPerArc;
|
bytesPerArc = maxBytesPerArc;
|
||||||
writer.posWrite = address;
|
writer.setPosition(address);
|
||||||
nodeArcCount = 0;
|
nodeArcCount = 0;
|
||||||
retry = true;
|
retry = true;
|
||||||
anyNegDelta = false;
|
anyNegDelta = false;
|
||||||
|
@ -1784,9 +1796,9 @@ public final class FST<T> {
|
||||||
assert fst.arcCount == arcCount;
|
assert fst.arcCount == arcCount;
|
||||||
assert fst.arcWithOutputCount == arcWithOutputCount: "fst.arcWithOutputCount=" + fst.arcWithOutputCount + " arcWithOutputCount=" + arcWithOutputCount;
|
assert fst.arcWithOutputCount == arcWithOutputCount: "fst.arcWithOutputCount=" + fst.arcWithOutputCount + " arcWithOutputCount=" + arcWithOutputCount;
|
||||||
|
|
||||||
final byte[] finalBytes = new byte[writer.posWrite];
|
final byte[] finalBytes = new byte[writer.getPosition()];
|
||||||
//System.out.println("resize " + fst.bytes.length + " down to " + writer.posWrite);
|
//System.out.println("resize " + fst.bytes.length + " down to " + writer.posWrite);
|
||||||
System.arraycopy(fst.bytes, 0, finalBytes, 0, writer.posWrite);
|
System.arraycopy(fst.bytes, 0, finalBytes, 0, writer.getPosition());
|
||||||
fst.bytes = finalBytes;
|
fst.bytes = finalBytes;
|
||||||
fst.cacheRootArcs();
|
fst.cacheRootArcs();
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue