LUCENE-5752: cutover to FixedBitSet to mark the accept states; improve javadocs

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5752@1603489 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2014-06-18 15:41:49 +00:00
parent cc3eaed1bc
commit ae6ea5d4f0
2 changed files with 55 additions and 24 deletions

View File

@ -24,6 +24,7 @@ import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.Sorter;
@ -71,7 +72,7 @@ public class Automaton {
/** Holds toState, min, max for each transition. */
private int[] transitions = new int[6];
private final Set<Integer> acceptStates = new HashSet<Integer>();
private FixedBitSet isAccept = new FixedBitSet(4);
/** True if no state has two transitions leaving with the same label. */
private boolean deterministic = true;
@ -86,18 +87,23 @@ public class Automaton {
int state = nextState/2;
states[nextState] = -1;
nextState += 2;
if (state >= isAccept.length()) {
FixedBitSet newBits = new FixedBitSet(ArrayUtil.oversize(state+1, 1));
newBits.or(isAccept);
isAccept = newBits;
}
return state;
}
/** Set or clear this state as an accept state. */
public void setAccept(int state, boolean isAccept) {
public void setAccept(int state, boolean accept) {
if (state >= getNumStates()) {
throw new IllegalArgumentException("state=" + state + " is out of bounds (numStates=" + getNumStates() + ")");
}
if (isAccept) {
acceptStates.add(state);
if (accept) {
isAccept.set(state);
} else {
acceptStates.remove(state);
isAccept.clear(state);
}
}
@ -119,14 +125,14 @@ public class Automaton {
return transitions;
}
/** Returns accept states. */
public Set<Integer> getAcceptStates() {
return acceptStates;
/** Returns accept states. If the bit is set then that state is an accept state. */
FixedBitSet getAcceptStates() {
return isAccept;
}
/** Returns true if this state is an accept state. */
public boolean isAccept(int state) {
return acceptStates.contains(state);
return isAccept.get(state);
}
/** Add a new transition with min = max = label. */
@ -195,12 +201,19 @@ public class Automaton {
if (states[nextState+i] != -1) {
states[nextState+i] += nextTransition;
}
int state = i/2;
}
nextState += other.nextState;
for(int s : other.getAcceptStates()) {
setAccept(stateOffset+s, true);
if (isAccept.length() < nextState/2) {
FixedBitSet newBits = new FixedBitSet(ArrayUtil.oversize(nextState/2, 1));
newBits.or(isAccept);
isAccept = newBits;
}
int otherNumStates = other.getNumStates();
FixedBitSet otherAcceptStates = other.getAcceptStates();
int state = 0;
while (state < otherNumStates && (state = otherAcceptStates.nextSetBit(state)) != -1) {
setAccept(stateOffset + state, true);
state++;
}
// Bulk copy and then fixup dest for each transition:

View File

@ -42,6 +42,7 @@ import java.util.Set;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.RamUsageEstimator;
@ -239,7 +240,7 @@ final public class Operations {
b = concatenate(as);
}
Set<Integer> prevAcceptStates = new HashSet<>(b.getAcceptStates());
Set<Integer> prevAcceptStates = toSet(b, 0);
for(int i=min;i<max;i++) {
int numStates = b.getNumStates();
@ -247,16 +248,26 @@ final public class Operations {
for(int s : prevAcceptStates) {
b.addEpsilon(s, numStates);
}
prevAcceptStates.clear();
for(int s : a.getAcceptStates()) {
prevAcceptStates.add(numStates+s);
}
prevAcceptStates = toSet(a, numStates);
}
b.finishState();
return b;
}
private static Set<Integer> toSet(Automaton a, int offset) {
int numStates = a.getNumStates();
FixedBitSet isAccept = a.getAcceptStates();
Set<Integer> result = new HashSet<Integer>();
int upto = 0;
while (upto < numStates && (upto = isAccept.nextSetBit(upto)) != -1) {
result.add(offset+upto);
upto++;
}
return result;
}
/**
* Returns a (deterministic) automaton that accepts the complement of the
@ -913,13 +924,16 @@ final public class Operations {
LinkedList<Integer> workList = new LinkedList<>();
BitSet live = new BitSet(numStates);
for (int s : a.getAcceptStates()) {
FixedBitSet acceptBits = a.getAcceptStates();
int s = 0;
while (s < numStates && (s = acceptBits.nextSetBit(s)) != -1) {
live.set(s);
workList.add(s);
s++;
}
while (workList.isEmpty() == false) {
int s = workList.removeFirst();
s = workList.removeFirst();
int count = a2.initTransition(s, t);
for(int i=0;i<count;i++) {
a2.getNextTransition(t);
@ -971,6 +985,7 @@ final public class Operations {
assert hasDeadStates(result) == false;
return result;
}
/**
* Finds the largest entry whose value is less than or equal to c, or 0 if
* there is no such entry.
@ -988,7 +1003,8 @@ final public class Operations {
}
/**
* Returns true if the language of this automaton is finite.
* Returns true if the language of this automaton is finite. The
* automaton must not have any dead states.
*/
public static boolean isFinite(Automaton a) {
if (a.getNumStates() == 0) {
@ -998,7 +1014,7 @@ final public class Operations {
}
/**
* Checks whether there is a loop containing s. (This is sufficient since
* Checks whether there is a loop containing state. (This is sufficient since
* there are never transitions to dead states.)
*/
// TODO: not great that this is recursive... in theory a
@ -1144,12 +1160,14 @@ final public class Operations {
Automaton result = builder.finish();
for(int s : a.getAcceptStates()) {
assert s < numStates;
int s = 0;
FixedBitSet acceptStates = a.getAcceptStates();
while (s < numStates && (s = acceptStates.nextSetBit(s)) != -1) {
result.addEpsilon(0, s+1);
if (initialStates != null) {
initialStates.add(s+1);
}
s++;
}
result.finishState();