mirror of https://github.com/apache/lucene.git
LUCENE-5752: cutover to FixedBitSet to mark the accept states; improve javadocs
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5752@1603489 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
cc3eaed1bc
commit
ae6ea5d4f0
|
@ -24,6 +24,7 @@ import java.util.HashSet;
|
|||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.InPlaceMergeSorter;
|
||||
import org.apache.lucene.util.Sorter;
|
||||
|
||||
|
@ -71,7 +72,7 @@ public class Automaton {
|
|||
/** Holds toState, min, max for each transition. */
|
||||
private int[] transitions = new int[6];
|
||||
|
||||
private final Set<Integer> acceptStates = new HashSet<Integer>();
|
||||
private FixedBitSet isAccept = new FixedBitSet(4);
|
||||
|
||||
/** True if no state has two transitions leaving with the same label. */
|
||||
private boolean deterministic = true;
|
||||
|
@ -86,18 +87,23 @@ public class Automaton {
|
|||
int state = nextState/2;
|
||||
states[nextState] = -1;
|
||||
nextState += 2;
|
||||
if (state >= isAccept.length()) {
|
||||
FixedBitSet newBits = new FixedBitSet(ArrayUtil.oversize(state+1, 1));
|
||||
newBits.or(isAccept);
|
||||
isAccept = newBits;
|
||||
}
|
||||
return state;
|
||||
}
|
||||
|
||||
/** Set or clear this state as an accept state. */
|
||||
public void setAccept(int state, boolean isAccept) {
|
||||
public void setAccept(int state, boolean accept) {
|
||||
if (state >= getNumStates()) {
|
||||
throw new IllegalArgumentException("state=" + state + " is out of bounds (numStates=" + getNumStates() + ")");
|
||||
}
|
||||
if (isAccept) {
|
||||
acceptStates.add(state);
|
||||
if (accept) {
|
||||
isAccept.set(state);
|
||||
} else {
|
||||
acceptStates.remove(state);
|
||||
isAccept.clear(state);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -119,14 +125,14 @@ public class Automaton {
|
|||
return transitions;
|
||||
}
|
||||
|
||||
/** Returns accept states. */
|
||||
public Set<Integer> getAcceptStates() {
|
||||
return acceptStates;
|
||||
/** Returns accept states. If the bit is set then that state is an accept state. */
|
||||
FixedBitSet getAcceptStates() {
|
||||
return isAccept;
|
||||
}
|
||||
|
||||
/** Returns true if this state is an accept state. */
|
||||
public boolean isAccept(int state) {
|
||||
return acceptStates.contains(state);
|
||||
return isAccept.get(state);
|
||||
}
|
||||
|
||||
/** Add a new transition with min = max = label. */
|
||||
|
@ -195,12 +201,19 @@ public class Automaton {
|
|||
if (states[nextState+i] != -1) {
|
||||
states[nextState+i] += nextTransition;
|
||||
}
|
||||
int state = i/2;
|
||||
}
|
||||
nextState += other.nextState;
|
||||
|
||||
for(int s : other.getAcceptStates()) {
|
||||
setAccept(stateOffset+s, true);
|
||||
if (isAccept.length() < nextState/2) {
|
||||
FixedBitSet newBits = new FixedBitSet(ArrayUtil.oversize(nextState/2, 1));
|
||||
newBits.or(isAccept);
|
||||
isAccept = newBits;
|
||||
}
|
||||
int otherNumStates = other.getNumStates();
|
||||
FixedBitSet otherAcceptStates = other.getAcceptStates();
|
||||
int state = 0;
|
||||
while (state < otherNumStates && (state = otherAcceptStates.nextSetBit(state)) != -1) {
|
||||
setAccept(stateOffset + state, true);
|
||||
state++;
|
||||
}
|
||||
|
||||
// Bulk copy and then fixup dest for each transition:
|
||||
|
|
|
@ -42,6 +42,7 @@ import java.util.Set;
|
|||
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
|
@ -239,7 +240,7 @@ final public class Operations {
|
|||
b = concatenate(as);
|
||||
}
|
||||
|
||||
Set<Integer> prevAcceptStates = new HashSet<>(b.getAcceptStates());
|
||||
Set<Integer> prevAcceptStates = toSet(b, 0);
|
||||
|
||||
for(int i=min;i<max;i++) {
|
||||
int numStates = b.getNumStates();
|
||||
|
@ -247,16 +248,26 @@ final public class Operations {
|
|||
for(int s : prevAcceptStates) {
|
||||
b.addEpsilon(s, numStates);
|
||||
}
|
||||
prevAcceptStates.clear();
|
||||
for(int s : a.getAcceptStates()) {
|
||||
prevAcceptStates.add(numStates+s);
|
||||
}
|
||||
prevAcceptStates = toSet(a, numStates);
|
||||
}
|
||||
|
||||
b.finishState();
|
||||
|
||||
return b;
|
||||
}
|
||||
|
||||
private static Set<Integer> toSet(Automaton a, int offset) {
|
||||
int numStates = a.getNumStates();
|
||||
FixedBitSet isAccept = a.getAcceptStates();
|
||||
Set<Integer> result = new HashSet<Integer>();
|
||||
int upto = 0;
|
||||
while (upto < numStates && (upto = isAccept.nextSetBit(upto)) != -1) {
|
||||
result.add(offset+upto);
|
||||
upto++;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a (deterministic) automaton that accepts the complement of the
|
||||
|
@ -913,13 +924,16 @@ final public class Operations {
|
|||
|
||||
LinkedList<Integer> workList = new LinkedList<>();
|
||||
BitSet live = new BitSet(numStates);
|
||||
for (int s : a.getAcceptStates()) {
|
||||
FixedBitSet acceptBits = a.getAcceptStates();
|
||||
int s = 0;
|
||||
while (s < numStates && (s = acceptBits.nextSetBit(s)) != -1) {
|
||||
live.set(s);
|
||||
workList.add(s);
|
||||
s++;
|
||||
}
|
||||
|
||||
while (workList.isEmpty() == false) {
|
||||
int s = workList.removeFirst();
|
||||
s = workList.removeFirst();
|
||||
int count = a2.initTransition(s, t);
|
||||
for(int i=0;i<count;i++) {
|
||||
a2.getNextTransition(t);
|
||||
|
@ -971,6 +985,7 @@ final public class Operations {
|
|||
assert hasDeadStates(result) == false;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the largest entry whose value is less than or equal to c, or 0 if
|
||||
* there is no such entry.
|
||||
|
@ -988,7 +1003,8 @@ final public class Operations {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns true if the language of this automaton is finite.
|
||||
* Returns true if the language of this automaton is finite. The
|
||||
* automaton must not have any dead states.
|
||||
*/
|
||||
public static boolean isFinite(Automaton a) {
|
||||
if (a.getNumStates() == 0) {
|
||||
|
@ -998,7 +1014,7 @@ final public class Operations {
|
|||
}
|
||||
|
||||
/**
|
||||
* Checks whether there is a loop containing s. (This is sufficient since
|
||||
* Checks whether there is a loop containing state. (This is sufficient since
|
||||
* there are never transitions to dead states.)
|
||||
*/
|
||||
// TODO: not great that this is recursive... in theory a
|
||||
|
@ -1144,12 +1160,14 @@ final public class Operations {
|
|||
|
||||
Automaton result = builder.finish();
|
||||
|
||||
for(int s : a.getAcceptStates()) {
|
||||
assert s < numStates;
|
||||
int s = 0;
|
||||
FixedBitSet acceptStates = a.getAcceptStates();
|
||||
while (s < numStates && (s = acceptStates.nextSetBit(s)) != -1) {
|
||||
result.addEpsilon(0, s+1);
|
||||
if (initialStates != null) {
|
||||
initialStates.add(s+1);
|
||||
}
|
||||
s++;
|
||||
}
|
||||
|
||||
result.finishState();
|
||||
|
|
Loading…
Reference in New Issue