automaton initial state is always 0

This commit is contained in:
Mike McCandless 2016-10-10 13:43:46 -04:00
parent 257ea3423f
commit 4fe3110e49
14 changed files with 22 additions and 33 deletions

View File

@ -474,7 +474,7 @@ final class Stemmer {
private boolean checkCondition(int condition, char c1[], int c1off, int c1len, char c2[], int c2off, int c2len) {
if (condition != 0) {
CharacterRunAutomaton pattern = dictionary.patterns.get(condition);
int state = pattern.getInitialState();
int state = 0;
for (int i = c1off; i < c1off + c1len; i++) {
state = pattern.step(state, c1[i]);
if (state == -1) {

View File

@ -92,7 +92,7 @@ final class OrdsIntersectTermsEnum extends TermsEnum {
final OrdsIntersectTermsEnumFrame f = stack[0];
f.fp = f.fpOrig = fr.rootBlockFP;
f.prefix = 0;
f.setState(runAutomaton.getInitialState());
f.setState(0);
f.arc = arc;
f.outputPrefix = arc.output;
f.load(fr.rootCode);

View File

@ -969,7 +969,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
states = new State[1];
states[0] = new State();
states[0].changeOrd = terms.length;
states[0].state = runAutomaton.getInitialState();
states[0].state = 0;
states[0].transitionCount = compiledAutomaton.automaton.getNumTransitions(states[0].state);
compiledAutomaton.automaton.initTransition(states[0].state, states[0].transition);
states[0].transitionUpto = -1;

View File

@ -705,7 +705,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
/** Load frame for start arc(node) on fst */
Frame loadFirstFrame(Frame frame) throws IOException {
frame.arc = fst.getFirstArc(frame.arc);
frame.state = fsa.getInitialState();
frame.state = 0;
return frame;
}

View File

@ -602,7 +602,7 @@ public class FSTTermsReader extends FieldsProducer {
/** Load frame for start arc(node) on fst */
Frame loadFirstFrame(Frame frame) throws IOException {
frame.fstArc = fst.getFirstArc(frame.fstArc);
frame.fsaState = fsa.getInitialState();
frame.fsaState = 0;
return frame;
}

View File

@ -124,7 +124,7 @@ final class IntersectTermsEnum extends TermsEnum {
final IntersectTermsEnumFrame f = stack[0];
f.fp = f.fpOrig = fr.rootBlockFP;
f.prefix = 0;
f.setState(runAutomaton.getInitialState());
f.setState(0);
f.arc = arc;
f.outputPrefix = arc.output;
f.load(fr.rootCode);

View File

@ -110,7 +110,7 @@ public class AutomatonTermsEnum extends FilteredTermsEnum {
if (term == null) {
assert seekBytesRef.length() == 0;
// return the empty term, as it's valid
if (runAutomaton.isAccept(runAutomaton.getInitialState())) {
if (runAutomaton.isAccept(0)) {
return seekBytesRef.get();
}
} else {
@ -135,7 +135,7 @@ public class AutomatonTermsEnum extends FilteredTermsEnum {
private void setLinear(int position) {
assert linear == false;
int state = runAutomaton.getInitialState();
int state = 0;
assert state == 0;
int maxInterval = 0xff;
//System.out.println("setLinear pos=" + position + " seekbytesRef=" + seekBytesRef);
@ -182,7 +182,7 @@ public class AutomatonTermsEnum extends FilteredTermsEnum {
int state;
int pos = 0;
savedStates.grow(seekBytesRef.length()+1);
savedStates.setIntAt(0, runAutomaton.getInitialState());
savedStates.setIntAt(0, 0);
while (true) {
curGen++;

View File

@ -36,7 +36,7 @@ public class ByteRunAutomaton extends RunAutomaton {
* Returns true if the given byte array is accepted by this automaton
*/
public boolean run(byte[] s, int offset, int length) {
int p = initial;
int p = 0;
int l = offset + length;
for (int i = offset; i < l; i++) {
p = step(p, s[i] & 0xFF);

View File

@ -43,7 +43,7 @@ public class CharacterRunAutomaton extends RunAutomaton {
* Returns true if the given string is accepted by this automaton.
*/
public boolean run(String s) {
int p = initial;
int p = 0;
int l = s.length();
for (int i = 0, cp = 0; i < l; i += Character.charCount(cp)) {
p = step(p, cp = s.codePointAt(i));
@ -56,7 +56,7 @@ public class CharacterRunAutomaton extends RunAutomaton {
* Returns true if the given string is accepted by this automaton
*/
public boolean run(char[] s, int offset, int length) {
int p = initial;
int p = 0;
int l = offset + length;
for (int i = offset, cp = 0; i < l; i += Character.charCount(cp)) {
p = step(p, cp = Character.codePointAt(s, i, l));

View File

@ -351,7 +351,7 @@ public class CompiledAutomaton {
//if (DEBUG) System.out.println("CA.floor input=" + input.utf8ToString());
int state = runAutomaton.getInitialState();
int state = 0;
// Special case empty string:
if (input.length == 0) {

View File

@ -32,7 +32,7 @@ package org.apache.lucene.util.automaton;
import java.util.Arrays;
/**
* Finite-state automaton with fast run operation.
* Finite-state automaton with fast run operation. The initial state is always 0.
*
* @lucene.experimental
*/
@ -41,7 +41,6 @@ public abstract class RunAutomaton {
final int maxInterval;
final int size;
final boolean[] accept;
final int initial;
final int[] transitions; // delta(state,c) = transitions[state*points.length +
// getCharClass(c)]
final int[] points; // char interval start points
@ -53,7 +52,7 @@ public abstract class RunAutomaton {
@Override
public String toString() {
StringBuilder b = new StringBuilder();
b.append("initial state: ").append(initial).append("\n");
b.append("initial state: 0\n");
for (int i = 0; i < size; i++) {
b.append("state " + i);
if (accept[i]) b.append(" [accept]:\n");
@ -92,13 +91,6 @@ public abstract class RunAutomaton {
return accept[state];
}
/**
* Returns initial state.
*/
public final int getInitialState() {
return initial;
}
/**
* Returns array of codepoint class interval start points. The array should
* not be modified by the caller.
@ -138,7 +130,6 @@ public abstract class RunAutomaton {
a = Operations.determinize(a, maxDeterminizedStates);
this.automaton = a;
points = a.getStartPoints();
initial = 0;
size = Math.max(1,a.getNumStates());
accept = new boolean[size];
transitions = new int[size * points.length];
@ -188,7 +179,6 @@ public abstract class RunAutomaton {
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + initial;
result = prime * result + maxInterval;
result = prime * result + points.length;
result = prime * result + size;
@ -201,7 +191,6 @@ public abstract class RunAutomaton {
if (obj == null) return false;
if (getClass() != obj.getClass()) return false;
RunAutomaton other = (RunAutomaton) obj;
if (initial != other.initial) return false;
if (maxInterval != other.maxInterval) return false;
if (size != other.size) return false;
if (!Arrays.equals(points, other.points)) return false;

View File

@ -170,7 +170,7 @@ public class TestTermsEnum extends LuceneTestCase {
}
private boolean accepts(CompiledAutomaton c, BytesRef b) {
int state = c.runAutomaton.getInitialState();
int state = 0;
for(int idx=0;idx<b.length;idx++) {
assertTrue(state != -1);
state = c.runAutomaton.step(state, b.bytes[b.offset+idx] & 0xff);
@ -291,7 +291,7 @@ public class TestTermsEnum extends LuceneTestCase {
System.out.println("\nTEST: iter2=" + iter2 + " startTerm=" + (startTerm == null ? "<null>" : startTerm.utf8ToString()));
if (startTerm != null) {
int state = c.runAutomaton.getInitialState();
int state = 0;
for(int idx=0;idx<startTerm.length;idx++) {
final int label = startTerm.bytes[startTerm.offset+idx] & 0xff;
System.out.println(" state=" + state + " label=" + label);

View File

@ -166,12 +166,12 @@ public class TestUTF32ToUTF8 extends LuceneTestCase {
CharacterRunAutomaton cra = new CharacterRunAutomaton(automaton);
ByteRunAutomaton bra = new ByteRunAutomaton(automaton);
// make sure character dfa accepts empty string
assertTrue(cra.isAccept(cra.getInitialState()));
assertTrue(cra.isAccept(0));
assertTrue(cra.run(""));
assertTrue(cra.run(new char[0], 0, 0));
// make sure byte dfa accepts empty string
assertTrue(bra.isAccept(bra.getInitialState()));
assertTrue(bra.isAccept(0));
assertTrue(bra.run(new byte[0], 0, 0));
}

View File

@ -92,7 +92,7 @@ public class MockTokenizer extends Tokenizer {
super(factory);
this.runAutomaton = runAutomaton;
this.lowerCase = lowerCase;
this.state = runAutomaton.getInitialState();
this.state = 0;
this.maxTokenLength = maxTokenLength;
}
@ -252,7 +252,7 @@ public class MockTokenizer extends Tokenizer {
protected boolean isTokenChar(int c) {
if (state < 0) {
state = runAutomaton.getInitialState();
state = 0;
}
state = runAutomaton.step(state, c);
if (state < 0) {
@ -270,7 +270,7 @@ public class MockTokenizer extends Tokenizer {
public void reset() throws IOException {
try {
super.reset();
state = runAutomaton.getInitialState();
state = 0;
lastOffset = off = 0;
bufferedCodePoint = -1;
if (streamState == State.RESET) {