automaton initial state is always 0

This commit is contained in:
Mike McCandless 2016-10-10 13:43:46 -04:00
parent 257ea3423f
commit 4fe3110e49
14 changed files with 22 additions and 33 deletions

View File

@@ -474,7 +474,7 @@ final class Stemmer {
private boolean checkCondition(int condition, char c1[], int c1off, int c1len, char c2[], int c2off, int c2len) { private boolean checkCondition(int condition, char c1[], int c1off, int c1len, char c2[], int c2off, int c2len) {
if (condition != 0) { if (condition != 0) {
CharacterRunAutomaton pattern = dictionary.patterns.get(condition); CharacterRunAutomaton pattern = dictionary.patterns.get(condition);
int state = pattern.getInitialState(); int state = 0;
for (int i = c1off; i < c1off + c1len; i++) { for (int i = c1off; i < c1off + c1len; i++) {
state = pattern.step(state, c1[i]); state = pattern.step(state, c1[i]);
if (state == -1) { if (state == -1) {

View File

@@ -92,7 +92,7 @@ final class OrdsIntersectTermsEnum extends TermsEnum {
final OrdsIntersectTermsEnumFrame f = stack[0]; final OrdsIntersectTermsEnumFrame f = stack[0];
f.fp = f.fpOrig = fr.rootBlockFP; f.fp = f.fpOrig = fr.rootBlockFP;
f.prefix = 0; f.prefix = 0;
f.setState(runAutomaton.getInitialState()); f.setState(0);
f.arc = arc; f.arc = arc;
f.outputPrefix = arc.output; f.outputPrefix = arc.output;
f.load(fr.rootCode); f.load(fr.rootCode);

View File

@@ -969,7 +969,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
states = new State[1]; states = new State[1];
states[0] = new State(); states[0] = new State();
states[0].changeOrd = terms.length; states[0].changeOrd = terms.length;
states[0].state = runAutomaton.getInitialState(); states[0].state = 0;
states[0].transitionCount = compiledAutomaton.automaton.getNumTransitions(states[0].state); states[0].transitionCount = compiledAutomaton.automaton.getNumTransitions(states[0].state);
compiledAutomaton.automaton.initTransition(states[0].state, states[0].transition); compiledAutomaton.automaton.initTransition(states[0].state, states[0].transition);
states[0].transitionUpto = -1; states[0].transitionUpto = -1;

View File

@@ -705,7 +705,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
/** Load frame for start arc(node) on fst */ /** Load frame for start arc(node) on fst */
Frame loadFirstFrame(Frame frame) throws IOException { Frame loadFirstFrame(Frame frame) throws IOException {
frame.arc = fst.getFirstArc(frame.arc); frame.arc = fst.getFirstArc(frame.arc);
frame.state = fsa.getInitialState(); frame.state = 0;
return frame; return frame;
} }

View File

@@ -602,7 +602,7 @@ public class FSTTermsReader extends FieldsProducer {
/** Load frame for start arc(node) on fst */ /** Load frame for start arc(node) on fst */
Frame loadFirstFrame(Frame frame) throws IOException { Frame loadFirstFrame(Frame frame) throws IOException {
frame.fstArc = fst.getFirstArc(frame.fstArc); frame.fstArc = fst.getFirstArc(frame.fstArc);
frame.fsaState = fsa.getInitialState(); frame.fsaState = 0;
return frame; return frame;
} }

View File

@@ -124,7 +124,7 @@ final class IntersectTermsEnum extends TermsEnum {
final IntersectTermsEnumFrame f = stack[0]; final IntersectTermsEnumFrame f = stack[0];
f.fp = f.fpOrig = fr.rootBlockFP; f.fp = f.fpOrig = fr.rootBlockFP;
f.prefix = 0; f.prefix = 0;
f.setState(runAutomaton.getInitialState()); f.setState(0);
f.arc = arc; f.arc = arc;
f.outputPrefix = arc.output; f.outputPrefix = arc.output;
f.load(fr.rootCode); f.load(fr.rootCode);

View File

@@ -110,7 +110,7 @@ public class AutomatonTermsEnum extends FilteredTermsEnum {
if (term == null) { if (term == null) {
assert seekBytesRef.length() == 0; assert seekBytesRef.length() == 0;
// return the empty term, as it's valid // return the empty term, as it's valid
if (runAutomaton.isAccept(runAutomaton.getInitialState())) { if (runAutomaton.isAccept(0)) {
return seekBytesRef.get(); return seekBytesRef.get();
} }
} else { } else {
@@ -135,7 +135,7 @@ public class AutomatonTermsEnum extends FilteredTermsEnum {
private void setLinear(int position) { private void setLinear(int position) {
assert linear == false; assert linear == false;
int state = runAutomaton.getInitialState(); int state = 0;
assert state == 0; assert state == 0;
int maxInterval = 0xff; int maxInterval = 0xff;
//System.out.println("setLinear pos=" + position + " seekbytesRef=" + seekBytesRef); //System.out.println("setLinear pos=" + position + " seekbytesRef=" + seekBytesRef);
@@ -182,7 +182,7 @@ public class AutomatonTermsEnum extends FilteredTermsEnum {
int state; int state;
int pos = 0; int pos = 0;
savedStates.grow(seekBytesRef.length()+1); savedStates.grow(seekBytesRef.length()+1);
savedStates.setIntAt(0, runAutomaton.getInitialState()); savedStates.setIntAt(0, 0);
while (true) { while (true) {
curGen++; curGen++;

View File

@@ -36,7 +36,7 @@ public class ByteRunAutomaton extends RunAutomaton {
* Returns true if the given byte array is accepted by this automaton * Returns true if the given byte array is accepted by this automaton
*/ */
public boolean run(byte[] s, int offset, int length) { public boolean run(byte[] s, int offset, int length) {
int p = initial; int p = 0;
int l = offset + length; int l = offset + length;
for (int i = offset; i < l; i++) { for (int i = offset; i < l; i++) {
p = step(p, s[i] & 0xFF); p = step(p, s[i] & 0xFF);

View File

@@ -43,7 +43,7 @@ public class CharacterRunAutomaton extends RunAutomaton {
* Returns true if the given string is accepted by this automaton. * Returns true if the given string is accepted by this automaton.
*/ */
public boolean run(String s) { public boolean run(String s) {
int p = initial; int p = 0;
int l = s.length(); int l = s.length();
for (int i = 0, cp = 0; i < l; i += Character.charCount(cp)) { for (int i = 0, cp = 0; i < l; i += Character.charCount(cp)) {
p = step(p, cp = s.codePointAt(i)); p = step(p, cp = s.codePointAt(i));
@@ -56,7 +56,7 @@ public class CharacterRunAutomaton extends RunAutomaton {
* Returns true if the given string is accepted by this automaton * Returns true if the given string is accepted by this automaton
*/ */
public boolean run(char[] s, int offset, int length) { public boolean run(char[] s, int offset, int length) {
int p = initial; int p = 0;
int l = offset + length; int l = offset + length;
for (int i = offset, cp = 0; i < l; i += Character.charCount(cp)) { for (int i = offset, cp = 0; i < l; i += Character.charCount(cp)) {
p = step(p, cp = Character.codePointAt(s, i, l)); p = step(p, cp = Character.codePointAt(s, i, l));

View File

@@ -351,7 +351,7 @@ public class CompiledAutomaton {
//if (DEBUG) System.out.println("CA.floor input=" + input.utf8ToString()); //if (DEBUG) System.out.println("CA.floor input=" + input.utf8ToString());
int state = runAutomaton.getInitialState(); int state = 0;
// Special case empty string: // Special case empty string:
if (input.length == 0) { if (input.length == 0) {

View File

@@ -32,7 +32,7 @@ package org.apache.lucene.util.automaton;
import java.util.Arrays; import java.util.Arrays;
/** /**
* Finite-state automaton with fast run operation. * Finite-state automaton with fast run operation. The initial state is always 0.
* *
* @lucene.experimental * @lucene.experimental
*/ */
@@ -41,7 +41,6 @@ public abstract class RunAutomaton {
final int maxInterval; final int maxInterval;
final int size; final int size;
final boolean[] accept; final boolean[] accept;
final int initial;
final int[] transitions; // delta(state,c) = transitions[state*points.length + final int[] transitions; // delta(state,c) = transitions[state*points.length +
// getCharClass(c)] // getCharClass(c)]
final int[] points; // char interval start points final int[] points; // char interval start points
@@ -53,7 +52,7 @@ public abstract class RunAutomaton {
@Override @Override
public String toString() { public String toString() {
StringBuilder b = new StringBuilder(); StringBuilder b = new StringBuilder();
b.append("initial state: ").append(initial).append("\n"); b.append("initial state: 0\n");
for (int i = 0; i < size; i++) { for (int i = 0; i < size; i++) {
b.append("state " + i); b.append("state " + i);
if (accept[i]) b.append(" [accept]:\n"); if (accept[i]) b.append(" [accept]:\n");
@@ -92,13 +91,6 @@ public abstract class RunAutomaton {
return accept[state]; return accept[state];
} }
/**
* Returns initial state.
*/
public final int getInitialState() {
return initial;
}
/** /**
* Returns array of codepoint class interval start points. The array should * Returns array of codepoint class interval start points. The array should
* not be modified by the caller. * not be modified by the caller.
@@ -138,7 +130,6 @@ public abstract class RunAutomaton {
a = Operations.determinize(a, maxDeterminizedStates); a = Operations.determinize(a, maxDeterminizedStates);
this.automaton = a; this.automaton = a;
points = a.getStartPoints(); points = a.getStartPoints();
initial = 0;
size = Math.max(1,a.getNumStates()); size = Math.max(1,a.getNumStates());
accept = new boolean[size]; accept = new boolean[size];
transitions = new int[size * points.length]; transitions = new int[size * points.length];
@@ -188,7 +179,6 @@ public abstract class RunAutomaton {
public int hashCode() { public int hashCode() {
final int prime = 31; final int prime = 31;
int result = 1; int result = 1;
result = prime * result + initial;
result = prime * result + maxInterval; result = prime * result + maxInterval;
result = prime * result + points.length; result = prime * result + points.length;
result = prime * result + size; result = prime * result + size;
@@ -201,7 +191,6 @@ public abstract class RunAutomaton {
if (obj == null) return false; if (obj == null) return false;
if (getClass() != obj.getClass()) return false; if (getClass() != obj.getClass()) return false;
RunAutomaton other = (RunAutomaton) obj; RunAutomaton other = (RunAutomaton) obj;
if (initial != other.initial) return false;
if (maxInterval != other.maxInterval) return false; if (maxInterval != other.maxInterval) return false;
if (size != other.size) return false; if (size != other.size) return false;
if (!Arrays.equals(points, other.points)) return false; if (!Arrays.equals(points, other.points)) return false;

View File

@@ -170,7 +170,7 @@ public class TestTermsEnum extends LuceneTestCase {
} }
private boolean accepts(CompiledAutomaton c, BytesRef b) { private boolean accepts(CompiledAutomaton c, BytesRef b) {
int state = c.runAutomaton.getInitialState(); int state = 0;
for(int idx=0;idx<b.length;idx++) { for(int idx=0;idx<b.length;idx++) {
assertTrue(state != -1); assertTrue(state != -1);
state = c.runAutomaton.step(state, b.bytes[b.offset+idx] & 0xff); state = c.runAutomaton.step(state, b.bytes[b.offset+idx] & 0xff);
@@ -291,7 +291,7 @@ public class TestTermsEnum extends LuceneTestCase {
System.out.println("\nTEST: iter2=" + iter2 + " startTerm=" + (startTerm == null ? "<null>" : startTerm.utf8ToString())); System.out.println("\nTEST: iter2=" + iter2 + " startTerm=" + (startTerm == null ? "<null>" : startTerm.utf8ToString()));
if (startTerm != null) { if (startTerm != null) {
int state = c.runAutomaton.getInitialState(); int state = 0;
for(int idx=0;idx<startTerm.length;idx++) { for(int idx=0;idx<startTerm.length;idx++) {
final int label = startTerm.bytes[startTerm.offset+idx] & 0xff; final int label = startTerm.bytes[startTerm.offset+idx] & 0xff;
System.out.println(" state=" + state + " label=" + label); System.out.println(" state=" + state + " label=" + label);

View File

@@ -166,12 +166,12 @@ public class TestUTF32ToUTF8 extends LuceneTestCase {
CharacterRunAutomaton cra = new CharacterRunAutomaton(automaton); CharacterRunAutomaton cra = new CharacterRunAutomaton(automaton);
ByteRunAutomaton bra = new ByteRunAutomaton(automaton); ByteRunAutomaton bra = new ByteRunAutomaton(automaton);
// make sure character dfa accepts empty string // make sure character dfa accepts empty string
assertTrue(cra.isAccept(cra.getInitialState())); assertTrue(cra.isAccept(0));
assertTrue(cra.run("")); assertTrue(cra.run(""));
assertTrue(cra.run(new char[0], 0, 0)); assertTrue(cra.run(new char[0], 0, 0));
// make sure byte dfa accepts empty string // make sure byte dfa accepts empty string
assertTrue(bra.isAccept(bra.getInitialState())); assertTrue(bra.isAccept(0));
assertTrue(bra.run(new byte[0], 0, 0)); assertTrue(bra.run(new byte[0], 0, 0));
} }

View File

@@ -92,7 +92,7 @@ public class MockTokenizer extends Tokenizer {
super(factory); super(factory);
this.runAutomaton = runAutomaton; this.runAutomaton = runAutomaton;
this.lowerCase = lowerCase; this.lowerCase = lowerCase;
this.state = runAutomaton.getInitialState(); this.state = 0;
this.maxTokenLength = maxTokenLength; this.maxTokenLength = maxTokenLength;
} }
@@ -252,7 +252,7 @@ public class MockTokenizer extends Tokenizer {
protected boolean isTokenChar(int c) { protected boolean isTokenChar(int c) {
if (state < 0) { if (state < 0) {
state = runAutomaton.getInitialState(); state = 0;
} }
state = runAutomaton.step(state, c); state = runAutomaton.step(state, c);
if (state < 0) { if (state < 0) {
@@ -270,7 +270,7 @@ public class MockTokenizer extends Tokenizer {
public void reset() throws IOException { public void reset() throws IOException {
try { try {
super.reset(); super.reset();
state = runAutomaton.getInitialState(); state = 0;
lastOffset = off = 0; lastOffset = off = 0;
bufferedCodePoint = -1; bufferedCodePoint = -1;
if (streamState == State.RESET) { if (streamState == State.RESET) {