mirror of https://github.com/apache/lucene.git
automaton initial state is always 0
This commit is contained in:
parent
45f2dfe2ce
commit
6fb9aef984
|
@ -474,7 +474,7 @@ final class Stemmer {
|
|||
private boolean checkCondition(int condition, char c1[], int c1off, int c1len, char c2[], int c2off, int c2len) {
|
||||
if (condition != 0) {
|
||||
CharacterRunAutomaton pattern = dictionary.patterns.get(condition);
|
||||
int state = pattern.getInitialState();
|
||||
int state = 0;
|
||||
for (int i = c1off; i < c1off + c1len; i++) {
|
||||
state = pattern.step(state, c1[i]);
|
||||
if (state == -1) {
|
||||
|
|
|
@ -92,7 +92,7 @@ final class OrdsIntersectTermsEnum extends TermsEnum {
|
|||
final OrdsIntersectTermsEnumFrame f = stack[0];
|
||||
f.fp = f.fpOrig = fr.rootBlockFP;
|
||||
f.prefix = 0;
|
||||
f.setState(runAutomaton.getInitialState());
|
||||
f.setState(0);
|
||||
f.arc = arc;
|
||||
f.outputPrefix = arc.output;
|
||||
f.load(fr.rootCode);
|
||||
|
|
|
@ -969,7 +969,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
|
|||
states = new State[1];
|
||||
states[0] = new State();
|
||||
states[0].changeOrd = terms.length;
|
||||
states[0].state = runAutomaton.getInitialState();
|
||||
states[0].state = 0;
|
||||
states[0].transitionCount = compiledAutomaton.automaton.getNumTransitions(states[0].state);
|
||||
compiledAutomaton.automaton.initTransition(states[0].state, states[0].transition);
|
||||
states[0].transitionUpto = -1;
|
||||
|
|
|
@ -705,7 +705,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
|
|||
/** Load frame for start arc(node) on fst */
|
||||
Frame loadFirstFrame(Frame frame) throws IOException {
|
||||
frame.arc = fst.getFirstArc(frame.arc);
|
||||
frame.state = fsa.getInitialState();
|
||||
frame.state = 0;
|
||||
return frame;
|
||||
}
|
||||
|
||||
|
|
|
@ -602,7 +602,7 @@ public class FSTTermsReader extends FieldsProducer {
|
|||
/** Load frame for start arc(node) on fst */
|
||||
Frame loadFirstFrame(Frame frame) throws IOException {
|
||||
frame.fstArc = fst.getFirstArc(frame.fstArc);
|
||||
frame.fsaState = fsa.getInitialState();
|
||||
frame.fsaState = 0;
|
||||
return frame;
|
||||
}
|
||||
|
||||
|
|
|
@ -124,7 +124,7 @@ final class IntersectTermsEnum extends TermsEnum {
|
|||
final IntersectTermsEnumFrame f = stack[0];
|
||||
f.fp = f.fpOrig = fr.rootBlockFP;
|
||||
f.prefix = 0;
|
||||
f.setState(runAutomaton.getInitialState());
|
||||
f.setState(0);
|
||||
f.arc = arc;
|
||||
f.outputPrefix = arc.output;
|
||||
f.load(fr.rootCode);
|
||||
|
|
|
@ -110,7 +110,7 @@ public class AutomatonTermsEnum extends FilteredTermsEnum {
|
|||
if (term == null) {
|
||||
assert seekBytesRef.length() == 0;
|
||||
// return the empty term, as it's valid
|
||||
if (runAutomaton.isAccept(runAutomaton.getInitialState())) {
|
||||
if (runAutomaton.isAccept(0)) {
|
||||
return seekBytesRef.get();
|
||||
}
|
||||
} else {
|
||||
|
@ -135,7 +135,7 @@ public class AutomatonTermsEnum extends FilteredTermsEnum {
|
|||
private void setLinear(int position) {
|
||||
assert linear == false;
|
||||
|
||||
int state = runAutomaton.getInitialState();
|
||||
int state = 0;
|
||||
assert state == 0;
|
||||
int maxInterval = 0xff;
|
||||
//System.out.println("setLinear pos=" + position + " seekbytesRef=" + seekBytesRef);
|
||||
|
@ -182,7 +182,7 @@ public class AutomatonTermsEnum extends FilteredTermsEnum {
|
|||
int state;
|
||||
int pos = 0;
|
||||
savedStates.grow(seekBytesRef.length()+1);
|
||||
savedStates.setIntAt(0, runAutomaton.getInitialState());
|
||||
savedStates.setIntAt(0, 0);
|
||||
|
||||
while (true) {
|
||||
curGen++;
|
||||
|
|
|
@ -36,7 +36,7 @@ public class ByteRunAutomaton extends RunAutomaton {
|
|||
* Returns true if the given byte array is accepted by this automaton
|
||||
*/
|
||||
public boolean run(byte[] s, int offset, int length) {
|
||||
int p = initial;
|
||||
int p = 0;
|
||||
int l = offset + length;
|
||||
for (int i = offset; i < l; i++) {
|
||||
p = step(p, s[i] & 0xFF);
|
||||
|
|
|
@ -43,7 +43,7 @@ public class CharacterRunAutomaton extends RunAutomaton {
|
|||
* Returns true if the given string is accepted by this automaton.
|
||||
*/
|
||||
public boolean run(String s) {
|
||||
int p = initial;
|
||||
int p = 0;
|
||||
int l = s.length();
|
||||
for (int i = 0, cp = 0; i < l; i += Character.charCount(cp)) {
|
||||
p = step(p, cp = s.codePointAt(i));
|
||||
|
@ -56,7 +56,7 @@ public class CharacterRunAutomaton extends RunAutomaton {
|
|||
* Returns true if the given string is accepted by this automaton
|
||||
*/
|
||||
public boolean run(char[] s, int offset, int length) {
|
||||
int p = initial;
|
||||
int p = 0;
|
||||
int l = offset + length;
|
||||
for (int i = offset, cp = 0; i < l; i += Character.charCount(cp)) {
|
||||
p = step(p, cp = Character.codePointAt(s, i, l));
|
||||
|
|
|
@ -351,7 +351,7 @@ public class CompiledAutomaton {
|
|||
|
||||
//if (DEBUG) System.out.println("CA.floor input=" + input.utf8ToString());
|
||||
|
||||
int state = runAutomaton.getInitialState();
|
||||
int state = 0;
|
||||
|
||||
// Special case empty string:
|
||||
if (input.length == 0) {
|
||||
|
|
|
@ -32,7 +32,7 @@ package org.apache.lucene.util.automaton;
|
|||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Finite-state automaton with fast run operation.
|
||||
* Finite-state automaton with fast run operation. The initial state is always 0.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
|
@ -41,7 +41,6 @@ public abstract class RunAutomaton {
|
|||
final int maxInterval;
|
||||
final int size;
|
||||
final boolean[] accept;
|
||||
final int initial;
|
||||
final int[] transitions; // delta(state,c) = transitions[state*points.length +
|
||||
// getCharClass(c)]
|
||||
final int[] points; // char interval start points
|
||||
|
@ -53,7 +52,7 @@ public abstract class RunAutomaton {
|
|||
@Override
|
||||
public String toString() {
|
||||
StringBuilder b = new StringBuilder();
|
||||
b.append("initial state: ").append(initial).append("\n");
|
||||
b.append("initial state: 0\n");
|
||||
for (int i = 0; i < size; i++) {
|
||||
b.append("state " + i);
|
||||
if (accept[i]) b.append(" [accept]:\n");
|
||||
|
@ -92,13 +91,6 @@ public abstract class RunAutomaton {
|
|||
return accept[state];
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns initial state.
|
||||
*/
|
||||
public final int getInitialState() {
|
||||
return initial;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns array of codepoint class interval start points. The array should
|
||||
* not be modified by the caller.
|
||||
|
@ -138,7 +130,6 @@ public abstract class RunAutomaton {
|
|||
a = Operations.determinize(a, maxDeterminizedStates);
|
||||
this.automaton = a;
|
||||
points = a.getStartPoints();
|
||||
initial = 0;
|
||||
size = Math.max(1,a.getNumStates());
|
||||
accept = new boolean[size];
|
||||
transitions = new int[size * points.length];
|
||||
|
@ -188,7 +179,6 @@ public abstract class RunAutomaton {
|
|||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
int result = 1;
|
||||
result = prime * result + initial;
|
||||
result = prime * result + maxInterval;
|
||||
result = prime * result + points.length;
|
||||
result = prime * result + size;
|
||||
|
@ -201,7 +191,6 @@ public abstract class RunAutomaton {
|
|||
if (obj == null) return false;
|
||||
if (getClass() != obj.getClass()) return false;
|
||||
RunAutomaton other = (RunAutomaton) obj;
|
||||
if (initial != other.initial) return false;
|
||||
if (maxInterval != other.maxInterval) return false;
|
||||
if (size != other.size) return false;
|
||||
if (!Arrays.equals(points, other.points)) return false;
|
||||
|
|
|
@ -170,7 +170,7 @@ public class TestTermsEnum extends LuceneTestCase {
|
|||
}
|
||||
|
||||
private boolean accepts(CompiledAutomaton c, BytesRef b) {
|
||||
int state = c.runAutomaton.getInitialState();
|
||||
int state = 0;
|
||||
for(int idx=0;idx<b.length;idx++) {
|
||||
assertTrue(state != -1);
|
||||
state = c.runAutomaton.step(state, b.bytes[b.offset+idx] & 0xff);
|
||||
|
@ -286,7 +286,7 @@ public class TestTermsEnum extends LuceneTestCase {
|
|||
System.out.println("\nTEST: iter2=" + iter2 + " startTerm=" + (startTerm == null ? "<null>" : startTerm.utf8ToString()));
|
||||
|
||||
if (startTerm != null) {
|
||||
int state = c.runAutomaton.getInitialState();
|
||||
int state = 0;
|
||||
for(int idx=0;idx<startTerm.length;idx++) {
|
||||
final int label = startTerm.bytes[startTerm.offset+idx] & 0xff;
|
||||
System.out.println(" state=" + state + " label=" + label);
|
||||
|
|
|
@ -166,12 +166,12 @@ public class TestUTF32ToUTF8 extends LuceneTestCase {
|
|||
CharacterRunAutomaton cra = new CharacterRunAutomaton(automaton);
|
||||
ByteRunAutomaton bra = new ByteRunAutomaton(automaton);
|
||||
// make sure character dfa accepts empty string
|
||||
assertTrue(cra.isAccept(cra.getInitialState()));
|
||||
assertTrue(cra.isAccept(0));
|
||||
assertTrue(cra.run(""));
|
||||
assertTrue(cra.run(new char[0], 0, 0));
|
||||
|
||||
// make sure byte dfa accepts empty string
|
||||
assertTrue(bra.isAccept(bra.getInitialState()));
|
||||
assertTrue(bra.isAccept(0));
|
||||
assertTrue(bra.run(new byte[0], 0, 0));
|
||||
}
|
||||
|
||||
|
|
|
@ -92,7 +92,7 @@ public class MockTokenizer extends Tokenizer {
|
|||
super(factory);
|
||||
this.runAutomaton = runAutomaton;
|
||||
this.lowerCase = lowerCase;
|
||||
this.state = runAutomaton.getInitialState();
|
||||
this.state = 0;
|
||||
this.maxTokenLength = maxTokenLength;
|
||||
}
|
||||
|
||||
|
@ -252,7 +252,7 @@ public class MockTokenizer extends Tokenizer {
|
|||
|
||||
protected boolean isTokenChar(int c) {
|
||||
if (state < 0) {
|
||||
state = runAutomaton.getInitialState();
|
||||
state = 0;
|
||||
}
|
||||
state = runAutomaton.step(state, c);
|
||||
if (state < 0) {
|
||||
|
@ -270,7 +270,7 @@ public class MockTokenizer extends Tokenizer {
|
|||
public void reset() throws IOException {
|
||||
try {
|
||||
super.reset();
|
||||
state = runAutomaton.getInitialState();
|
||||
state = 0;
|
||||
lastOffset = off = 0;
|
||||
bufferedCodePoint = -1;
|
||||
if (streamState == State.RESET) {
|
||||
|
|
Loading…
Reference in New Issue