mirror of https://github.com/apache/lucene.git
automaton initial state is always 0
This commit is contained in:
parent
257ea3423f
commit
4fe3110e49
|
@ -474,7 +474,7 @@ final class Stemmer {
|
||||||
private boolean checkCondition(int condition, char c1[], int c1off, int c1len, char c2[], int c2off, int c2len) {
|
private boolean checkCondition(int condition, char c1[], int c1off, int c1len, char c2[], int c2off, int c2len) {
|
||||||
if (condition != 0) {
|
if (condition != 0) {
|
||||||
CharacterRunAutomaton pattern = dictionary.patterns.get(condition);
|
CharacterRunAutomaton pattern = dictionary.patterns.get(condition);
|
||||||
int state = pattern.getInitialState();
|
int state = 0;
|
||||||
for (int i = c1off; i < c1off + c1len; i++) {
|
for (int i = c1off; i < c1off + c1len; i++) {
|
||||||
state = pattern.step(state, c1[i]);
|
state = pattern.step(state, c1[i]);
|
||||||
if (state == -1) {
|
if (state == -1) {
|
||||||
|
|
|
@ -92,7 +92,7 @@ final class OrdsIntersectTermsEnum extends TermsEnum {
|
||||||
final OrdsIntersectTermsEnumFrame f = stack[0];
|
final OrdsIntersectTermsEnumFrame f = stack[0];
|
||||||
f.fp = f.fpOrig = fr.rootBlockFP;
|
f.fp = f.fpOrig = fr.rootBlockFP;
|
||||||
f.prefix = 0;
|
f.prefix = 0;
|
||||||
f.setState(runAutomaton.getInitialState());
|
f.setState(0);
|
||||||
f.arc = arc;
|
f.arc = arc;
|
||||||
f.outputPrefix = arc.output;
|
f.outputPrefix = arc.output;
|
||||||
f.load(fr.rootCode);
|
f.load(fr.rootCode);
|
||||||
|
|
|
@ -969,7 +969,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
|
||||||
states = new State[1];
|
states = new State[1];
|
||||||
states[0] = new State();
|
states[0] = new State();
|
||||||
states[0].changeOrd = terms.length;
|
states[0].changeOrd = terms.length;
|
||||||
states[0].state = runAutomaton.getInitialState();
|
states[0].state = 0;
|
||||||
states[0].transitionCount = compiledAutomaton.automaton.getNumTransitions(states[0].state);
|
states[0].transitionCount = compiledAutomaton.automaton.getNumTransitions(states[0].state);
|
||||||
compiledAutomaton.automaton.initTransition(states[0].state, states[0].transition);
|
compiledAutomaton.automaton.initTransition(states[0].state, states[0].transition);
|
||||||
states[0].transitionUpto = -1;
|
states[0].transitionUpto = -1;
|
||||||
|
|
|
@ -705,7 +705,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
|
||||||
/** Load frame for start arc(node) on fst */
|
/** Load frame for start arc(node) on fst */
|
||||||
Frame loadFirstFrame(Frame frame) throws IOException {
|
Frame loadFirstFrame(Frame frame) throws IOException {
|
||||||
frame.arc = fst.getFirstArc(frame.arc);
|
frame.arc = fst.getFirstArc(frame.arc);
|
||||||
frame.state = fsa.getInitialState();
|
frame.state = 0;
|
||||||
return frame;
|
return frame;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -602,7 +602,7 @@ public class FSTTermsReader extends FieldsProducer {
|
||||||
/** Load frame for start arc(node) on fst */
|
/** Load frame for start arc(node) on fst */
|
||||||
Frame loadFirstFrame(Frame frame) throws IOException {
|
Frame loadFirstFrame(Frame frame) throws IOException {
|
||||||
frame.fstArc = fst.getFirstArc(frame.fstArc);
|
frame.fstArc = fst.getFirstArc(frame.fstArc);
|
||||||
frame.fsaState = fsa.getInitialState();
|
frame.fsaState = 0;
|
||||||
return frame;
|
return frame;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -124,7 +124,7 @@ final class IntersectTermsEnum extends TermsEnum {
|
||||||
final IntersectTermsEnumFrame f = stack[0];
|
final IntersectTermsEnumFrame f = stack[0];
|
||||||
f.fp = f.fpOrig = fr.rootBlockFP;
|
f.fp = f.fpOrig = fr.rootBlockFP;
|
||||||
f.prefix = 0;
|
f.prefix = 0;
|
||||||
f.setState(runAutomaton.getInitialState());
|
f.setState(0);
|
||||||
f.arc = arc;
|
f.arc = arc;
|
||||||
f.outputPrefix = arc.output;
|
f.outputPrefix = arc.output;
|
||||||
f.load(fr.rootCode);
|
f.load(fr.rootCode);
|
||||||
|
|
|
@ -110,7 +110,7 @@ public class AutomatonTermsEnum extends FilteredTermsEnum {
|
||||||
if (term == null) {
|
if (term == null) {
|
||||||
assert seekBytesRef.length() == 0;
|
assert seekBytesRef.length() == 0;
|
||||||
// return the empty term, as it's valid
|
// return the empty term, as it's valid
|
||||||
if (runAutomaton.isAccept(runAutomaton.getInitialState())) {
|
if (runAutomaton.isAccept(0)) {
|
||||||
return seekBytesRef.get();
|
return seekBytesRef.get();
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -135,7 +135,7 @@ public class AutomatonTermsEnum extends FilteredTermsEnum {
|
||||||
private void setLinear(int position) {
|
private void setLinear(int position) {
|
||||||
assert linear == false;
|
assert linear == false;
|
||||||
|
|
||||||
int state = runAutomaton.getInitialState();
|
int state = 0;
|
||||||
assert state == 0;
|
assert state == 0;
|
||||||
int maxInterval = 0xff;
|
int maxInterval = 0xff;
|
||||||
//System.out.println("setLinear pos=" + position + " seekbytesRef=" + seekBytesRef);
|
//System.out.println("setLinear pos=" + position + " seekbytesRef=" + seekBytesRef);
|
||||||
|
@ -182,7 +182,7 @@ public class AutomatonTermsEnum extends FilteredTermsEnum {
|
||||||
int state;
|
int state;
|
||||||
int pos = 0;
|
int pos = 0;
|
||||||
savedStates.grow(seekBytesRef.length()+1);
|
savedStates.grow(seekBytesRef.length()+1);
|
||||||
savedStates.setIntAt(0, runAutomaton.getInitialState());
|
savedStates.setIntAt(0, 0);
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
curGen++;
|
curGen++;
|
||||||
|
|
|
@ -36,7 +36,7 @@ public class ByteRunAutomaton extends RunAutomaton {
|
||||||
* Returns true if the given byte array is accepted by this automaton
|
* Returns true if the given byte array is accepted by this automaton
|
||||||
*/
|
*/
|
||||||
public boolean run(byte[] s, int offset, int length) {
|
public boolean run(byte[] s, int offset, int length) {
|
||||||
int p = initial;
|
int p = 0;
|
||||||
int l = offset + length;
|
int l = offset + length;
|
||||||
for (int i = offset; i < l; i++) {
|
for (int i = offset; i < l; i++) {
|
||||||
p = step(p, s[i] & 0xFF);
|
p = step(p, s[i] & 0xFF);
|
||||||
|
|
|
@ -43,7 +43,7 @@ public class CharacterRunAutomaton extends RunAutomaton {
|
||||||
* Returns true if the given string is accepted by this automaton.
|
* Returns true if the given string is accepted by this automaton.
|
||||||
*/
|
*/
|
||||||
public boolean run(String s) {
|
public boolean run(String s) {
|
||||||
int p = initial;
|
int p = 0;
|
||||||
int l = s.length();
|
int l = s.length();
|
||||||
for (int i = 0, cp = 0; i < l; i += Character.charCount(cp)) {
|
for (int i = 0, cp = 0; i < l; i += Character.charCount(cp)) {
|
||||||
p = step(p, cp = s.codePointAt(i));
|
p = step(p, cp = s.codePointAt(i));
|
||||||
|
@ -56,7 +56,7 @@ public class CharacterRunAutomaton extends RunAutomaton {
|
||||||
* Returns true if the given string is accepted by this automaton
|
* Returns true if the given string is accepted by this automaton
|
||||||
*/
|
*/
|
||||||
public boolean run(char[] s, int offset, int length) {
|
public boolean run(char[] s, int offset, int length) {
|
||||||
int p = initial;
|
int p = 0;
|
||||||
int l = offset + length;
|
int l = offset + length;
|
||||||
for (int i = offset, cp = 0; i < l; i += Character.charCount(cp)) {
|
for (int i = offset, cp = 0; i < l; i += Character.charCount(cp)) {
|
||||||
p = step(p, cp = Character.codePointAt(s, i, l));
|
p = step(p, cp = Character.codePointAt(s, i, l));
|
||||||
|
|
|
@ -351,7 +351,7 @@ public class CompiledAutomaton {
|
||||||
|
|
||||||
//if (DEBUG) System.out.println("CA.floor input=" + input.utf8ToString());
|
//if (DEBUG) System.out.println("CA.floor input=" + input.utf8ToString());
|
||||||
|
|
||||||
int state = runAutomaton.getInitialState();
|
int state = 0;
|
||||||
|
|
||||||
// Special case empty string:
|
// Special case empty string:
|
||||||
if (input.length == 0) {
|
if (input.length == 0) {
|
||||||
|
|
|
@ -32,7 +32,7 @@ package org.apache.lucene.util.automaton;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Finite-state automaton with fast run operation.
|
* Finite-state automaton with fast run operation. The initial state is always 0.
|
||||||
*
|
*
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
|
@ -41,7 +41,6 @@ public abstract class RunAutomaton {
|
||||||
final int maxInterval;
|
final int maxInterval;
|
||||||
final int size;
|
final int size;
|
||||||
final boolean[] accept;
|
final boolean[] accept;
|
||||||
final int initial;
|
|
||||||
final int[] transitions; // delta(state,c) = transitions[state*points.length +
|
final int[] transitions; // delta(state,c) = transitions[state*points.length +
|
||||||
// getCharClass(c)]
|
// getCharClass(c)]
|
||||||
final int[] points; // char interval start points
|
final int[] points; // char interval start points
|
||||||
|
@ -53,7 +52,7 @@ public abstract class RunAutomaton {
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
StringBuilder b = new StringBuilder();
|
StringBuilder b = new StringBuilder();
|
||||||
b.append("initial state: ").append(initial).append("\n");
|
b.append("initial state: 0\n");
|
||||||
for (int i = 0; i < size; i++) {
|
for (int i = 0; i < size; i++) {
|
||||||
b.append("state " + i);
|
b.append("state " + i);
|
||||||
if (accept[i]) b.append(" [accept]:\n");
|
if (accept[i]) b.append(" [accept]:\n");
|
||||||
|
@ -92,13 +91,6 @@ public abstract class RunAutomaton {
|
||||||
return accept[state];
|
return accept[state];
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns initial state.
|
|
||||||
*/
|
|
||||||
public final int getInitialState() {
|
|
||||||
return initial;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns array of codepoint class interval start points. The array should
|
* Returns array of codepoint class interval start points. The array should
|
||||||
* not be modified by the caller.
|
* not be modified by the caller.
|
||||||
|
@ -138,7 +130,6 @@ public abstract class RunAutomaton {
|
||||||
a = Operations.determinize(a, maxDeterminizedStates);
|
a = Operations.determinize(a, maxDeterminizedStates);
|
||||||
this.automaton = a;
|
this.automaton = a;
|
||||||
points = a.getStartPoints();
|
points = a.getStartPoints();
|
||||||
initial = 0;
|
|
||||||
size = Math.max(1,a.getNumStates());
|
size = Math.max(1,a.getNumStates());
|
||||||
accept = new boolean[size];
|
accept = new boolean[size];
|
||||||
transitions = new int[size * points.length];
|
transitions = new int[size * points.length];
|
||||||
|
@ -188,7 +179,6 @@ public abstract class RunAutomaton {
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
final int prime = 31;
|
final int prime = 31;
|
||||||
int result = 1;
|
int result = 1;
|
||||||
result = prime * result + initial;
|
|
||||||
result = prime * result + maxInterval;
|
result = prime * result + maxInterval;
|
||||||
result = prime * result + points.length;
|
result = prime * result + points.length;
|
||||||
result = prime * result + size;
|
result = prime * result + size;
|
||||||
|
@ -201,7 +191,6 @@ public abstract class RunAutomaton {
|
||||||
if (obj == null) return false;
|
if (obj == null) return false;
|
||||||
if (getClass() != obj.getClass()) return false;
|
if (getClass() != obj.getClass()) return false;
|
||||||
RunAutomaton other = (RunAutomaton) obj;
|
RunAutomaton other = (RunAutomaton) obj;
|
||||||
if (initial != other.initial) return false;
|
|
||||||
if (maxInterval != other.maxInterval) return false;
|
if (maxInterval != other.maxInterval) return false;
|
||||||
if (size != other.size) return false;
|
if (size != other.size) return false;
|
||||||
if (!Arrays.equals(points, other.points)) return false;
|
if (!Arrays.equals(points, other.points)) return false;
|
||||||
|
|
|
@ -170,7 +170,7 @@ public class TestTermsEnum extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean accepts(CompiledAutomaton c, BytesRef b) {
|
private boolean accepts(CompiledAutomaton c, BytesRef b) {
|
||||||
int state = c.runAutomaton.getInitialState();
|
int state = 0;
|
||||||
for(int idx=0;idx<b.length;idx++) {
|
for(int idx=0;idx<b.length;idx++) {
|
||||||
assertTrue(state != -1);
|
assertTrue(state != -1);
|
||||||
state = c.runAutomaton.step(state, b.bytes[b.offset+idx] & 0xff);
|
state = c.runAutomaton.step(state, b.bytes[b.offset+idx] & 0xff);
|
||||||
|
@ -291,7 +291,7 @@ public class TestTermsEnum extends LuceneTestCase {
|
||||||
System.out.println("\nTEST: iter2=" + iter2 + " startTerm=" + (startTerm == null ? "<null>" : startTerm.utf8ToString()));
|
System.out.println("\nTEST: iter2=" + iter2 + " startTerm=" + (startTerm == null ? "<null>" : startTerm.utf8ToString()));
|
||||||
|
|
||||||
if (startTerm != null) {
|
if (startTerm != null) {
|
||||||
int state = c.runAutomaton.getInitialState();
|
int state = 0;
|
||||||
for(int idx=0;idx<startTerm.length;idx++) {
|
for(int idx=0;idx<startTerm.length;idx++) {
|
||||||
final int label = startTerm.bytes[startTerm.offset+idx] & 0xff;
|
final int label = startTerm.bytes[startTerm.offset+idx] & 0xff;
|
||||||
System.out.println(" state=" + state + " label=" + label);
|
System.out.println(" state=" + state + " label=" + label);
|
||||||
|
|
|
@ -166,12 +166,12 @@ public class TestUTF32ToUTF8 extends LuceneTestCase {
|
||||||
CharacterRunAutomaton cra = new CharacterRunAutomaton(automaton);
|
CharacterRunAutomaton cra = new CharacterRunAutomaton(automaton);
|
||||||
ByteRunAutomaton bra = new ByteRunAutomaton(automaton);
|
ByteRunAutomaton bra = new ByteRunAutomaton(automaton);
|
||||||
// make sure character dfa accepts empty string
|
// make sure character dfa accepts empty string
|
||||||
assertTrue(cra.isAccept(cra.getInitialState()));
|
assertTrue(cra.isAccept(0));
|
||||||
assertTrue(cra.run(""));
|
assertTrue(cra.run(""));
|
||||||
assertTrue(cra.run(new char[0], 0, 0));
|
assertTrue(cra.run(new char[0], 0, 0));
|
||||||
|
|
||||||
// make sure byte dfa accepts empty string
|
// make sure byte dfa accepts empty string
|
||||||
assertTrue(bra.isAccept(bra.getInitialState()));
|
assertTrue(bra.isAccept(0));
|
||||||
assertTrue(bra.run(new byte[0], 0, 0));
|
assertTrue(bra.run(new byte[0], 0, 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -92,7 +92,7 @@ public class MockTokenizer extends Tokenizer {
|
||||||
super(factory);
|
super(factory);
|
||||||
this.runAutomaton = runAutomaton;
|
this.runAutomaton = runAutomaton;
|
||||||
this.lowerCase = lowerCase;
|
this.lowerCase = lowerCase;
|
||||||
this.state = runAutomaton.getInitialState();
|
this.state = 0;
|
||||||
this.maxTokenLength = maxTokenLength;
|
this.maxTokenLength = maxTokenLength;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -252,7 +252,7 @@ public class MockTokenizer extends Tokenizer {
|
||||||
|
|
||||||
protected boolean isTokenChar(int c) {
|
protected boolean isTokenChar(int c) {
|
||||||
if (state < 0) {
|
if (state < 0) {
|
||||||
state = runAutomaton.getInitialState();
|
state = 0;
|
||||||
}
|
}
|
||||||
state = runAutomaton.step(state, c);
|
state = runAutomaton.step(state, c);
|
||||||
if (state < 0) {
|
if (state < 0) {
|
||||||
|
@ -270,7 +270,7 @@ public class MockTokenizer extends Tokenizer {
|
||||||
public void reset() throws IOException {
|
public void reset() throws IOException {
|
||||||
try {
|
try {
|
||||||
super.reset();
|
super.reset();
|
||||||
state = runAutomaton.getInitialState();
|
state = 0;
|
||||||
lastOffset = off = 0;
|
lastOffset = off = 0;
|
||||||
bufferedCodePoint = -1;
|
bufferedCodePoint = -1;
|
||||||
if (streamState == State.RESET) {
|
if (streamState == State.RESET) {
|
||||||
|
|
Loading…
Reference in New Issue