From 6fb9aef984d998086f45dd5143f13ffd803f0595 Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Mon, 10 Oct 2016 13:43:46 -0400 Subject: [PATCH] automaton initial state is always 0 --- .../apache/lucene/analysis/hunspell/Stemmer.java | 2 +- .../blocktreeords/OrdsIntersectTermsEnum.java | 2 +- .../codecs/memory/DirectPostingsFormat.java | 2 +- .../lucene/codecs/memory/FSTOrdTermsReader.java | 2 +- .../lucene/codecs/memory/FSTTermsReader.java | 2 +- .../codecs/blocktree/IntersectTermsEnum.java | 2 +- .../apache/lucene/index/AutomatonTermsEnum.java | 6 +++--- .../lucene/util/automaton/ByteRunAutomaton.java | 2 +- .../util/automaton/CharacterRunAutomaton.java | 4 ++-- .../lucene/util/automaton/CompiledAutomaton.java | 2 +- .../lucene/util/automaton/RunAutomaton.java | 15 ++------------- .../org/apache/lucene/index/TestTermsEnum.java | 4 ++-- .../lucene/util/automaton/TestUTF32ToUTF8.java | 4 ++-- .../org/apache/lucene/analysis/MockTokenizer.java | 6 +++--- 14 files changed, 22 insertions(+), 33 deletions(-) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java index 7687d212c08..a7b4e8b2092 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java @@ -474,7 +474,7 @@ final class Stemmer { private boolean checkCondition(int condition, char c1[], int c1off, int c1len, char c2[], int c2off, int c2len) { if (condition != 0) { CharacterRunAutomaton pattern = dictionary.patterns.get(condition); - int state = pattern.getInitialState(); + int state = 0; for (int i = c1off; i < c1off + c1len; i++) { state = pattern.step(state, c1[i]); if (state == -1) { diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java index 9e508bf44bd..6c2d2bcbc85 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java @@ -92,7 +92,7 @@ final class OrdsIntersectTermsEnum extends TermsEnum { final OrdsIntersectTermsEnumFrame f = stack[0]; f.fp = f.fpOrig = fr.rootBlockFP; f.prefix = 0; - f.setState(runAutomaton.getInitialState()); + f.setState(0); f.arc = arc; f.outputPrefix = arc.output; f.load(fr.rootCode); diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java index 56b1a36b775..3ce2abe4358 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java @@ -969,7 +969,7 @@ public final class DirectPostingsFormat extends PostingsFormat { states = new State[1]; states[0] = new State(); states[0].changeOrd = terms.length; - states[0].state = runAutomaton.getInitialState(); + states[0].state = 0; states[0].transitionCount = compiledAutomaton.automaton.getNumTransitions(states[0].state); compiledAutomaton.automaton.initTransition(states[0].state, states[0].transition); states[0].transitionUpto = -1; diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java index 5967505410c..305c4194381 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java @@ -705,7 +705,7 @@ public class FSTOrdTermsReader extends FieldsProducer { /** Load frame for start arc(node) on fst */ Frame loadFirstFrame(Frame frame) throws IOException { frame.arc = fst.getFirstArc(frame.arc); - frame.state = fsa.getInitialState(); + frame.state = 0; return frame; } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java index 0bc43269956..775f6929548 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java @@ -602,7 +602,7 @@ public class FSTTermsReader extends FieldsProducer { /** Load frame for start arc(node) on fst */ Frame loadFirstFrame(Frame frame) throws IOException { frame.fstArc = fst.getFirstArc(frame.fstArc); - frame.fsaState = fsa.getInitialState(); + frame.fsaState = 0; return frame; } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java index 3934c553d9b..19e56a40a04 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java @@ -124,7 +124,7 @@ final class IntersectTermsEnum extends TermsEnum { final IntersectTermsEnumFrame f = stack[0]; f.fp = f.fpOrig = fr.rootBlockFP; f.prefix = 0; - f.setState(runAutomaton.getInitialState()); + f.setState(0); f.arc = arc; f.outputPrefix = arc.output; f.load(fr.rootCode); diff --git a/lucene/core/src/java/org/apache/lucene/index/AutomatonTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/AutomatonTermsEnum.java index c322bb813e2..8aa10ece7f5 100644 --- a/lucene/core/src/java/org/apache/lucene/index/AutomatonTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/AutomatonTermsEnum.java @@ -110,7 +110,7 @@ public class AutomatonTermsEnum extends FilteredTermsEnum { if (term == null) { assert seekBytesRef.length() == 0; // return the empty term, as it's valid - if (runAutomaton.isAccept(runAutomaton.getInitialState())) { + if (runAutomaton.isAccept(0)) { return seekBytesRef.get(); } } else { @@ -135,7 +135,7 @@ public class AutomatonTermsEnum extends FilteredTermsEnum { private void setLinear(int position) { assert linear == false; - int state = runAutomaton.getInitialState(); + int state = 0; assert state == 0; int maxInterval = 0xff; //System.out.println("setLinear pos=" + position + " seekbytesRef=" + seekBytesRef); @@ -182,7 +182,7 @@ public class AutomatonTermsEnum extends FilteredTermsEnum { int state; int pos = 0; savedStates.grow(seekBytesRef.length()+1); - savedStates.setIntAt(0, runAutomaton.getInitialState()); + savedStates.setIntAt(0, 0); while (true) { curGen++; diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/ByteRunAutomaton.java b/lucene/core/src/java/org/apache/lucene/util/automaton/ByteRunAutomaton.java index 138f5c467b5..ca14bc6dd3a 100644 --- a/lucene/core/src/java/org/apache/lucene/util/automaton/ByteRunAutomaton.java +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/ByteRunAutomaton.java @@ -36,7 +36,7 @@ public class ByteRunAutomaton extends RunAutomaton { * Returns true if the given byte array is accepted by this automaton */ public boolean run(byte[] s, int offset, int length) { - int p = initial; + int p = 0; int l = offset + length; for (int i = offset; i < l; i++) { p = step(p, s[i] & 0xFF); diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/CharacterRunAutomaton.java b/lucene/core/src/java/org/apache/lucene/util/automaton/CharacterRunAutomaton.java index 2db30b4eda0..70ff9aa21f6 100644 --- a/lucene/core/src/java/org/apache/lucene/util/automaton/CharacterRunAutomaton.java +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/CharacterRunAutomaton.java @@ -43,7 +43,7 @@ public class CharacterRunAutomaton extends RunAutomaton { * Returns true if the given string is accepted by this automaton. */ public boolean run(String s) { - int p = initial; + int p = 0; int l = s.length(); for (int i = 0, cp = 0; i < l; i += Character.charCount(cp)) { p = step(p, cp = s.codePointAt(i)); @@ -56,7 +56,7 @@ public class CharacterRunAutomaton extends RunAutomaton { * Returns true if the given string is accepted by this automaton */ public boolean run(char[] s, int offset, int length) { - int p = initial; + int p = 0; int l = offset + length; for (int i = offset, cp = 0; i < l; i += Character.charCount(cp)) { p = step(p, cp = Character.codePointAt(s, i, l)); diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java b/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java index cd599e335cd..bd00a709afb 100644 --- a/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java @@ -351,7 +351,7 @@ public class CompiledAutomaton { //if (DEBUG) System.out.println("CA.floor input=" + input.utf8ToString()); - int state = runAutomaton.getInitialState(); + int state = 0; // Special case empty string: if (input.length == 0) { diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/RunAutomaton.java b/lucene/core/src/java/org/apache/lucene/util/automaton/RunAutomaton.java index 2c298ca81be..1d640954e13 100644 --- a/lucene/core/src/java/org/apache/lucene/util/automaton/RunAutomaton.java +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/RunAutomaton.java @@ -32,7 +32,7 @@ package org.apache.lucene.util.automaton; import java.util.Arrays; /** - * Finite-state automaton with fast run operation. + * Finite-state automaton with fast run operation. The initial state is always 0. * * @lucene.experimental */ @@ -41,7 +41,6 @@ public abstract class RunAutomaton { final int maxInterval; final int size; final boolean[] accept; - final int initial; final int[] transitions; // delta(state,c) = transitions[state*points.length + // getCharClass(c)] final int[] points; // char interval start points @@ -53,7 +52,7 @@ public abstract class RunAutomaton { @Override public String toString() { StringBuilder b = new StringBuilder(); - b.append("initial state: ").append(initial).append("\n"); + b.append("initial state: 0\n"); for (int i = 0; i < size; i++) { b.append("state " + i); if (accept[i]) b.append(" [accept]:\n"); @@ -92,13 +91,6 @@ public abstract class RunAutomaton { return accept[state]; } - /** - * Returns initial state. - */ - public final int getInitialState() { - return initial; - } - /** * Returns array of codepoint class interval start points. The array should * not be modified by the caller. @@ -138,7 +130,6 @@ public abstract class RunAutomaton { a = Operations.determinize(a, maxDeterminizedStates); this.automaton = a; points = a.getStartPoints(); - initial = 0; size = Math.max(1,a.getNumStates()); accept = new boolean[size]; transitions = new int[size * points.length]; @@ -188,7 +179,6 @@ public abstract class RunAutomaton { public int hashCode() { final int prime = 31; int result = 1; - result = prime * result + initial; result = prime * result + maxInterval; result = prime * result + points.length; result = prime * result + size; @@ -201,7 +191,6 @@ public abstract class RunAutomaton { if (obj == null) return false; if (getClass() != obj.getClass()) return false; RunAutomaton other = (RunAutomaton) obj; - if (initial != other.initial) return false; if (maxInterval != other.maxInterval) return false; if (size != other.size) return false; if (!Arrays.equals(points, other.points)) return false; diff --git a/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java b/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java index b074f815f61..cd72f416e9d 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java @@ -170,7 +170,7 @@ public class TestTermsEnum extends LuceneTestCase { } private boolean accepts(CompiledAutomaton c, BytesRef b) { - int state = c.runAutomaton.getInitialState(); + int state = 0; for(int idx=0;idx" : startTerm.utf8ToString())); if (startTerm != null) { - int state = c.runAutomaton.getInitialState(); + int state = 0; for(int idx=0;idx